From fb3ce45f42f65ac7f11330148619f0263cdc618e Mon Sep 17 00:00:00 2001 From: Isaiah Raya Date: Tue, 11 Mar 2025 10:32:14 -0700 Subject: [PATCH 1/4] fix: [NPM] Reduce/Refactor Noisy NPM Logs (#3468) * removed all logs from npm dataplane (except error/warning logs) * removed all logs from npm controller (except error/warning logs) * restored logs that are unused by current npm (v2) * removed test files * resolved comments * keep log related to reconciling chain placement when the chain is not in the right place * added bootup logs back * Removed two more noisy logs * Add loglevel config option when printing application insight logs * Updated all non-error/warning logs to commented out and with a zap TODO * fixed typo * small typo fix * updated configmap with loglevel * updated default value * added a default value for loglevel * fixed typo in json * removed comma * changed loglevel to info in configmap * add a short sleep in TestNetPolInBackgroundSkipAddAfterRemove * test remove dataplane changes to see if race condition fixes * Revert "test remove dataplane changes to see if race condition fixes" This reverts commit 08697ebcb29c398955e4c777b9e5c1ee313eee60. * test * Revert "test" This reverts commit 449c2af03d1f53d55e682326983183dc4bd88278. 
* test * update dataplane to test if changes are flagged in race * added stop channels to unit tests to avoid race condtiions * add non noisy logs back * increased time * revert time change after RunPeriodicTasks * test with 1000 seconds * 5000 milliseconds * tweaked the delay * update to 1500 for defer * increased to 1500 * increase to 2000 * removed kubernetes --- npm/azure-npm.yaml | 3 +- npm/cmd/start.go | 6 +- npm/cmd/start_daemon.go | 6 +- npm/cmd/start_server.go | 6 +- npm/config/config.go | 4 + npm/metrics/ai-utils.go | 8 +- .../controllers/v2/namespaceController.go | 39 +++++---- .../controllers/v2/networkPolicyController.go | 12 ++- .../controllers/v2/podController.go | 63 ++++++++++----- npm/pkg/dataplane/dataplane.go | 71 ++++++++++------ npm/pkg/dataplane/dataplane_linux_test.go | 29 +++++-- npm/pkg/dataplane/dataplane_test.go | 50 ++++++++++-- npm/pkg/dataplane/dataplane_windows_test.go | 21 ++++- npm/pkg/dataplane/ipsets/ipsetmanager.go | 24 +++--- .../dataplane/ipsets/ipsetmanager_linux.go | 81 ++++++++++--------- npm/pkg/dataplane/types.go | 20 +++-- npm/util/const.go | 2 - npm/util/ioutil/restore.go | 3 +- 18 files changed, 305 insertions(+), 143 deletions(-) diff --git a/npm/azure-npm.yaml b/npm/azure-npm.yaml index 80a7a1a0f7..137a2c5665 100644 --- a/npm/azure-npm.yaml +++ b/npm/azure-npm.yaml @@ -173,5 +173,6 @@ data: "PlaceAzureChainFirst": false, "ApplyInBackground": true, "NetPolInBackground": true - } + }, + "LogLevel": "info" } diff --git a/npm/cmd/start.go b/npm/cmd/start.go index b36d10422d..29e633ecb0 100644 --- a/npm/cmd/start.go +++ b/npm/cmd/start.go @@ -120,7 +120,11 @@ func start(config npmconfig.Config, flags npmconfig.Flags) error { klog.Infof("Resync period for NPM pod is set to %d.", int(resyncPeriod/time.Minute)) factory := informers.NewSharedInformerFactory(clientset, resyncPeriod) - err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata()) + logLevel := config.LogLevel + if logLevel == "" { + 
logLevel = npmconfig.DefaultConfig.LogLevel + } + err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata(), logLevel) if err != nil { klog.Infof("CreateTelemetryHandle failed with error %v. AITelemetry is not initialized.", err) } diff --git a/npm/cmd/start_daemon.go b/npm/cmd/start_daemon.go index d0af8f276e..1067ff0591 100644 --- a/npm/cmd/start_daemon.go +++ b/npm/cmd/start_daemon.go @@ -94,7 +94,11 @@ func startDaemon(config npmconfig.Config) error { return fmt.Errorf("failed to create dataplane: %w", err) } - err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata()) + logLevel := config.LogLevel + if logLevel == "" { + logLevel = npmconfig.DefaultConfig.LogLevel + } + err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata(), logLevel) if err != nil { klog.Infof("CreateTelemetryHandle failed with error %v. AITelemetry is not initialized.", err) } diff --git a/npm/cmd/start_server.go b/npm/cmd/start_server.go index 6137902d30..fdd863bbe8 100644 --- a/npm/cmd/start_server.go +++ b/npm/cmd/start_server.go @@ -113,7 +113,11 @@ func startControlplane(config npmconfig.Config, flags npmconfig.Flags) error { return fmt.Errorf("failed to create NPM controlplane manager: %w", err) } - err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata()) + logLevel := config.LogLevel + if logLevel == "" { + logLevel = npmconfig.DefaultConfig.LogLevel + } + err = metrics.CreateTelemetryHandle(config.NPMVersion(), version, npm.GetAIMetadata(), logLevel) if err != nil { klog.Infof("CreateTelemetryHandle failed with error %v. 
AITelemetry is not initialized.", err) } diff --git a/npm/config/config.go b/npm/config/config.go index 0bd45a35d7..b308235d13 100644 --- a/npm/config/config.go +++ b/npm/config/config.go @@ -52,6 +52,9 @@ var DefaultConfig = Config{ // NetPolInBackground is currently used in Linux to apply NetPol controller Add events in the background NetPolInBackground: true, }, + + // Setting LogLevel to "info" by default. Set to "debug" to get application insight logs (creates a listener that outputs diagnosticMessageWriter logs). + LogLevel: "info", } type GrpcServerConfig struct { @@ -81,6 +84,7 @@ type Config struct { MaxPendingNetPols int `json:"MaxPendingNetPols,omitempty"` NetPolInvervalInMilliseconds int `json:"NetPolInvervalInMilliseconds,omitempty"` Toggles Toggles `json:"Toggles,omitempty"` + LogLevel string `json:"LogLevel,omitempty"` } type Toggles struct { diff --git a/npm/metrics/ai-utils.go b/npm/metrics/ai-utils.go index 068fdb8f6a..b6b15f1422 100644 --- a/npm/metrics/ai-utils.go +++ b/npm/metrics/ai-utils.go @@ -3,6 +3,7 @@ package metrics import ( "fmt" "strconv" + "strings" "time" "github.com/Azure/azure-container-networking/aitelemetry" @@ -21,15 +22,18 @@ var ( ) // CreateTelemetryHandle creates a handler to initialize AI telemetry -func CreateTelemetryHandle(npmVersionNum int, imageVersion, aiMetadata string) error { +func CreateTelemetryHandle(npmVersionNum int, imageVersion, aiMetadata, logLevel string) error { npmVersion = npmVersionNum + debugMode := strings.EqualFold(logLevel, "debug") + klog.Infof("LogLevel is %s. 
Debugmode is set to %v.", logLevel, debugMode) + aiConfig := aitelemetry.AIConfig{ AppName: util.AzureNpmFlag, AppVersion: imageVersion, BatchSize: util.BatchSizeInBytes, BatchInterval: util.BatchIntervalInSecs, RefreshTimeout: util.RefreshTimeoutInSecs, - DebugMode: util.DebugMode, + DebugMode: debugMode, GetEnvRetryCount: util.GetEnvRetryCount, GetEnvRetryWaitTimeInSecs: util.GetEnvRetryWaitTimeInSecs, } diff --git a/npm/pkg/controlplane/controllers/v2/namespaceController.go b/npm/pkg/controlplane/controllers/v2/namespaceController.go index ca85ea3958..654609ced2 100644 --- a/npm/pkg/controlplane/controllers/v2/namespaceController.go +++ b/npm/pkg/controlplane/controllers/v2/namespaceController.go @@ -164,14 +164,17 @@ func (nsc *NamespaceController) Run(stopCh <-chan struct{}) { defer utilruntime.HandleCrash() defer nsc.workqueue.ShutDown() - klog.Info("Starting Namespace controller\n") - klog.Info("Starting workers") + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Info("Starting Namespace controller\n") + // klog.Info("Starting workers") // Launch workers to process namespace resources go wait.Until(nsc.runWorker, time.Second, stopCh) - klog.Info("Started workers") + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Info("Started workers") <-stopCh - klog.Info("Shutting down workers") + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Info("Shutting down workers") } func (nsc *NamespaceController) runWorker() { @@ -209,7 +212,8 @@ func (nsc *NamespaceController) processNextWorkItem() bool { // Finally, if no error occurs we Forget this item so it does not // get queued again until another change happens. 
nsc.workqueue.Forget(obj) - klog.Infof("Successfully synced '%s'", key) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Successfully synced '%s'", key) return nil }(obj) if err != nil { @@ -321,7 +325,8 @@ func (nsc *NamespaceController) syncAddNamespace(nsObj *corev1.Namespace) error // Add the namespace to its label's ipset list. for nsLabelKey, nsLabelVal := range nsObj.ObjectMeta.Labels { nsLabelKeyValue := util.GetIpSetFromLabelKV(nsLabelKey, nsLabelVal) - klog.Infof("Adding namespace %s to ipset list %s and %s", nsObj.ObjectMeta.Name, nsLabelKey, nsLabelKeyValue) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Adding namespace %s to ipset list %s and %s", nsObj.ObjectMeta.Name, nsLabelKey, nsLabelKeyValue) labelIPSets := []*ipsets.IPSetMetadata{ ipsets.NewIPSetMetadata(nsLabelKey, ipsets.KeyLabelOfNamespace), ipsets.NewIPSetMetadata(nsLabelKeyValue, ipsets.KeyValueLabelOfNamespace), @@ -344,7 +349,8 @@ func (nsc *NamespaceController) syncAddNamespace(nsObj *corev1.Namespace) error func (nsc *NamespaceController) syncUpdateNamespace(newNsObj *corev1.Namespace) (metrics.OperationKind, error) { var err error newNsName, newNsLabel := newNsObj.ObjectMeta.Name, newNsObj.ObjectMeta.Labels - klog.Infof("NAMESPACE UPDATING:\n namespace: [%s/%v]", newNsName, newNsLabel) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("NAMESPACE UPDATING:\n namespace: [%s/%v]", newNsName, newNsLabel) // If previous syncAddNamespace failed for some reasons // before caching npm namespace object or syncUpdateNamespace is called due to namespace creation event, @@ -373,7 +379,8 @@ func (nsc *NamespaceController) syncUpdateNamespace(newNsObj *corev1.Namespace) } toBeRemoved := []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(newNsName, ipsets.Namespace)} - klog.Infof("Deleting namespace %s from 
ipset list %s", newNsName, nsLabelVal) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Deleting namespace %s from ipset list %s", newNsName, nsLabelVal) if err = nsc.dp.RemoveFromList(labelSet, toBeRemoved); err != nil { metrics.SendErrorLogAndMetric(util.NSID, "[UpdateNamespace] Error: failed to delete namespace %s from ipset list %s with err: %v", newNsName, nsLabelVal, err) return metrics.UpdateOp, fmt.Errorf("failed to remove from list during sync update namespace with err %w", err) @@ -389,7 +396,8 @@ func (nsc *NamespaceController) syncUpdateNamespace(newNsObj *corev1.Namespace) // Add the namespace to its label's ipset list. for _, nsLabelVal := range addToIPSets { - klog.Infof("Adding namespace %s to ipset list %s", newNsName, nsLabelVal) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Adding namespace %s to ipset list %s", newNsName, nsLabelVal) var labelSet []*ipsets.IPSetMetadata if util.IsKeyValueLabelSetName(nsLabelVal) { @@ -422,13 +430,14 @@ func (nsc *NamespaceController) syncUpdateNamespace(newNsObj *corev1.Namespace) // cleanDeletedNamespace handles deleting namespace from ipset. 
func (nsc *NamespaceController) cleanDeletedNamespace(cachedNsKey string) error { - klog.Infof("NAMESPACE DELETING: [%s]", cachedNsKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("NAMESPACE DELETING: [%s]", cachedNsKey) cachedNsObj, exists := nsc.npmNamespaceCache.NsMap[cachedNsKey] if !exists { return nil } - - klog.Infof("NAMESPACE DELETING cached labels: [%s/%v]", cachedNsKey, cachedNsObj.LabelsMap) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("NAMESPACE DELETING cached labels: [%s/%v]", cachedNsKey, cachedNsObj.LabelsMap) var err error toBeDeletedNs := []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(cachedNsKey, ipsets.Namespace)} @@ -436,7 +445,8 @@ func (nsc *NamespaceController) cleanDeletedNamespace(cachedNsKey string) error for nsLabelKey, nsLabelVal := range cachedNsObj.LabelsMap { labelKey := ipsets.NewIPSetMetadata(nsLabelKey, ipsets.KeyLabelOfNamespace) - klog.Infof("Deleting namespace %s from ipset list %s", cachedNsKey, labelKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Deleting namespace %s from ipset list %s", cachedNsKey, labelKey) if err = nsc.dp.RemoveFromList(labelKey, toBeDeletedNs); err != nil { metrics.SendErrorLogAndMetric(util.NSID, "[DeleteNamespace] Error: failed to delete namespace %s from ipset list %s with err: %v", cachedNsKey, labelKey, err) return fmt.Errorf("failed to clean deleted namespace when deleting key with err %w", err) @@ -444,7 +454,8 @@ func (nsc *NamespaceController) cleanDeletedNamespace(cachedNsKey string) error labelIpsetName := util.GetIpSetFromLabelKV(nsLabelKey, nsLabelVal) labelKeyValue := ipsets.NewIPSetMetadata(labelIpsetName, ipsets.KeyValueLabelOfNamespace) - klog.Infof("Deleting namespace %s from ipset list %s", cachedNsKey, labelIpsetName) + // TODO: Refactor non-error/warning klogs with Zap and set 
the following logs to "debug" level + // klog.Infof("Deleting namespace %s from ipset list %s", cachedNsKey, labelIpsetName) if err = nsc.dp.RemoveFromList(labelKeyValue, toBeDeletedNs); err != nil { metrics.SendErrorLogAndMetric(util.NSID, "[DeleteNamespace] Error: failed to delete namespace %s from ipset list %s with err: %v", cachedNsKey, labelIpsetName, err) return fmt.Errorf("failed to clean deleted namespace when deleting key value with err %w", err) diff --git a/npm/pkg/controlplane/controllers/v2/networkPolicyController.go b/npm/pkg/controlplane/controllers/v2/networkPolicyController.go index c8baf98efd..10f53e5d17 100644 --- a/npm/pkg/controlplane/controllers/v2/networkPolicyController.go +++ b/npm/pkg/controlplane/controllers/v2/networkPolicyController.go @@ -146,12 +146,15 @@ func (c *NetworkPolicyController) Run(stopCh <-chan struct{}) { defer utilruntime.HandleCrash() defer c.workqueue.ShutDown() - klog.Infof("Starting Network Policy worker") + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Starting Network Policy worker") go wait.Until(c.runWorker, time.Second, stopCh) - klog.Infof("Started Network Policy worker") + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Started Network Policy worker") <-stopCh - klog.Info("Shutting down Network Policy workers") + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Info("Shutting down Network Policy workers") } func (c *NetworkPolicyController) runWorker() { @@ -188,7 +191,8 @@ func (c *NetworkPolicyController) processNextWorkItem() bool { // Finally, if no error occurs we Forget this item so it does not // get queued again until another change happens. 
c.workqueue.Forget(obj) - klog.Infof("Successfully synced '%s'", key) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Successfully synced '%s'", key) return nil }(obj) if err != nil { diff --git a/npm/pkg/controlplane/controllers/v2/podController.go b/npm/pkg/controlplane/controllers/v2/podController.go index 47b4f0c296..3a3e193058 100644 --- a/npm/pkg/controlplane/controllers/v2/podController.go +++ b/npm/pkg/controlplane/controllers/v2/podController.go @@ -170,7 +170,8 @@ func (c *PodController) deletePod(obj interface{}) { } } - klog.Infof("[POD DELETE EVENT] for %s in %s", podObj.Name, podObj.Namespace) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[POD DELETE EVENT] for %s in %s", podObj.Name, podObj.Namespace) if isHostNetworkPod(podObj) { return } @@ -191,12 +192,15 @@ func (c *PodController) Run(stopCh <-chan struct{}) { defer utilruntime.HandleCrash() defer c.workqueue.ShutDown() - klog.Infof("Starting Pod worker") + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Starting Pod worker") go wait.Until(c.runWorker, time.Second, stopCh) - klog.Info("Started Pod workers") + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Info("Started Pod workers") <-stopCh - klog.Info("Shutting down Pod workers") + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Info("Shutting down Pod workers") } func (c *PodController) runWorker() { @@ -234,7 +238,8 @@ func (c *PodController) processNextWorkItem() bool { // Finally, if no error occurs we Forget this item so it does not // get queued again until another change happens. 
c.workqueue.Forget(obj) - klog.Infof("Successfully synced '%s'", key) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Successfully synced '%s'", key) return nil }(obj) if err != nil { @@ -347,8 +352,9 @@ func (c *PodController) syncPod(key string) error { } func (c *PodController) syncAddedPod(podObj *corev1.Pod) error { - klog.Infof("POD CREATING: [%s/%s/%s/%s/%+v/%s]", string(podObj.GetUID()), podObj.Namespace, - podObj.Name, podObj.Spec.NodeName, podObj.Labels, podObj.Status.PodIP) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("POD CREATING: [%s/%s/%s/%s/%+v/%s]", string(podObj.GetUID()), podObj.Namespace, + // podObj.Name, podObj.Spec.NodeName, podObj.Labels, podObj.Status.PodIP) if !util.IsIPV4(podObj.Status.PodIP) { msg := fmt.Sprintf("[syncAddedPod] warning: ADD POD [%s/%s/%s/%+v] ignored as the PodIP is not valid ipv4 address. ip: [%s]", podObj.Namespace, @@ -369,7 +375,8 @@ func (c *PodController) syncAddedPod(podObj *corev1.Pod) error { namespaceSet := []*ipsets.IPSetMetadata{ipsets.NewIPSetMetadata(podObj.Namespace, ipsets.Namespace)} // Add the pod ip information into namespace's ipset. 
- klog.Infof("Adding pod %s (ip : %s) to ipset %s", podKey, podObj.Status.PodIP, podObj.Namespace) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Adding pod %s (ip : %s) to ipset %s", podKey, podObj.Status.PodIP, podObj.Namespace) if err = c.dp.AddToSets(namespaceSet, podMetadata); err != nil { return fmt.Errorf("[syncAddedPod] Error: failed to add pod to namespace ipset with err: %w", err) } @@ -387,8 +394,9 @@ func (c *PodController) syncAddedPod(podObj *corev1.Pod) error { targetSetKeyValue := ipsets.NewIPSetMetadata(labelKeyValue, ipsets.KeyValueLabelOfPod) allSets := []*ipsets.IPSetMetadata{targetSetKey, targetSetKeyValue} - klog.Infof("Creating ipsets %+v and %+v if they do not exist", targetSetKey, targetSetKeyValue) - klog.Infof("Adding pod %s (ip : %s) to ipset %s and %s", podKey, npmPodObj.PodIP, labelKey, labelKeyValue) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Creating ipsets %+v and %+v if they do not exist", targetSetKey, targetSetKeyValue) + // klog.Infof("Adding pod %s (ip : %s) to ipset %s and %s", podKey, npmPodObj.PodIP, labelKey, labelKeyValue) if err = c.dp.AddToSets(allSets, podMetadata); err != nil { return fmt.Errorf("[syncAddedPod] Error: failed to add pod to label ipset with err: %w", err) } @@ -396,7 +404,8 @@ func (c *PodController) syncAddedPod(podObj *corev1.Pod) error { } // Add pod's named ports from its ipset. 
- klog.Infof("Adding named port ipsets") + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Adding named port ipsets") containerPorts := common.GetContainerPortList(podObj) if err = c.manageNamedPortIpsets(containerPorts, podKey, npmPodObj.PodIP, podObj.Spec.NodeName, addNamedPort); err != nil { return fmt.Errorf("[syncAddedPod] Error: failed to add pod to named port ipset with err: %w", err) @@ -430,7 +439,8 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper c.npmNamespaceCache.Unlock() cachedNpmPod, exists := c.podMap[podKey] - klog.Infof("[syncAddAndUpdatePod] updating Pod with key %s", podKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[syncAddAndUpdatePod] updating Pod with key %s", podKey) // No cached npmPod exists. start adding the pod in a cache if !exists { return metrics.CreateOp, c.syncAddedPod(newPodObj) @@ -446,15 +456,18 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper // NPM should clean up existing references of cached pod obj and its IP. // then, re-add new pod obj. 
if cachedNpmPod.PodIP != newPodObj.Status.PodIP { - klog.Infof("Pod (Namespace:%s, Name:%s, newUid:%s), has cachedPodIp:%s which is different from PodIp:%s", - newPodObj.Namespace, newPodObj.Name, string(newPodObj.UID), cachedNpmPod.PodIP, newPodObj.Status.PodIP) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Pod (Namespace:%s, Name:%s, newUid:%s), has cachedPodIp:%s which is different from PodIp:%s", + // newPodObj.Namespace, newPodObj.Name, string(newPodObj.UID), cachedNpmPod.PodIP, newPodObj.Status.PodIP) - klog.Infof("Deleting cached Pod with key:%s first due to IP Mistmatch", podKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Deleting cached Pod with key:%s first due to IP Mistmatch", podKey) if er := c.cleanUpDeletedPod(podKey); er != nil { return metrics.UpdateOp, er } - klog.Infof("Adding back Pod with key:%s after IP Mistmatch", podKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Adding back Pod with key:%s after IP Mistmatch", podKey) return metrics.UpdateOp, c.syncAddedPod(newPodObj) } @@ -468,7 +481,8 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper cachedPodMetadata := dataplane.NewPodMetadata(podKey, cachedNpmPod.PodIP, newPodMetadata.NodeName) // Delete the pod from its label's ipset. 
for _, removeIPSetName := range deleteFromIPSets { - klog.Infof("Deleting pod %s (ip : %s) from ipset %s", podKey, cachedNpmPod.PodIP, removeIPSetName) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Deleting pod %s (ip : %s) from ipset %s", podKey, cachedNpmPod.PodIP, removeIPSetName) var toRemoveSet *ipsets.IPSetMetadata if util.IsKeyValueLabelSetName(removeIPSetName) { @@ -490,7 +504,8 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper // Add the pod to its label's ipset. for _, addIPSetName := range addToIPSets { - klog.Infof("Creating ipset %s if it doesn't already exist", addIPSetName) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Creating ipset %s if it doesn't already exist", addIPSetName) var toAddSet *ipsets.IPSetMetadata if util.IsKeyValueLabelSetName(addIPSetName) { @@ -499,7 +514,8 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper toAddSet = ipsets.NewIPSetMetadata(addIPSetName, ipsets.KeyLabelOfPod) } - klog.Infof("Adding pod %s (ip : %s) to ipset %s", podKey, newPodObj.Status.PodIP, addIPSetName) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Adding pod %s (ip : %s) to ipset %s", podKey, newPodObj.Status.PodIP, addIPSetName) if err = c.dp.AddToSets([]*ipsets.IPSetMetadata{toAddSet}, newPodMetadata); err != nil { return metrics.UpdateOp, fmt.Errorf("[syncAddAndUpdatePod] Error: failed to add pod to label ipset with err: %w", err) } @@ -542,7 +558,8 @@ func (c *PodController) syncAddAndUpdatePod(newPodObj *corev1.Pod) (metrics.Oper // cleanUpDeletedPod cleans up all ipset associated with this pod func (c *PodController) cleanUpDeletedPod(cachedNpmPodKey string) error { - klog.Infof("[cleanUpDeletedPod] deleting Pod with key %s", cachedNpmPodKey) + // TODO: Refactor non-error/warning klogs 
with Zap and set the following logs to "debug" level + // klog.Infof("[cleanUpDeletedPod] deleting Pod with key %s", cachedNpmPodKey) // If cached npmPod does not exist, return nil cachedNpmPod, exist := c.podMap[cachedNpmPodKey] if !exist { @@ -562,7 +579,8 @@ func (c *PodController) cleanUpDeletedPod(cachedNpmPodKey string) error { // Get lists of podLabelKey and podLabelKey + podLavelValue ,and then start deleting them from ipsets for labelKey, labelVal := range cachedNpmPod.Labels { labelKeyValue := util.GetIpSetFromLabelKV(labelKey, labelVal) - klog.Infof("Deleting pod %s (ip : %s) from ipsets %s and %s", cachedNpmPodKey, cachedNpmPod.PodIP, labelKey, labelKeyValue) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("Deleting pod %s (ip : %s) from ipsets %s and %s", cachedNpmPodKey, cachedNpmPod.PodIP, labelKey, labelKeyValue) if err = c.dp.RemoveFromSets( []*ipsets.IPSetMetadata{ ipsets.NewIPSetMetadata(labelKey, ipsets.KeyLabelOfPod), @@ -595,7 +613,8 @@ func (c *PodController) manageNamedPortIpsets(portList []corev1.ContainerPort, p return nil } for _, port := range portList { - klog.Infof("port is %+v", port) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("port is %+v", port) if port.Name == "" { continue } diff --git a/npm/pkg/dataplane/dataplane.go b/npm/pkg/dataplane/dataplane.go index 4a3ccd68ef..670340aeb7 100644 --- a/npm/pkg/dataplane/dataplane.go +++ b/npm/pkg/dataplane/dataplane.go @@ -255,7 +255,8 @@ func (dp *DataPlane) AddToSets(setNames []*ipsets.IPSetMetadata, podMetadata *Po } if dp.shouldUpdatePod() && podMetadata.NodeName == dp.nodeName { - klog.Infof("[DataPlane] Updating Sets to Add for pod key %s", podMetadata.PodKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] Updating Sets to Add for pod key %s", podMetadata.PodKey) // lock 
updatePodCache while reading/modifying or setting the updatePod in the cache dp.updatePodCache.Lock() @@ -277,7 +278,8 @@ func (dp *DataPlane) RemoveFromSets(setNames []*ipsets.IPSetMetadata, podMetadat } if dp.shouldUpdatePod() && podMetadata.NodeName == dp.nodeName { - klog.Infof("[DataPlane] Updating Sets to Remove for pod key %s", podMetadata.PodKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] Updating Sets to Remove for pod key %s", podMetadata.PodKey) // lock updatePodCache while reading/modifying or setting the updatePod in the cache dp.updatePodCache.Lock() @@ -326,10 +328,12 @@ func (dp *DataPlane) ApplyDataPlane() error { newCount := dp.applyInfo.numBatches dp.applyInfo.Unlock() - klog.Infof("[DataPlane] [%s] new batch count: %d", contextApplyDP, newCount) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [%s] new batch count: %d", contextApplyDP, newCount) if newCount >= dp.ApplyMaxBatches { - klog.Infof("[DataPlane] [%s] applying now since reached maximum batch count: %d", contextApplyDP, newCount) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [%s] applying now since reached maximum batch count: %d", contextApplyDP, newCount) return dp.applyDataPlaneNow(contextApplyDP) } @@ -337,12 +341,14 @@ func (dp *DataPlane) ApplyDataPlane() error { } func (dp *DataPlane) applyDataPlaneNow(context string) error { - klog.Infof("[DataPlane] [ApplyDataPlane] [%s] starting to apply ipsets", context) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [ApplyDataPlane] [%s] starting to apply ipsets", context) err := dp.ipsetMgr.ApplyIPSets() if err != nil { return fmt.Errorf("[DataPlane] [%s] error while applying IPSets: %w", context, err) } - 
klog.Infof("[DataPlane] [ApplyDataPlane] [%s] finished applying ipsets", context) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [ApplyDataPlane] [%s] finished applying ipsets", context) // see comment in RemovePolicy() for why this is here dp.setRemovePolicyFailure(false) @@ -363,7 +369,8 @@ func (dp *DataPlane) applyDataPlaneNow(context string) error { } dp.updatePodCache.Unlock() - klog.Infof("[DataPlane] [ApplyDataPlane] [%s] refreshing endpoints before updating pods", context) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [ApplyDataPlane] [%s] refreshing endpoints before updating pods", context) err := dp.refreshPodEndpoints() if err != nil { @@ -372,14 +379,16 @@ func (dp *DataPlane) applyDataPlaneNow(context string) error { return nil } - klog.Infof("[DataPlane] [ApplyDataPlane] [%s] refreshed endpoints", context) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [ApplyDataPlane] [%s] refreshed endpoints", context) // lock updatePodCache while driving goal state to kernel // prevents another ApplyDataplane call from updating the same pods dp.updatePodCache.Lock() defer dp.updatePodCache.Unlock() - klog.Infof("[DataPlane] [ApplyDataPlane] [%s] starting to update pods", context) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [ApplyDataPlane] [%s] starting to update pods", context) for !dp.updatePodCache.isEmpty() { pod := dp.updatePodCache.dequeue() if pod == nil { @@ -397,14 +406,16 @@ func (dp *DataPlane) applyDataPlaneNow(context string) error { } } - klog.Infof("[DataPlane] [ApplyDataPlane] [%s] finished updating pods", context) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // 
klog.Infof("[DataPlane] [ApplyDataPlane] [%s] finished updating pods", context) } return nil } // AddPolicy takes in a translated NPMNetworkPolicy object and applies on dataplane func (dp *DataPlane) AddPolicy(policy *policies.NPMNetworkPolicy) error { - klog.Infof("[DataPlane] Add Policy called for %s", policy.PolicyKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] Add Policy called for %s", policy.PolicyKey) if !dp.netPolInBackground { return dp.addPolicies([]*policies.NPMNetworkPolicy{policy}) @@ -418,10 +429,12 @@ func (dp *DataPlane) AddPolicy(policy *policies.NPMNetworkPolicy) error { dp.netPolQueue.enqueue(policy) newCount := dp.netPolQueue.len() - klog.Infof("[DataPlane] [%s] new pending netpol count: %d", contextAddNetPol, newCount) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [%s] new pending netpol count: %d", contextAddNetPol, newCount) if newCount >= dp.MaxPendingNetPols { - klog.Infof("[DataPlane] [%s] applying now since reached maximum batch count: %d", contextAddNetPol, newCount) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [%s] applying now since reached maximum batch count: %d", contextAddNetPol, newCount) dp.addPoliciesWithRetry(contextAddNetPol) } return nil @@ -431,12 +444,14 @@ func (dp *DataPlane) AddPolicy(policy *policies.NPMNetworkPolicy) error { // The caller must lock netPolQueue. 
func (dp *DataPlane) addPoliciesWithRetry(context string) { netPols := dp.netPolQueue.dump() - klog.Infof("[DataPlane] adding policies %+v", netPols) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] adding policies %+v", netPols) err := dp.addPolicies(netPols) if err == nil { // clear queue and return on success - klog.Infof("[DataPlane] [%s] added policies successfully", context) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [%s] added policies successfully", context) dp.netPolQueue.clear() return } @@ -449,7 +464,8 @@ func (dp *DataPlane) addPoliciesWithRetry(context string) { err = dp.addPolicies([]*policies.NPMNetworkPolicy{netPol}) if err == nil { // remove from queue on success - klog.Infof("[DataPlane] [%s] added policy successfully one at a time. policyKey: %s", context, netPol.PolicyKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [%s] added policy successfully one at a time. 
policyKey: %s", context, netPol.PolicyKey) dp.netPolQueue.delete(netPol.PolicyKey) } else { // keep in queue on failure @@ -467,7 +483,8 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error { } if len(netPols) == 0 { - klog.Infof("[DataPlane] expected to have at least one NetPol in dp.addPolicies()") + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] expected to have at least one NetPol in dp.addPolicies()") return nil } @@ -524,15 +541,18 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error { // increment batch and apply IPSets if needed dp.applyInfo.numBatches++ newCount := dp.applyInfo.numBatches - klog.Infof("[DataPlane] [%s] new batch count: %d", contextAddNetPolBootup, newCount) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [%s] new batch count: %d", contextAddNetPolBootup, newCount) if newCount >= dp.ApplyMaxBatches { - klog.Infof("[DataPlane] [%s] applying now since reached maximum batch count: %d", contextAddNetPolBootup, newCount) - klog.Infof("[DataPlane] [%s] starting to apply ipsets", contextAddNetPolBootup) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [%s] applying now since reached maximum batch count: %d", contextAddNetPolBootup, newCount) + // klog.Infof("[DataPlane] [%s] starting to apply ipsets", contextAddNetPolBootup) err = dp.ipsetMgr.ApplyIPSets() if err != nil { return fmt.Errorf("[DataPlane] [%s] error while applying IPSets: %w", contextAddNetPolBootup, err) } - klog.Infof("[DataPlane] [%s] finished applying ipsets", contextAddNetPolBootup) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] [%s] finished applying ipsets", contextAddNetPolBootup) // see comment in RemovePolicy() for 
why this is here dp.setRemovePolicyFailure(false) @@ -572,7 +592,8 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error { // RemovePolicy takes in network policyKey (namespace/name of network policy) and removes it from dataplane and cache func (dp *DataPlane) RemovePolicy(policyKey string) error { - klog.Infof("[DataPlane] Remove Policy called for %s", policyKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] Remove Policy called for %s", policyKey) if dp.netPolInBackground { // make sure to not add this NetPol if we're deleting it @@ -648,10 +669,12 @@ func (dp *DataPlane) RemovePolicy(policyKey string) error { // UpdatePolicy takes in updated policy object, calculates the delta and applies changes // onto dataplane accordingly func (dp *DataPlane) UpdatePolicy(policy *policies.NPMNetworkPolicy) error { - klog.Infof("[DataPlane] Update Policy called for %s", policy.PolicyKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] Update Policy called for %s", policy.PolicyKey) ok := dp.policyMgr.PolicyExists(policy.PolicyKey) if !ok { - klog.Infof("[DataPlane] Policy %s is not found.", policy.PolicyKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] Policy %s is not found.", policy.PolicyKey) return dp.AddPolicy(policy) } diff --git a/npm/pkg/dataplane/dataplane_linux_test.go b/npm/pkg/dataplane/dataplane_linux_test.go index 837a33445d..9cb22138ad 100644 --- a/npm/pkg/dataplane/dataplane_linux_test.go +++ b/npm/pkg/dataplane/dataplane_linux_test.go @@ -11,6 +11,7 @@ import ( "github.com/Azure/azure-container-networking/npm/util" testutils "github.com/Azure/azure-container-networking/test/utils" "github.com/stretchr/testify/require" + "k8s.io/klog" ) var netpolInBackgroundCfg = &Config{ @@ -75,20 +76,27 @@ func 
TestNetPolInBackgroundUpdatePolicy(t *testing.T) { calls = append(calls, getAddPolicyTestCallsForDP(&updatedTestPolicyobj)...) ioshim := common.NewMockIOShim(calls) defer ioshim.VerifyCalls(t, calls) - dp, err := NewDataPlane("testnode", ioshim, netpolInBackgroundCfg, nil) + + stopCh := make(chan struct{}, 1) + dp, err := NewDataPlane("testnode", ioshim, netpolInBackgroundCfg, stopCh) require.NoError(t, err) + defer func() { + stopCh <- struct{}{} + time.Sleep(2000 * time.Millisecond) + klog.Info("defer for TestNetPolInBackgroundUpdatePolicy finished") + }() dp.RunPeriodicTasks() err = dp.AddPolicy(&testPolicyobj) require.NoError(t, err) - time.Sleep(100 * time.Millisecond) + time.Sleep(2000 * time.Millisecond) err = dp.UpdatePolicy(&updatedTestPolicyobj) require.NoError(t, err) - time.Sleep(100 * time.Millisecond) + time.Sleep(2000 * time.Millisecond) linuxPromVals{2, 1, 0, 0, 1}.assert(t) } @@ -99,8 +107,14 @@ func TestNetPolInBackgroundSkipAddAfterRemove(t *testing.T) { calls := getBootupTestCalls() ioshim := common.NewMockIOShim(calls) defer ioshim.VerifyCalls(t, calls) - dp, err := NewDataPlane("testnode", ioshim, netpolInBackgroundCfg, nil) + stopCh := make(chan struct{}, 1) + dp, err := NewDataPlane("testnode", ioshim, netpolInBackgroundCfg, stopCh) require.NoError(t, err) + defer func() { + stopCh <- struct{}{} + time.Sleep(100 * time.Millisecond) + klog.Info("defer for TestNetPolInBackgroundSkipAddAfterRemove finished") + }() require.NoError(t, dp.AddPolicy(&testPolicyobj)) require.NoError(t, dp.RemovePolicy(testPolicyobj.PolicyKey)) @@ -159,8 +173,13 @@ func TestNetPolInBackgroundFailureToAddFirstTime(t *testing.T) { ) ioshim := common.NewMockIOShim(calls) defer ioshim.VerifyCalls(t, calls) - dp, err := NewDataPlane("testnode", ioshim, netpolInBackgroundCfg, nil) + stopCh := make(chan struct{}, 1) + dp, err := NewDataPlane("testnode", ioshim, netpolInBackgroundCfg, stopCh) require.NoError(t, err) + defer func() { + stopCh <- struct{}{} + time.Sleep(100 * 
time.Millisecond) + }() require.NoError(t, dp.AddPolicy(&testPolicyobj)) require.NoError(t, dp.AddPolicy(&testPolicy2)) diff --git a/npm/pkg/dataplane/dataplane_test.go b/npm/pkg/dataplane/dataplane_test.go index 00e40aaf29..be89796949 100644 --- a/npm/pkg/dataplane/dataplane_test.go +++ b/npm/pkg/dataplane/dataplane_test.go @@ -2,6 +2,7 @@ package dataplane import ( "testing" + "time" "github.com/Azure/azure-container-networking/common" "github.com/Azure/azure-container-networking/npm/metrics" @@ -79,8 +80,13 @@ func TestNewDataPlane(t *testing.T) { calls := getBootupTestCalls() ioshim := common.NewMockIOShim(calls) defer ioshim.VerifyCalls(t, calls) - dp, err := NewDataPlane("testnode", ioshim, dpCfg, nil) + stopCh := make(chan struct{}, 1) + dp, err := NewDataPlane("testnode", ioshim, dpCfg, stopCh) require.NoError(t, err) + defer func() { + stopCh <- struct{}{} + time.Sleep(100 * time.Millisecond) + }() assert.NotNil(t, dp) } @@ -90,8 +96,13 @@ func TestCreateAndDeleteIpSets(t *testing.T) { calls := getBootupTestCalls() ioshim := common.NewMockIOShim(calls) defer ioshim.VerifyCalls(t, calls) - dp, err := NewDataPlane("testnode", ioshim, dpCfg, nil) + stopCh := make(chan struct{}, 1) + dp, err := NewDataPlane("testnode", ioshim, dpCfg, stopCh) require.NoError(t, err) + defer func() { + stopCh <- struct{}{} + time.Sleep(100 * time.Millisecond) + }() assert.NotNil(t, dp) setsTocreate := []*ipsets.IPSetMetadata{ { @@ -132,8 +143,13 @@ func TestAddToSet(t *testing.T) { calls := getBootupTestCalls() ioshim := common.NewMockIOShim(calls) defer ioshim.VerifyCalls(t, calls) - dp, err := NewDataPlane("testnode", ioshim, dpCfg, nil) + stopCh := make(chan struct{}, 1) + dp, err := NewDataPlane("testnode", ioshim, dpCfg, stopCh) require.NoError(t, err) + defer func() { + stopCh <- struct{}{} + time.Sleep(100 * time.Millisecond) + }() setsTocreate := []*ipsets.IPSetMetadata{ { @@ -196,8 +212,13 @@ func TestApplyPolicy(t *testing.T) { calls := append(getBootupTestCalls(), 
getAddPolicyTestCallsForDP(&testPolicyobj)...) ioshim := common.NewMockIOShim(calls) defer ioshim.VerifyCalls(t, calls) - dp, err := NewDataPlane("testnode", ioshim, dpCfg, nil) + stopCh := make(chan struct{}, 1) + dp, err := NewDataPlane("testnode", ioshim, dpCfg, stopCh) require.NoError(t, err) + defer func() { + stopCh <- struct{}{} + time.Sleep(100 * time.Millisecond) + }() err = dp.AddPolicy(&testPolicyobj) require.NoError(t, err) @@ -210,8 +231,13 @@ func TestRemovePolicy(t *testing.T) { calls = append(calls, getRemovePolicyTestCallsForDP(&testPolicyobj)...) ioshim := common.NewMockIOShim(calls) defer ioshim.VerifyCalls(t, calls) - dp, err := NewDataPlane("testnode", ioshim, dpCfg, nil) + stopCh := make(chan struct{}, 1) + dp, err := NewDataPlane("testnode", ioshim, dpCfg, stopCh) require.NoError(t, err) + defer func() { + stopCh <- struct{}{} + time.Sleep(100 * time.Millisecond) + }() err = dp.AddPolicy(&testPolicyobj) require.NoError(t, err) @@ -234,8 +260,13 @@ func TestHandle2977(t *testing.T) { calls = append(calls, getAddPolicyTestCallsForDP(&testPolicyobj)...) ioshim := common.NewMockIOShim(calls) defer ioshim.VerifyCalls(t, calls) - dp, err := NewDataPlane("testnode", ioshim, dpCfg, nil) + stopCh := make(chan struct{}, 1) + dp, err := NewDataPlane("testnode", ioshim, dpCfg, stopCh) require.NoError(t, err) + defer func() { + stopCh <- struct{}{} + time.Sleep(100 * time.Millisecond) + }() err = dp.AddPolicy(&testPolicyobj) require.NoError(t, err) @@ -263,8 +294,13 @@ func TestUpdatePolicy(t *testing.T) { calls = append(calls, getAddPolicyTestCallsForDP(&updatedTestPolicyobj)...) 
ioshim := common.NewMockIOShim(calls) defer ioshim.VerifyCalls(t, calls) - dp, err := NewDataPlane("testnode", ioshim, dpCfg, nil) + stopCh := make(chan struct{}, 1) + dp, err := NewDataPlane("testnode", ioshim, dpCfg, stopCh) require.NoError(t, err) + defer func() { + stopCh <- struct{}{} + time.Sleep(100 * time.Millisecond) + }() err = dp.AddPolicy(&testPolicyobj) require.NoError(t, err) diff --git a/npm/pkg/dataplane/dataplane_windows_test.go b/npm/pkg/dataplane/dataplane_windows_test.go index 5cd69a23c2..38646f43ac 100644 --- a/npm/pkg/dataplane/dataplane_windows_test.go +++ b/npm/pkg/dataplane/dataplane_windows_test.go @@ -26,9 +26,14 @@ func TestMetrics(t *testing.T) { hns := ipsets.GetHNSFake(t, cfg.NetworkName) hns.Delay = defaultHNSLatency io := common.NewMockIOShimWithFakeHNS(hns) - dp, err := NewDataPlane(thisNode, io, cfg, nil) + stopCh := make(chan struct{}, 1) + dp, err := NewDataPlane(thisNode, io, cfg, stopCh) require.NoError(t, err, "failed to initialize dp") require.NotNil(t, dp, "failed to initialize dp (nil)") + defer func() { + stopCh <- struct{}{} + time.Sleep(100 * time.Millisecond) + }() count, err := metrics.TotalGetNetworkLatencyCalls() require.Nil(t, err, "failed to get metric") @@ -102,9 +107,14 @@ func testSerialCases(t *testing.T, tests []*SerialTestCase, finalSleep time.Dura require.Nil(t, err, "failed to create initial endpoint %+v", ep) } - dp, err := NewDataPlane(thisNode, io, tt.DpCfg, nil) + stopCh := make(chan struct{}, 1) + dp, err := NewDataPlane(thisNode, io, tt.DpCfg, stopCh) require.NoError(t, err, "failed to initialize dp") require.NotNil(t, dp, "failed to initialize dp (nil)") + defer func() { + stopCh <- struct{}{} + time.Sleep(100 * time.Millisecond) + }() dp.RunPeriodicTasks() @@ -142,8 +152,13 @@ func testMultiJobCases(t *testing.T, tests []*MultiJobTestCase, finalSleep time. 
} // the dp is necessary for NPM tests - dp, err := NewDataPlane(thisNode, io, tt.DpCfg, nil) + stopCh := make(chan struct{}, 1) + dp, err := NewDataPlane(thisNode, io, tt.DpCfg, stopCh) require.NoError(t, err, "failed to initialize dp") + defer func() { + stopCh <- struct{}{} + time.Sleep(100 * time.Millisecond) + }() dp.RunPeriodicTasks() diff --git a/npm/pkg/dataplane/ipsets/ipsetmanager.go b/npm/pkg/dataplane/ipsets/ipsetmanager.go index b57bf67e91..8f85810000 100644 --- a/npm/pkg/dataplane/ipsets/ipsetmanager.go +++ b/npm/pkg/dataplane/ipsets/ipsetmanager.go @@ -93,7 +93,8 @@ func (iMgr *IPSetManager) Reconcile() { } numRemovedSets := originalNumSets - len(iMgr.setMap) if numRemovedSets > 0 { - klog.Infof("[IPSetManager] removed %d empty/unreferenced ipsets, updating toDeleteCache to: %+v", numRemovedSets, iMgr.dirtyCache.printDeleteCache()) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[IPSetManager] removed %d empty/unreferenced ipsets, updating toDeleteCache to: %+v", numRemovedSets, iMgr.dirtyCache.printDeleteCache()) } } @@ -308,10 +309,11 @@ func (iMgr *IPSetManager) RemoveFromSets(removeFromSets []*IPSetMetadata, ip, po } // in case the IP belongs to a new Pod, then ignore this Delete call as this might be stale if cachedPodKey != podKey { - klog.Infof( - "[IPSetManager] DeleteFromSet: PodOwner has changed for Ip: %s, setName:%s, Old podKey: %s, new podKey: %s. Ignore the delete as this is stale update", - ip, prefixedName, cachedPodKey, podKey, - ) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof( + // "[IPSetManager] DeleteFromSet: PodOwner has changed for Ip: %s, setName:%s, Old podKey: %s, new podKey: %s. 
Ignore the delete as this is stale update", + // ip, prefixedName, cachedPodKey, podKey, + // ) continue } @@ -453,14 +455,16 @@ func (iMgr *IPSetManager) ApplyIPSets() error { defer iMgr.Unlock() if iMgr.dirtyCache.numSetsToAddOrUpdate() == 0 && iMgr.dirtyCache.numSetsToDelete() == 0 { - klog.Info("[IPSetManager] No IPSets to apply") + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Info("[IPSetManager] No IPSets to apply") return nil } - klog.Infof( - "[IPSetManager] dirty caches. toAddUpdateCache: %s, toDeleteCache: %s", - iMgr.dirtyCache.printAddOrUpdateCache(), iMgr.dirtyCache.printDeleteCache(), - ) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof( + // "[IPSetManager] dirty caches. toAddUpdateCache: %s, toDeleteCache: %s", + // iMgr.dirtyCache.printAddOrUpdateCache(), iMgr.dirtyCache.printDeleteCache(), + // ) iMgr.sanitizeDirtyCache() // Call the appropriate apply ipsets diff --git a/npm/pkg/dataplane/ipsets/ipsetmanager_linux.go b/npm/pkg/dataplane/ipsets/ipsetmanager_linux.go index 1e1f2eaf7e..4c57cfa20a 100644 --- a/npm/pkg/dataplane/ipsets/ipsetmanager_linux.go +++ b/npm/pkg/dataplane/ipsets/ipsetmanager_linux.go @@ -67,36 +67,38 @@ var ( ) /* - based on ipset list output with azure-npm- prefix, create an ipset restore file where we flush all sets first, then destroy all sets - - NOTE: the behavior has changed to run two separate restore files. The first to flush all, the second to destroy all. In between restores, - we determine if there are any sets with leaked ipset reference counts. We ignore destroys for those sets in-line with v1. 
- - overall error handling: - - if flush fails because the set doesn't exist (should never happen because we're listing sets right before), then ignore it and the destroy - - if flush fails otherwise, then add to destroyFailureCount and continue (aborting the destroy too) - - if destroy fails because the set doesn't exist (should never happen since the flush operation would have worked), then ignore it - - if destroy fails for another reason, then ignore it and add to destroyFailureCount and mark for reconcile (TODO) - - example: - grep output: - azure-npm-123456 - azure-npm-987654 - azure-npm-777777 - - example restore file [flag meanings: -F (flush), -X (destroy)]: - -F azure-npm-123456 - -F azure-npm-987654 - -F azure-npm-777777 - -X azure-npm-123456 - -X azure-npm-987654 - -X azure-npm-777777 - - prometheus metrics: - After this function, NumIPSets should be 0 or the number of NPM IPSets that existed and failed to be destroyed. - When NPM restarts, Prometheus metrics will initialize at 0, but NPM IPSets may exist. - We will reset ipset entry metrics if the restore succeeds whether or not some flushes/destroys failed (NOTE: this is different behavior than v1). - If a flush fails, we could update the num entries for that set, but that would be a lot of overhead. +based on ipset list output with azure-npm- prefix, create an ipset restore file where we flush all sets first, then destroy all sets + +NOTE: the behavior has changed to run two separate restore files. The first to flush all, the second to destroy all. In between restores, +we determine if there are any sets with leaked ipset reference counts. We ignore destroys for those sets in-line with v1. 
+ +overall error handling: +- if flush fails because the set doesn't exist (should never happen because we're listing sets right before), then ignore it and the destroy +- if flush fails otherwise, then add to destroyFailureCount and continue (aborting the destroy too) +- if destroy fails because the set doesn't exist (should never happen since the flush operation would have worked), then ignore it +- if destroy fails for another reason, then ignore it and add to destroyFailureCount and mark for reconcile (TODO) + +example: + + grep output: + azure-npm-123456 + azure-npm-987654 + azure-npm-777777 + + example restore file [flag meanings: -F (flush), -X (destroy)]: + -F azure-npm-123456 + -F azure-npm-987654 + -F azure-npm-777777 + -X azure-npm-123456 + -X azure-npm-987654 + -X azure-npm-777777 + +prometheus metrics: + + After this function, NumIPSets should be 0 or the number of NPM IPSets that existed and failed to be destroyed. + When NPM restarts, Prometheus metrics will initialize at 0, but NPM IPSets may exist. + We will reset ipset entry metrics if the restore succeeds whether or not some flushes/destroys failed (NOTE: this is different behavior than v1). + If a flush fails, we could update the num entries for that set, but that would be a lot of overhead. 
*/ func (iMgr *IPSetManager) resetIPSets() error { if success := iMgr.resetWithoutRestore(); success { @@ -106,7 +108,8 @@ func (iMgr *IPSetManager) resetIPSets() error { // get current NPM ipsets listNamesCommand := iMgr.ioShim.Exec.Command(ipsetCommand, ipsetListFlag, ipsetNameFlag) grepCommand := iMgr.ioShim.Exec.Command(ioutil.Grep, azureNPMPrefix) - klog.Infof("running this command while resetting ipsets: [%s %s %s | %s %s]", ipsetCommand, ipsetListFlag, ipsetNameFlag, ioutil.Grep, azureNPMRegex) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("running this command while resetting ipsets: [%s %s %s | %s %s]", ipsetCommand, ipsetListFlag, ipsetNameFlag, ioutil.Grep, azureNPMRegex) azureIPSets, haveAzureNPMIPSets, commandError := ioutil.PipeCommandToGrep(listNamesCommand, grepCommand) if commandError != nil { return npmerrors.SimpleErrorWrapper("failed to run ipset list for resetting IPSets (prometheus metrics may be off now)", commandError) @@ -143,8 +146,9 @@ func (iMgr *IPSetManager) resetIPSets() error { func (iMgr *IPSetManager) resetWithoutRestore() bool { listNamesCommand := iMgr.ioShim.Exec.Command(ipsetCommand, ipsetListFlag, ipsetNameFlag) grepCommand := iMgr.ioShim.Exec.Command(ioutil.Grep, ioutil.GrepQuietFlag, ioutil.GrepAntiMatchFlag, azureNPMPrefix) - commandString := fmt.Sprintf(" [%s %s %s | %s %s %s %s]", ipsetCommand, ipsetListFlag, ipsetNameFlag, ioutil.Grep, ioutil.GrepQuietFlag, ioutil.GrepAntiMatchFlag, azureNPMPrefix) - klog.Infof("running this command while resetting ipsets: [%s]", commandString) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // commandString := fmt.Sprintf(" [%s %s %s | %s %s %s %s]", ipsetCommand, ipsetListFlag, ipsetNameFlag, ioutil.Grep, ioutil.GrepQuietFlag, ioutil.GrepAntiMatchFlag, azureNPMPrefix) + // klog.Infof("running this command while resetting ipsets: [%s]", commandString) _, 
haveNonAzureNPMIPSets, commandError := ioutil.PipeCommandToGrep(listNamesCommand, grepCommand) if commandError != nil { metrics.SendErrorLogAndMetric(util.IpsmID, "failed to determine if there were non-azure sets while resetting. err: %v", commandError) @@ -155,7 +159,8 @@ func (iMgr *IPSetManager) resetWithoutRestore() bool { } flushAndDestroy := iMgr.ioShim.Exec.Command(util.BashCommand, util.BashCommandFlag, ipsetFlushAndDestroyString) - klog.Infof("running this command while resetting ipsets: [%s %s '%s']", util.BashCommand, util.BashCommandFlag, ipsetFlushAndDestroyString) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("running this command while resetting ipsets: [%s %s '%s']", util.BashCommand, util.BashCommandFlag, ipsetFlushAndDestroyString) output, err := flushAndDestroy.CombinedOutput() if err != nil { exitCode := -1 @@ -221,9 +226,10 @@ func (iMgr *IPSetManager) setsWithReferences() map[string]struct{} { listAllCommand := iMgr.ioShim.Exec.Command(ipsetCommand, ipsetListFlag) grep1 := iMgr.ioShim.Exec.Command(ioutil.Grep, ioutil.GrepBeforeFlag, referenceGrepLookBack, ioutil.GrepRegexFlag, positiveRefsRegex) grep2 := iMgr.ioShim.Exec.Command(ioutil.Grep, ioutil.GrepOnlyMatchingFlag, ioutil.GrepRegexFlag, azureNPMRegex) - klog.Infof("running this command while resetting ipsets: [%s %s | %s %s %s %s %s | %s %s %s %s]", ipsetCommand, ipsetListFlag, - ioutil.Grep, ioutil.GrepBeforeFlag, referenceGrepLookBack, ioutil.GrepRegexFlag, positiveRefsRegex, - ioutil.Grep, ioutil.GrepOnlyMatchingFlag, ioutil.GrepRegexFlag, azureNPMRegex) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("running this command while resetting ipsets: [%s %s | %s %s %s %s %s | %s %s %s %s]", ipsetCommand, ipsetListFlag, + // ioutil.Grep, ioutil.GrepBeforeFlag, referenceGrepLookBack, ioutil.GrepRegexFlag, positiveRefsRegex, + // ioutil.Grep, 
ioutil.GrepOnlyMatchingFlag, ioutil.GrepRegexFlag, azureNPMRegex) setsWithReferencesBytes, haveRefsStill, err := ioutil.DoublePipeToGrep(listAllCommand, grep1, grep2) var setsWithReferences map[string]struct{} @@ -385,6 +391,7 @@ func (iMgr *IPSetManager) applyIPSetsWithSaveFile() error { See error handling in applyIPSetsWithSaveFile(). overall format for ipset restore file: + [creates] (random order) [deletes and adds] (sets in random order, where each set has deletes first (random order), then adds (random order)) [flushes] (random order) diff --git a/npm/pkg/dataplane/types.go b/npm/pkg/dataplane/types.go index 2b06c5c079..dd17c714dc 100644 --- a/npm/pkg/dataplane/types.go +++ b/npm/pkg/dataplane/types.go @@ -144,7 +144,8 @@ func (c *updatePodCache) enqueue(m *PodMetadata) *updateNPMPod { } if !ok { - klog.Infof("[DataPlane] pod key %s not found in updatePodCache. creating a new obj", m.PodKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] pod key %s not found in updatePodCache. creating a new obj", m.PodKey) pod = newUpdateNPMPod(m) c.cache[m.PodKey] = pod @@ -157,7 +158,8 @@ func (c *updatePodCache) enqueue(m *PodMetadata) *updateNPMPod { // dequeue returns the first pod in the queue and removes it from the queue. func (c *updatePodCache) dequeue() *updateNPMPod { if c.isEmpty() { - klog.Infof("[DataPlane] updatePodCache is empty. returning nil for dequeue()") + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] updatePodCache is empty. returning nil for dequeue()") return nil } @@ -177,7 +179,8 @@ func (c *updatePodCache) dequeue() *updateNPMPod { func (c *updatePodCache) requeue(pod *updateNPMPod) { if _, ok := c.cache[pod.PodKey]; ok { // should not happen - klog.Infof("[DataPlane] pod key %s already exists in updatePodCache. 
skipping requeue", pod.PodKey) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("[DataPlane] pod key %s already exists in updatePodCache. skipping requeue", pod.PodKey) return } @@ -208,11 +211,12 @@ func (q *netPolQueue) len() int { // enqueue adds a NetPol to the queue. If the NetPol already exists in the queue, the NetPol object is updated. func (q *netPolQueue) enqueue(policy *policies.NPMNetworkPolicy) { - if _, ok := q.toAdd[policy.PolicyKey]; ok { - klog.Infof("[DataPlane] policy %s exists in netPolQueue. updating", policy.PolicyKey) - } else { - klog.Infof("[DataPlane] enqueuing policy %s in netPolQueue", policy.PolicyKey) - } + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // if _, ok := q.toAdd[policy.PolicyKey]; ok { + // klog.Infof("[DataPlane] policy %s exists in netPolQueue. updating", policy.PolicyKey) + // } else { + // klog.Infof("[DataPlane] enqueuing policy %s in netPolQueue", policy.PolicyKey) + // } q.toAdd[policy.PolicyKey] = policy } diff --git a/npm/util/const.go b/npm/util/const.go index 25a954a354..59ffa2ef05 100644 --- a/npm/util/const.go +++ b/npm/util/const.go @@ -238,8 +238,6 @@ const ( AiInitializeRetryCount int = 3 AiInitializeRetryInMin int = 1 - DebugMode bool = true - ErrorValue float64 = 1 ) diff --git a/npm/util/ioutil/restore.go b/npm/util/ioutil/restore.go index 9255b01ee9..21933ca29b 100644 --- a/npm/util/ioutil/restore.go +++ b/npm/util/ioutil/restore.go @@ -185,7 +185,8 @@ func (creator *FileCreator) runCommandOnceWithFile(fileString, cmd string, args return false, nil } - klog.Infof("running this restore command: [%s]", commandString) + // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level + // klog.Infof("running this restore command: [%s]", commandString) if creator.verbose { creator.logLines(commandString) From 801a2f5bbc4acfb60e41bebfa8a7cb1dcca36dfe Mon Sep 17 
00:00:00 2001 From: Isaiah Raya Date: Tue, 29 Apr 2025 17:53:55 +0000 Subject: [PATCH 2/4] removed hostusers misconfiguration --- npm/azure-npm.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/npm/azure-npm.yaml b/npm/azure-npm.yaml index 137a2c5665..a19a1b974e 100644 --- a/npm/azure-npm.yaml +++ b/npm/azure-npm.yaml @@ -112,7 +112,6 @@ spec: - name: tmp mountPath: /tmp hostNetwork: true - hostUsers: false nodeSelector: kubernetes.io/os: linux volumes: From 2115fd0fc908355ef804f5204a1155323a7918eb Mon Sep 17 00:00:00 2001 From: Isaiah Raya Date: Thu, 1 May 2025 17:47:28 +0000 Subject: [PATCH 3/4] added service tag to public ip to comply with SFI rules --- .pipelines/npm/npm-conformance-tests.yaml | 27 +++++++++++++++++++++-- .pipelines/npm/npm-scale-test.yaml | 16 +++++++++++++- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/.pipelines/npm/npm-conformance-tests.yaml b/.pipelines/npm/npm-conformance-tests.yaml index 1c76e11890..2d4661c7ca 100644 --- a/.pipelines/npm/npm-conformance-tests.yaml +++ b/.pipelines/npm/npm-conformance-tests.yaml @@ -121,6 +121,7 @@ stages: RESOURCE_GROUP: $[ stagedependencies.setup.setup.outputs['EnvironmentalVariables.RESOURCE_GROUP'] ] TAG: $[ stagedependencies.setup.setup.outputs['EnvironmentalVariables.TAG'] ] FQDN: empty + PUBLIC_IP_NAME: $(RESOURCE_GROUP)-$(PROFILE)-public-ip steps: - checkout: self @@ -135,6 +136,23 @@ stages: echo created RG $(RESOURCE_GROUP) in $(LOCATION) az version + - task: AzureCLI@2 + displayName: "Create public IP with a service tag" + inputs: + azureSubscription: $(BUILD_VALIDATIONS_SERVICE_CONNECTION) + scriptType: "bash" + scriptLocation: "inlineScript" + inlineScript: | + az network public-ip create \ + --name $(PUBLIC_IP_NAME) \ + --resource-group $(RESOURCE_GROUP) \ + --allocation-method Static \ + --ip-tags 'FirstPartyUsage=/DelegatedNetworkControllerTest' \ + --location $(LOCATION) \ + --sku Standard \ + --tier Regional \ + --version IPv4 + - task: AzureCLI@2 displayName: 
"Deploy NPM to Test Cluster" inputs: @@ -148,6 +166,9 @@ stages: chmod +x kubectl echo Cluster $(AZURE_CLUSTER) echo Resource $(RESOURCE_GROUP) + echo Public IP $(PUBLIC_IP_NAME) + export PUBLIC_IP_ID=$(az network public-ip show -g $(RESOURCE_GROUP) -n $(PUBLIC_IP_NAME) --query id -o tsv) + echo Public IP ID $PUBLIC_IP_ID if [[ $(AZURE_CLUSTER) == *ws22 ]] # * is used for pattern matching then @@ -164,7 +185,8 @@ stages: --network-plugin azure \ --vm-set-type VirtualMachineScaleSets \ --node-vm-size Standard_D4s_v3 \ - --node-count 1 + --node-count 1 \ + --load-balancer-outbound-ips $PUBLIC_IP_ID if [ $? != 0 ] then @@ -211,7 +233,8 @@ stages: az aks create --no-ssh-key \ --resource-group $(RESOURCE_GROUP) \ --name $(AZURE_CLUSTER) \ - --network-plugin azure + --network-plugin azure \ + --load-balancer-outbound-ips $PUBLIC_IP_ID if [ $? != 0 ] then diff --git a/.pipelines/npm/npm-scale-test.yaml b/.pipelines/npm/npm-scale-test.yaml index 7117e4997d..6e6e63de21 100644 --- a/.pipelines/npm/npm-scale-test.yaml +++ b/.pipelines/npm/npm-scale-test.yaml @@ -141,6 +141,19 @@ jobs: echo "Creating resource group named $(RESOURCE_GROUP)" az group create --name $(RESOURCE_GROUP) -l $(LOCATION) -o table + export PUBLIC_IP_NAME=$(RESOURCE_GROUP)-$(PROFILE)-public-ip + echo "Creating public IP with a service tag named $PUBLIC_IP_NAME" + az network public-ip create \ + --name $PUBLIC_IP_NAME \ + --resource-group $(RESOURCE_GROUP) \ + --allocation-method Static \ + --ip-tags 'FirstPartyUsage=/DelegatedNetworkControllerTest' \ + --location $(LOCATION) \ + --sku Standard \ + --tier Regional \ + --version IPv4 + export PUBLIC_IP_ID=$(az network public-ip show -g $(RESOURCE_GROUP) -n $PUBLIC_IP_NAME --query id -o tsv) + export CLUSTER_NAME=$(RESOURCE_GROUP)-$(PROFILE) echo "Creating cluster named $CLUSTER_NAME" az aks create \ @@ -154,7 +167,8 @@ jobs: --node-vm-size Standard_D4s_v3 \ --node-count 1 \ --tier standard \ - --max-pods 100 + --max-pods 100 \ + --load-balancer-outbound-ips 
$PUBLIC_IP_ID echo "Getting credentials to $CLUSTER_NAME" az aks get-credentials -g $(RESOURCE_GROUP) -n $CLUSTER_NAME --overwrite-existing --file ./kubeconfig From b7d68b13c3321b00c4b5529795f3ac30780ed9f4 Mon Sep 17 00:00:00 2001 From: Isaiah Raya Date: Thu, 1 May 2025 17:52:05 +0000 Subject: [PATCH 4/4] changed name so that it's a unique public ip --- .pipelines/npm/npm-conformance-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/npm/npm-conformance-tests.yaml b/.pipelines/npm/npm-conformance-tests.yaml index 2d4661c7ca..18ad6d3756 100644 --- a/.pipelines/npm/npm-conformance-tests.yaml +++ b/.pipelines/npm/npm-conformance-tests.yaml @@ -110,7 +110,7 @@ stages: IS_STRESS_TEST: "false" v2-linux-stress: AZURE_CLUSTER: "conformance-v2-linux-stress" - PROFILE: "v2-background" + PROFILE: "v2-linux-stress" IS_STRESS_TEST: "true" pool: name: $(BUILD_POOL_NAME_DEFAULT)