Skip to content

Commit 18fa18d

Browse files
committed
Enhancement - Cleanup Orphaned PVCs
1 parent 69a18ea commit 18fa18d

File tree

10 files changed

+161
-2
lines changed

10 files changed

+161
-2
lines changed

pkg/common/config/config.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ const (
6868
// interval after which successful CnsRegisterVolumes will be cleaned up.
6969
// Current default value is set to 12 hours
7070
DefaultCnsRegisterVolumesCleanupIntervalInMin = 720
71+
// DefaultCnsPVCProtectionCleanupIntervalInMin is the default time interval after which
72+
// orphaned PVCs will be cleaned up.
73+
// Current default value is set to 12 hours
74+
DefaultCnsPVCProtectionCleanupIntervalInMin = 720
7175
// DefaultVolumeMigrationCRCleanupIntervalInMin is the default time interval
7276
// after which stale CnsVSphereVolumeMigration CRs will be cleaned up.
7377
// Current default value is set to 2 hours.
@@ -451,6 +455,9 @@ func validateConfig(ctx context.Context, cfg *Config) error {
451455
if cfg.Global.CnsRegisterVolumesCleanupIntervalInMin == 0 {
452456
cfg.Global.CnsRegisterVolumesCleanupIntervalInMin = DefaultCnsRegisterVolumesCleanupIntervalInMin
453457
}
458+
if cfg.Global.CnsPVCProtectionCleanupIntervalInMin == 0 {
459+
cfg.Global.CnsPVCProtectionCleanupIntervalInMin = DefaultCnsPVCProtectionCleanupIntervalInMin
460+
}
454461
if cfg.Global.VolumeMigrationCRCleanupIntervalInMin == 0 {
455462
cfg.Global.VolumeMigrationCRCleanupIntervalInMin = DefaultVolumeMigrationCRCleanupIntervalInMin
456463
}

pkg/common/config/types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ type Config struct {
5151
// CnsRegisterVolumesCleanupIntervalInMin specifies the interval after which
5252
// successful CnsRegisterVolumes will be cleaned up.
5353
CnsRegisterVolumesCleanupIntervalInMin int `gcfg:"cnsregistervolumes-cleanup-intervalinmin"`
54+
// CnsPVCProtectionCleanupIntervalInMin specifies the interval after which
55+
// orphaned PVCs will be cleaned up.
56+
CnsPVCProtectionCleanupIntervalInMin int `gcfg:"cnspvcprotection-cleanup-intervalinmin"`
5457
// VolumeMigrationCRCleanupIntervalInMin specifies the interval after which
5558
// stale CnsVSphereVolumeMigration CRs will be cleaned up.
5659
VolumeMigrationCRCleanupIntervalInMin int `gcfg:"volumemigration-cr-cleanup-intervalinmin"`

pkg/common/unittestcommon/types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
cnstypes "github.com/vmware/govmomi/cns/types"
2525
"github.com/vmware/govmomi/object"
2626
"github.com/vmware/govmomi/vim25/types"
27+
v1 "k8s.io/api/core/v1"
2728
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/apis/migration"
2829
cnsvolume "sigs.k8s.io/vsphere-csi-driver/v3/pkg/common/cns-lib/volume"
2930
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/common/cns-lib/vsphere"
@@ -47,6 +48,8 @@ type FakeK8SOrchestrator struct {
4748
featureStates map[string]string
4849
// CSINodeTopology instances for topology testing
4950
csiNodeTopologyInstances []interface{}
51+
// PVCs for testing
52+
pvcs []*v1.PersistentVolumeClaim
5053
}
5154

5255
// volumeMigration holds mocked migrated volume information

pkg/common/unittestcommon/utils.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,27 @@ func (c *FakeK8SOrchestrator) SetCSINodeTopologyInstances(instances []interface{
543543
c.csiNodeTopologyInstances = instances
544544
}
545545

546+
func (c *FakeK8SOrchestrator) ListPVCs(ctx context.Context, namespace string) []*v1.PersistentVolumeClaim {
547+
if namespace == "" {
548+
// Return all PVCs
549+
return c.pvcs
550+
}
551+
552+
// Filter by namespace
553+
var filteredPVCs []*v1.PersistentVolumeClaim
554+
for _, pvc := range c.pvcs {
555+
if pvc.Namespace == namespace {
556+
filteredPVCs = append(filteredPVCs, pvc)
557+
}
558+
}
559+
return filteredPVCs
560+
}
561+
562+
// SetPVCs sets the PVCs for testing
563+
func (c *FakeK8SOrchestrator) SetPVCs(pvcs []*v1.PersistentVolumeClaim) {
564+
c.pvcs = pvcs
565+
}
566+
546567
// configFromVCSim starts a vcsim instance and returns config for use against the
547568
// vcsim instance. The vcsim instance is configured with an empty tls.Config.
548569
func configFromVCSim(vcsimParams VcsimParams, isTopologyEnv bool) (*config.Config, func()) {

pkg/csi/service/common/commonco/coagnostic.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ type COCommonInterface interface {
128128
// GetPVCNamespacedNameByUID returns the PVC's namespaced name (namespace/name) for the given UID.
129129
// If the PVC is not found in the cache, it returns an empty string and false.
130130
GetPVCNamespacedNameByUID(uid string) (k8stypes.NamespacedName, bool)
131+
ListPVCs(ctx context.Context, namespace string) []*v1.PersistentVolumeClaim
131132
}
132133

133134
// GetContainerOrchestratorInterface returns orchestrator object for a given

pkg/csi/service/common/commonco/k8sorchestrator/k8sorchestrator.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"time"
3131

3232
"google.golang.org/grpc/codes"
33+
"k8s.io/apimachinery/pkg/labels"
3334
"k8s.io/client-go/util/retry"
3435

3536
snapshotterClientSet "github.com/kubernetes-csi/external-snapshotter/client/v8/clientset/versioned"
@@ -2286,3 +2287,15 @@ func GetPVCDataSource(ctx context.Context, claim *v1.PersistentVolumeClaim) (*v1
22862287
}
22872288
return &dataSource, nil
22882289
}
2290+
2291+
func (c *K8sOrchestrator) ListPVCs(ctx context.Context, namespace string) []*v1.PersistentVolumeClaim {
2292+
log := logger.GetLogger(ctx)
2293+
2294+
pvcs, err := c.informerManager.GetPVCLister().PersistentVolumeClaims(namespace).List(labels.Everything())
2295+
if err != nil {
2296+
log.With("namespace", namespace).Error("failed to list PVCs")
2297+
return []*v1.PersistentVolumeClaim{}
2298+
}
2299+
2300+
return pvcs
2301+
}

pkg/syncer/admissionhandler/cnscsi_admissionhandler_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ type MockCOCommonInterface struct {
4747
mock.Mock
4848
}
4949

50+
func (m *MockCOCommonInterface) ListPVCs(ctx context.Context, namespace string) []*corev1.PersistentVolumeClaim {
51+
//TODO implement me
52+
panic("implement me")
53+
}
54+
5055
func (m *MockCOCommonInterface) GetPVCNamespacedNameByUID(uid string) (apitypes.NamespacedName, bool) {
5156
//TODO implement me
5257
panic("implement me")

pkg/syncer/cnsoperator/controller/cnsregistervolume/cnsregistervolume_controller_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,11 @@ func (m *mockVolumeManager) SyncVolume(ctx context.Context,
224224

225225
type mockCOCommon struct{}
226226

227+
func (m *mockCOCommon) ListPVCs(ctx context.Context, namespace string) []*corev1.PersistentVolumeClaim {
228+
//TODO implement me
229+
panic("implement me")
230+
}
231+
227232
func (m *mockCOCommon) GetPVCNamespacedNameByUID(uid string) (types.NamespacedName, bool) {
228233
//TODO implement me
229234
panic("implement me")

pkg/syncer/cnsoperator/manager/cleanupcustomresources.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,15 @@ import (
2121
"time"
2222

2323
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
24+
"k8s.io/client-go/kubernetes"
2425
"k8s.io/client-go/rest"
26+
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
2527
cnsoperatorv1alpha1 "sigs.k8s.io/vsphere-csi-driver/v3/pkg/apis/cnsoperator"
28+
batchattachv1a1 "sigs.k8s.io/vsphere-csi-driver/v3/pkg/apis/cnsoperator/cnsnodevmbatchattachment/v1alpha1"
2629
cnsregistervolumev1alpha1 "sigs.k8s.io/vsphere-csi-driver/v3/pkg/apis/cnsoperator/cnsregistervolume/v1alpha1"
2730
cnsunregistervolumev1alpha1 "sigs.k8s.io/vsphere-csi-driver/v3/pkg/apis/cnsoperator/cnsunregistervolume/v1alpha1"
31+
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/csi/service/common/commonco"
32+
cnsoperatortypes "sigs.k8s.io/vsphere-csi-driver/v3/pkg/syncer/cnsoperator/types"
2833

2934
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/csi/service/logger"
3035
k8s "sigs.k8s.io/vsphere-csi-driver/v3/pkg/kubernetes"
@@ -109,3 +114,76 @@ func cleanUpCnsUnregisterVolumeInstances(ctx context.Context, restClientConfig *
109114
}
110115
}
111116
}
117+
118+
var (
119+
newClientForGroup = k8s.NewClientForGroup
120+
newForConfig = func(config *rest.Config) (kubernetes.Interface, error) {
121+
return kubernetes.NewForConfig(config)
122+
}
123+
)
124+
125+
// cleanupPVCs removes the `CNSPvcFinalizer` from the PVCs in cases
126+
// where the CnsNodeVmBatchAttachment CR gets deleted before removing the finalizer.
127+
// This is EXTREMELY UNLIKELY to happen but still a possibility that has to be addressed.
128+
func cleanupPVCs(ctx context.Context, config rest.Config) {
129+
log := logger.GetLogger(ctx)
130+
131+
pvcList := commonco.ContainerOrchestratorUtility.ListPVCs(ctx, "")
132+
if len(pvcList) == 0 {
133+
log.Info("no PVCs found. Exiting...")
134+
return
135+
}
136+
137+
// map to hold all the PVCs that have the finalizer added by the batch attach reconciler
138+
pvcMap := make(map[string]map[string]struct{})
139+
for _, pvc := range pvcList {
140+
// Check if PVC has the CNS finalizer in metadata.finalizers
141+
if !controllerutil.ContainsFinalizer(pvc, cnsoperatortypes.CNSPvcFinalizer) {
142+
// not a PVC that is attached to or being attached to a VM. Can be ignored.
143+
continue
144+
}
145+
146+
if _, ok := pvcMap[pvc.Namespace]; !ok {
147+
pvcMap[pvc.Namespace] = map[string]struct{}{}
148+
}
149+
pvcMap[pvc.Namespace][pvc.Name] = struct{}{}
150+
}
151+
152+
cnsClient, err := newClientForGroup(ctx, &config, cnsoperatorv1alpha1.GroupName)
153+
if err != nil {
154+
log.Error("failed to create cns operator client")
155+
return
156+
}
157+
158+
batchAttachList := batchattachv1a1.CnsNodeVMBatchAttachmentList{}
159+
err = cnsClient.List(ctx, &batchAttachList)
160+
if err != nil {
161+
log.With("kind", batchAttachList.Kind).Error("listing failed")
162+
return
163+
}
164+
165+
for _, cr := range batchAttachList.Items {
166+
for _, vol := range cr.Spec.Volumes {
167+
// Any PVCs that are still in the spec can be safely ignored to be processed by the reconciler.
168+
delete(pvcMap[cr.Namespace], vol.PersistentVolumeClaim.ClaimName)
169+
}
170+
}
171+
172+
c, err := newForConfig(&config)
173+
if err != nil {
174+
log.Error("failed to create core API client")
175+
return
176+
}
177+
178+
// Remove the finalizer for the remaining PVCs
179+
for namespace, pvcs := range pvcMap {
180+
for name := range pvcs {
181+
err := k8s.RemoveFinalizerFromPVC(ctx, c, name, namespace, cnsoperatortypes.CNSPvcFinalizer)
182+
if err != nil {
183+
log.With("name", name).With("namespace", namespace).
184+
Error("failed to remove the finalizer")
185+
continue
186+
}
187+
}
188+
}
189+
}

pkg/syncer/cnsoperator/manager/init.go

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,29 @@ func InitCnsOperator(ctx context.Context, clusterFlavor cnstypes.CnsClusterFlavo
137137
log.Errorf("failed to create %q CRD. Err: %+v", crdNameNodeVmBatchAttachment, err)
138138
return err
139139
}
140+
141+
err = watcher(ctx, cnsOperator)
142+
if err != nil {
143+
log.Error("Failed to watch on config file for changes to "+
144+
"CnsPVCProtectionCleanupIntervalInMin. Error: %+v", err)
145+
return err
146+
}
147+
148+
// Start cleanup routine to remove orphaned PVC finalizers.
149+
// This handles cases where CnsNodeVMBatchAttachment CRs are deleted before
150+
// removing finalizers from their associated PVCs.
151+
log.Info("Starting go routine to cleanup orphaned PVC finalizers from batch attach.")
152+
go func() {
153+
for {
154+
ctx, log = logger.GetNewContextWithLogger()
155+
log.Info("Triggering PVC finalizer cleanup routine")
156+
cleanupPVCs(ctx, *restConfig)
157+
log.Info("Completed PVC finalizer cleanup")
158+
for i := 1; i <= cnsOperator.configInfo.Cfg.Global.CnsPVCProtectionCleanupIntervalInMin; i++ {
159+
time.Sleep(1 * time.Minute)
160+
}
161+
}
162+
}()
140163
}
141164

142165
// Create CnsVolumeMetadata CRD
@@ -162,7 +185,7 @@ func InitCnsOperator(ctx context.Context, clusterFlavor cnstypes.CnsClusterFlavo
162185
if stretchedSupervisor {
163186
log.Info("Observed stretchedSupervisor setup")
164187
}
165-
if !stretchedSupervisor || (stretchedSupervisor && syncer.IsPodVMOnStretchSupervisorFSSEnabled) {
188+
if !stretchedSupervisor || syncer.IsPodVMOnStretchSupervisorFSSEnabled {
166189
// Create CnsRegisterVolume CRD from manifest.
167190
log.Infof("Creating %q CRD", cnsoperatorv1alpha1.CnsRegisterVolumePlural)
168191
err = k8s.CreateCustomResourceDefinitionFromManifest(ctx, cnsoperatorconfig.EmbedCnsRegisterVolumeCRFile,
@@ -228,7 +251,7 @@ func InitCnsOperator(ctx context.Context, clusterFlavor cnstypes.CnsClusterFlavo
228251
}
229252
}
230253

231-
if !stretchedSupervisor || (stretchedSupervisor && syncer.IsWorkloadDomainIsolationSupported) {
254+
if !stretchedSupervisor || syncer.IsWorkloadDomainIsolationSupported {
232255
if cnsOperator.coCommonInterface.IsFSSEnabled(ctx, common.FileVolume) {
233256
// Create CnsFileAccessConfig CRD from manifest if file volume feature
234257
// is enabled.

0 commit comments

Comments
 (0)