Skip to content

K8SPG-804: Fix internal.percona.com/delete-backup finalizer #1182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jun 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions internal/controller/postgrescluster/pgbackrest.go
Original file line number Diff line number Diff line change
Expand Up @@ -2303,7 +2303,28 @@ func (r *Reconciler) reconcileDedicatedRepoHost(ctx context.Context,
// manually by the end-user
func (r *Reconciler) reconcileManualBackup(ctx context.Context,
postgresCluster *v1beta1.PostgresCluster, manualBackupJobs []*batchv1.Job,
serviceAccount *corev1.ServiceAccount, instances *observedInstances) error {
serviceAccount *corev1.ServiceAccount, instances *observedInstances,
) error {
// K8SPG-804: Get the current state of PostgresCluster.
// It's necessary to make internal.percona.com/delete-backup finalizer work.
// Because the reconcileManualBackup can get an outdated postgresCluster,
// resulting in a duplicated backup jobs per one pg-backup resources.
// For more information check the K8SPG-804 PR description.
currentPostgresCluster := new(v1beta1.PostgresCluster)
err := r.Client.Get(ctx, client.ObjectKeyFromObject(postgresCluster), currentPostgresCluster)
if client.IgnoreNotFound(err) != nil {
return err
}

// refPostgresCluster keeps pointer to the postgresCluster which is used in other reconcile functions
// It should be used to assign values to the postgresCluster inside this function
refPostgresCluster := postgresCluster

// If it's the first run of reconcileManualBackup .Status will be nil.
// Nothing will happen if we keep the old postgresCluster.
if !apierrors.IsNotFound(err) && currentPostgresCluster.Status.PGBackRest != nil {
postgresCluster = currentPostgresCluster
}

manualAnnotation := postgresCluster.GetAnnotations()[naming.PGBackRestBackup]
manualStatus := postgresCluster.Status.PGBackRest.ManualBackup
Expand Down Expand Up @@ -2396,7 +2417,7 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context,
meta.RemoveStatusCondition(&postgresCluster.Status.Conditions,
ConditionManualBackupSuccessful)

postgresCluster.Status.PGBackRest.ManualBackup = manualStatus
refPostgresCluster.Status.PGBackRest.ManualBackup = manualStatus
}

// if the status shows the Job is no longer in progress, then simply exit (which means a Job
Expand Down
59 changes: 21 additions & 38 deletions percona/controller/pgbackup/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"path"
"slices"
"strings"
"time"

"github.com/pkg/errors"
Expand Down Expand Up @@ -514,10 +513,17 @@ func finishBackup(ctx context.Context, c client.Client, pgBackup *v2.PerconaPGBa
if err != nil {
return nil, errors.Wrap(err, "get backup in progress")
}

if runningBackup != pgBackup.Name {
// This block only runs after all finalizer operations are complete.
// Or, it runs when the user deletes a backup object that never started.
// In both cases, treat this as the function's exit point.

return nil, nil
}

// deleteAnnotation deletes the annotation from the PerconaPGCluster
// and returns true when it's deleted from crunchy PostgresCluster.
deleteAnnotation := func(annotation string) (bool, error) {
pgCluster := new(v1beta1.PostgresCluster)
if err := c.Get(ctx, types.NamespacedName{Name: pgBackup.Spec.PGCluster, Namespace: pgBackup.Namespace}, pgCluster); err != nil {
Expand Down Expand Up @@ -555,24 +561,20 @@ func finishBackup(ctx context.Context, c client.Client, pgBackup *v2.PerconaPGBa
return nil, errors.Wrapf(err, "delete %s annotation", naming.PGBackRestBackup)
}
if !deleted {
// We should wait until the crunchy reconciler is finished.
// If we delete the job labels without waiting for the reconcile to finish, the Crunchy reconciler will
// receive the pgcluster with the "naming.PGBackRestBackup" annotation, but will not find the manual backup job.
// It will attempt to create a new job with the same name, failing and resulting in a scary error in the logs.
// We should wait until the annotation is deleted from crunchy cluster.
return &reconcile.Result{RequeueAfter: time.Second * 5}, nil
}

// Remove PGBackRest labels to prevent the job from being
// deleted by the cleanupRepoResources method.
// deleted by the cleanupRepoResources method and included in
// repoResources.manualBackupJobs used in reconcileManualBackup method
if job != nil {
if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
j := new(batchv1.Job)
if err := c.Get(ctx, client.ObjectKeyFromObject(job), j); err != nil {
if k8serrors.IsNotFound(err) {
return nil
}
return errors.Wrap(err, "get job")
}

for k := range naming.PGBackRestLabels(pgBackup.Spec.PGCluster) {
delete(j.Labels, k)
}
Expand All @@ -583,6 +585,7 @@ func finishBackup(ctx context.Context, c client.Client, pgBackup *v2.PerconaPGBa
}
}

// We should delete the pgbackrest status to be able to recreate backups with the same name.
if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
pgCluster := new(v1beta1.PostgresCluster)
if err := c.Get(ctx, types.NamespacedName{Name: pgBackup.Spec.PGCluster, Namespace: pgBackup.Namespace}, pgCluster); err != nil {
Expand All @@ -595,39 +598,19 @@ func finishBackup(ctx context.Context, c client.Client, pgBackup *v2.PerconaPGBa
return nil, errors.Wrap(err, "update postgrescluster")
}

deleted, err = deleteAnnotation(pNaming.AnnotationBackupInProgress)
_, err = deleteAnnotation(pNaming.AnnotationBackupInProgress)
if err != nil {
return nil, errors.Wrapf(err, "delete %s annotation", pNaming.AnnotationBackupInProgress)
}
if !deleted {
return &reconcile.Result{RequeueAfter: time.Second * 5}, nil
}

if job != nil && checkBackupJob(job) != v2.BackupSucceeded {
// Remove all crunchy labels to prevent the job from being included in
// repoResources.manualBackupJobs used in reconcileManualBackup method.
if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
j := new(batchv1.Job)
if err := c.Get(ctx, client.ObjectKeyFromObject(job), j); err != nil {
if k8serrors.IsNotFound(err) {
return nil
}
return errors.Wrap(err, "get job")
}

for k := range j.Labels {
if strings.HasPrefix(k, pNaming.PrefixCrunchy) {
delete(j.Labels, k)
}
}

return c.Update(ctx, j)
}); err != nil {
return nil, errors.Wrap(err, "delete backup job labels")
}
}
// Do not add any code after this comment.
//
// The code after the comment may or may not execute, depending on whether the
// crunchy cluster removes the AnnotationBackupInProgress annotation in time.
//
// Once AnnotationBackupInProgress is deleted, the successful return of finalizer must happen inside the
// `if runningBackup != pgBackup.Name { ... }` block.

return nil, nil
return &reconcile.Result{RequeueAfter: time.Second * 5}, nil
}

func startBackup(ctx context.Context, c client.Client, pb *v2.PerconaPGBackup) error {
Expand Down
Loading