-
Notifications
You must be signed in to change notification settings - Fork 17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Wait for updated cache when patching status #245
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,7 @@ import ( | |
"k8s.io/apimachinery/pkg/runtime" | ||
utilerrors "k8s.io/apimachinery/pkg/util/errors" | ||
"k8s.io/apimachinery/pkg/util/intstr" | ||
"k8s.io/apimachinery/pkg/util/wait" | ||
"k8s.io/client-go/tools/record" | ||
"k8s.io/utils/pointer" | ||
ctrl "sigs.k8s.io/controller-runtime" | ||
|
@@ -219,7 +220,7 @@ func (r *NodeHealthCheckReconciler) Reconcile(ctx context.Context, req ctrl.Requ | |
if finalRequeueAfter != nil { | ||
result.RequeueAfter = *finalRequeueAfter | ||
} | ||
patchErr := r.patchStatus(nhc, nhcOrig) | ||
patchErr := r.patchStatus(nhc, nhcOrig, ctx) | ||
if patchErr != nil { | ||
log.Error(err, "failed to update status") | ||
// check if we have an error from the rest of the code already | ||
|
@@ -733,7 +734,7 @@ func (r *NodeHealthCheckReconciler) isControlPlaneRemediationAllowed(node *v1.No | |
return len(controlPlaneRemediationCRs) == 0, nil | ||
} | ||
|
||
func (r *NodeHealthCheckReconciler) patchStatus(nhc, nhcOrig *remediationv1alpha1.NodeHealthCheck) error { | ||
func (r *NodeHealthCheckReconciler) patchStatus(nhc, nhcOrig *remediationv1alpha1.NodeHealthCheck, ctx context.Context) error { | ||
|
||
log := utils.GetLogWithNHC(r.Log, nhc) | ||
|
||
|
@@ -767,7 +768,33 @@ func (r *NodeHealthCheckReconciler) patchStatus(nhc, nhcOrig *remediationv1alpha | |
log.Info("Patching NHC status", "new status", nhc.Status, "patch", string(patchBytes)) | ||
} | ||
|
||
return r.Client.Status().Patch(context.Background(), nhc, mergeFrom) | ||
// only update lastUpdate when there were other changes | ||
now := metav1.Now() | ||
nhc.Status.LastUpdateTime = &now | ||
|
||
if err := r.Client.Status().Patch(ctx, nhc, mergeFrom); err != nil { | ||
return err | ||
} | ||
|
||
// Wait until the cache is updated in order to prevent reading a stale status in the next reconcile | ||
// and making wrong decisions based on it. The chance to run into this is very low, because we use RequeueAfter | ||
// with a minimum delay of 1 second everywhere instead of Requeue: true, but this needs to be fixed because | ||
// it bypasses the controller's rate limiter! | ||
err := wait.PollWithContext(ctx, 200*time.Millisecond, 5*time.Second, func(ctx context.Context) (bool, error) { | ||
tmpNhc := &remediationv1alpha1.NodeHealthCheck{} | ||
if err := r.Client.Get(ctx, client.ObjectKeyFromObject(nhc), tmpNhc); err != nil { | ||
if apierrors.IsNotFound(err) { | ||
// nothing we can do anymore | ||
return true, nil | ||
} | ||
return false, nil | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Return an error?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why should I return an error here? 🤔
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The function signature is fixed; it's an argument to wait.Poll().
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oho, I didn't notice that it's an argument to wait.Poll(). |
||
} | ||
return tmpNhc.Status.LastUpdateTime != nil && (tmpNhc.Status.LastUpdateTime.Equal(nhc.Status.LastUpdateTime) || tmpNhc.Status.LastUpdateTime.After(nhc.Status.LastUpdateTime.Time)), nil | ||
}) | ||
if err != nil { | ||
return errors.Wrapf(err, "failed to wait for updated cache after status patch") | ||
} | ||
return nil | ||
} | ||
|
||
func (r *NodeHealthCheckReconciler) alertOldRemediationCR(remediationCR *unstructured.Unstructured) (bool, *time.Duration) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
NIT: I think I have noticed that `wait.PollWithContext` is deprecated and that `wait.PollUntilContextTimeout` is advised to be used instead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see that in the apimachinery version I use, but I will keep an eye on it when updating dependencies