
Commit ab69c3b

tests pass again and scale reduction is now done via machine delete
1 parent 54ba516 commit ab69c3b

19 files changed: 382 additions, 274 deletions

api/v1alpha3/tags.go

Lines changed: 0 additions & 17 deletions

@@ -29,12 +29,6 @@ func (t Tags) Equals(other Tags) bool {
 	return reflect.DeepEqual(t, other)
 }
 
-// HasMatchingSpecVersionHash returns true if the resource has been tagged with a matching resource spec hash value.
-func (t Tags) HasMatchingSpecVersionHash(hash string) bool {
-	value, ok := t[SpecVersionHashTagKey()]
-	return ok && value == hash
-}
-
 // HasOwned returns true if the tags contains a tag that marks the resource as owned by the cluster from the perspective of this management tooling.
 func (t Tags) HasOwned(cluster string) bool {
 	value, ok := t[ClusterTagKey(cluster)]

@@ -74,12 +68,6 @@ func (t Tags) Merge(other Tags) {
 		}
 	}
 
-// AddSpecVersionHashTag adds a spec version hash to the Azure resource tags to determine if state has changed quickly
-func (t Tags) AddSpecVersionHashTag(hash string) Tags {
-	t[SpecVersionHashTagKey()] = hash
-	return t
-}
-
 // ResourceLifecycle configures the lifecycle of a resource
 type ResourceLifecycle string

@@ -138,11 +126,6 @@ const (
 	VMTagsLastAppliedAnnotation = "sigs.k8s.io/cluster-api-provider-azure-last-applied-tags-vm"
 )
 
-// SpecVersionHashTagKey is the key for the spec version hash used to enable quick spec difference comparison
-func SpecVersionHashTagKey() string {
-	return fmt.Sprintf("%s%s", NameAzureProviderPrefix, "spec-version-hash")
-}
-
 // ClusterTagKey generates the key for resources associated with a cluster.
 func ClusterTagKey(name string) string {
 	return fmt.Sprintf("%s%s", NameAzureProviderOwned, name)

cloud/converters/vmss_test.go

Lines changed: 6 additions & 5 deletions

@@ -99,11 +99,12 @@ func Test_SDKToVMSS(t *testing.T) {
 
 	for i := 0; i < 2; i++ {
 		expected.Instances[i] = infrav1exp.VMSSVM{
-			ID:               fmt.Sprintf("vm/%d", i),
-			InstanceID:       fmt.Sprintf("%d", i),
-			Name:             fmt.Sprintf("instance-00000%d", i),
-			AvailabilityZone: fmt.Sprintf("zone%d", i),
-			State:            "Succeeded",
+			ID:                 fmt.Sprintf("vm/%d", i),
+			InstanceID:         fmt.Sprintf("%d", i),
+			Name:               fmt.Sprintf("instance-00000%d", i),
+			AvailabilityZone:   fmt.Sprintf("zone%d", i),
+			State:              "Succeeded",
+			LatestModelApplied: true,
 		}
 	}
 	g.Expect(actual).To(gomega.Equal(&expected))

cloud/scope/machinepool.go

Lines changed: 84 additions & 8 deletions

@@ -25,7 +25,6 @@ import (
 	"github.com/go-logr/logr"
 	"github.com/pkg/errors"
 	corev1 "k8s.io/api/core/v1"
-	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/klog/klogr"

@@ -241,19 +240,85 @@ func (m *MachinePoolScope) applyAzureMachinePoolMachines(ctx context.Context) er
 		}
 	}
 
-	// determine which machines need to be deleted since they are not in Azure
-	for key, val := range existingMachinesByProviderID {
-		val := val
-		if _, ok := latestMachinesByProviderID[key]; !ok {
-			if err := m.client.Delete(ctx, &val); err != nil && !apierrors.IsNotFound(err) {
-				return errors.Wrap(err, "failed deleting machine")
-			}
+	// select machines to delete to lower the replica count
+	toDelete := m.selectMachinesToDelete(existingMachinesByProviderID)
+	for _, machine := range toDelete {
+		machine := machine
+		if err := m.client.Delete(ctx, &machine); err != nil {
+			return errors.Wrap(err, "failed deleting machine to reduce replica count")
 		}
 	}
 
 	return nil
 }
 
+func getProviderIDs(machinesByProviderID map[string]infrav1exp.AzureMachinePoolMachine) []string {
+	ids := make([]string, len(machinesByProviderID))
+	idx := 0
+	for k := range machinesByProviderID {
+		ids[idx] = k
+		idx++
+	}
+	return ids
+}
+
+func (m *MachinePoolScope) selectMachinesToDelete(machinesByProviderID map[string]infrav1exp.AzureMachinePoolMachine) map[string]infrav1exp.AzureMachinePoolMachine {
+	desiredReplicaCount := int(*m.MachinePool.Spec.Replicas)
+	readyMachines := getReadyMachines(machinesByProviderID)
+	if desiredReplicaCount > len(readyMachines) {
+		m.V(4).Info("we don't have enough ready machines, so don't try to delete any", "desired", desiredReplicaCount, "ready", getProviderIDs(readyMachines))
+		return nil
+	}
+
+	readyAndNotMarkedForDelete := make(map[string]infrav1exp.AzureMachinePoolMachine)
+	for providerID, machine := range readyMachines {
+		if machine.ObjectMeta.DeletionTimestamp.IsZero() {
+			// this machine is not marked for delete
+			readyAndNotMarkedForDelete[providerID] = machine
+		}
+	}
+
+	numberOfOverProvisioned := len(machinesByProviderID) - desiredReplicaCount
+	if numberOfOverProvisioned <= 0 {
+		m.V(4).Info("no over-provisioned machines", "desired", desiredReplicaCount, "overprovisioned", numberOfOverProvisioned)
+		return nil // no over-provisioned machines
+	}
+
+	m.V(4).Info("found over-provisioned machines", "desired", desiredReplicaCount, "overprovisioned", numberOfOverProvisioned)
+	// pick machines that are not ready or are already marked for delete first
+	numberofMachinesSelectedToBeDeleted := 0
+	toDelete := make(map[string]infrav1exp.AzureMachinePoolMachine)
+	for k, v := range machinesByProviderID {
+		if _, ok := readyAndNotMarkedForDelete[k]; ok {
+			// this is a known good machine; move on
+			continue
+		}
+
+		toDelete[k] = v
+		numberofMachinesSelectedToBeDeleted++
+		if numberofMachinesSelectedToBeDeleted >= numberOfOverProvisioned {
+			break
+		}
+	}
+
+	if len(toDelete) >= numberOfOverProvisioned {
+		m.V(4).Info("found enough machines not ready or marked for delete", "toDelete", getProviderIDs(toDelete), "numToDelete", len(toDelete), "overprovisioned", numberOfOverProvisioned)
+		return toDelete
+	}
+
+	// we are still over-provisioned, select ready machines to delete
+	for k, v := range readyAndNotMarkedForDelete {
+		toDelete[k] = v
+		numberofMachinesSelectedToBeDeleted++
+		if numberofMachinesSelectedToBeDeleted >= numberOfOverProvisioned {
+			break
+		}
+	}
+
+	m.V(4).Info("included ready machines to delete", "toDelete", getProviderIDs(toDelete), "numToDelete", len(toDelete), "overprovisioned", numberOfOverProvisioned)
+	return toDelete
+}
+
 func (m *MachinePoolScope) createMachine(ctx context.Context, machine infrav1exp.VMSSVM) error {
 	if machine.InstanceID == "" {
 		return errors.New("machine.InstanceID must not be empty")

@@ -524,3 +589,14 @@ func getAzureMachineTemplate(ctx context.Context, c client.Client, name, namespa
 	}
 	return m, nil
 }
+
+func getReadyMachines(machinesByProviderID map[string]infrav1exp.AzureMachinePoolMachine) map[string]infrav1exp.AzureMachinePoolMachine {
+	readyMachines := make(map[string]infrav1exp.AzureMachinePoolMachine)
+	for k, v := range machinesByProviderID {
+		if v.Status.Ready && v.Status.ProvisioningState != nil && *v.Status.ProvisioningState == infrav1.VMStateSucceeded {
+			readyMachines[k] = v
+		}
+	}
+
+	return readyMachines
+}
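
For reference, selectMachinesToDelete applies a two-pass priority: it refuses to delete anything while fewer machines are ready than the desired replica count, then removes the over-provisioned surplus, preferring machines that are not ready or already marked for delete, and falling back to healthy machines only if a surplus remains. Below is a minimal, self-contained sketch of that ordering; the simplified machine type and selectToDelete helper are hypothetical stand-ins for illustration, not the CAPZ API.

    package main

    import "fmt"

    // machine is a hypothetical, simplified stand-in for
    // infrav1exp.AzureMachinePoolMachine, used only to illustrate ordering.
    type machine struct {
        ready           bool
        markedForDelete bool
    }

    // selectToDelete mirrors the two-pass priority in selectMachinesToDelete:
    // refuse to shrink while ready machines are fewer than desired, then pick
    // not-ready or already-deleting machines first, healthy ones last.
    func selectToDelete(machines map[string]machine, desired int) map[string]machine {
        readyCount := 0
        for _, m := range machines {
            if m.ready {
                readyCount++
            }
        }
        if desired > readyCount {
            return nil // not enough ready machines; do not delete any
        }
        over := len(machines) - desired
        if over <= 0 {
            return nil // nothing over-provisioned
        }

        toDelete := make(map[string]machine)
        // first pass: machines that are not ready or already marked for delete
        for id, m := range machines {
            if m.ready && !m.markedForDelete {
                continue
            }
            toDelete[id] = m
            if len(toDelete) >= over {
                return toDelete
            }
        }
        // second pass: still over-provisioned, so take healthy machines too
        for id, m := range machines {
            if _, picked := toDelete[id]; picked {
                continue
            }
            toDelete[id] = m
            if len(toDelete) >= over {
                break
            }
        }
        return toDelete
    }

    func main() {
        machines := map[string]machine{
            "a": {ready: true},
            "b": {ready: true},
            "c": {ready: false},                       // surge instance still provisioning
            "d": {ready: true, markedForDelete: true}, // already draining
        }
        // scale from 4 to 2: "c" and "d" are selected before "a" or "b"
        for id := range selectToDelete(machines, 2) {
            fmt.Println("would delete", id)
        }
    }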

cloud/scope/machinepoolmachine.go

Lines changed: 8 additions & 15 deletions

@@ -133,18 +133,14 @@ func (s *MachinePoolMachineScope) ProvisioningState() infrav1.VMState {
 	return ""
 }
 
-// SetReady sets the AzureMachinePoolMachine Ready Status to true.
-func (s *MachinePoolMachineScope) SetReady() {
-	s.AzureMachinePool.Status.Ready = true
-}
-
-// SetNotReady sets the AzureMachinePoolMachine Ready Status to false.
-func (s *MachinePoolMachineScope) SetNotReady() {
-	s.AzureMachinePool.Status.Ready = false
+// IsReady indicates the machine has successfully provisioned and has a node ref associated
+func (s *MachinePoolMachineScope) IsReady() bool {
+	state := s.AzureMachinePoolMachine.Status.ProvisioningState
+	return s.AzureMachinePoolMachine.Status.Ready && state != nil && *state == infrav1.VMStateSucceeded
 }
 
 // SetFailureMessage sets the AzureMachinePoolMachine status failure message.
-func (s *MachinePoolMachineScope) SetFailureMessage(v error) {
+func (s *MachinePoolMachineScope) SetFailureMessage(v error) {
 	s.AzureMachinePool.Status.FailureMessage = pointer.StringPtr(v.Error())
 }
 

@@ -153,7 +149,6 @@ func (s *MachinePoolMachineScope) SetFailureReason(v capierrors.MachineStatusErr
 	s.AzureMachinePool.Status.FailureReason = &v
 }
 
-
 // ProviderID returns the AzureMachinePool ID by parsing Spec.ProviderID.
 func (s *MachinePoolMachineScope) ProviderID() string {
 	return s.AzureMachinePoolMachine.Spec.ProviderID

@@ -164,10 +159,6 @@ func (s *MachinePoolMachineScope) Close(ctx context.Context) error {
 	ctx, span := tele.Tracer().Start(ctx, "scope.MachinePoolMachineScope.Close")
 	defer span.End()
 
-	if err := s.updateState(ctx); err != nil {
-		return errors.Wrap(err, "failed to update state")
-	}
-
 	return s.patchHelper.Patch(ctx, s.AzureMachinePoolMachine)
 }
 

@@ -199,7 +190,9 @@ func (s *MachinePoolMachineScope) Drain(ctx context.Context) error {
 	return nil
 }
 
-func (s *MachinePoolMachineScope) updateState(ctx context.Context) error {
+// UpdateStatus updates the node reference for the machine and other status fields. This func should be called at the
+// end of a reconcile request and after updating the scope with the most recent Azure data.
+func (s *MachinePoolMachineScope) UpdateStatus(ctx context.Context) error {
 	ctx, span := tele.Tracer().Start(ctx, "scope.MachinePoolMachineScope.Get")
 	defer span.End()
 

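The readiness definition introduced by IsReady (Status.Ready combined with a Succeeded provisioning state) is the same predicate getReadyMachines applies in cloud/scope/machinepool.go. A minimal sketch of that predicate, assuming simplified stand-in types (VMState and status here are illustrative, not the CAPZ types):

    package main

    import "fmt"

    // VMState mirrors the shape of infrav1.VMState for illustration only.
    type VMState string

    const VMStateSucceeded VMState = "Succeeded"

    // status is a hypothetical stand-in for AzureMachinePoolMachine.Status.
    type status struct {
        Ready             bool
        ProvisioningState *VMState
    }

    // isReady matches the predicate shared by MachinePoolMachineScope.IsReady
    // and getReadyMachines: reported ready AND provisioning succeeded.
    func isReady(s status) bool {
        return s.Ready && s.ProvisioningState != nil && *s.ProvisioningState == VMStateSucceeded
    }

    func main() {
        succeeded := VMStateSucceeded
        creating := VMState("Creating")
        fmt.Println(isReady(status{Ready: true, ProvisioningState: &succeeded}))  // true
        fmt.Println(isReady(status{Ready: true, ProvisioningState: &creating}))   // false: still provisioning
        fmt.Println(isReady(status{Ready: false, ProvisioningState: &succeeded})) // false: node not ready
    }
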
cloud/services/scalesets/mock_scalesets/scalesets_mock.go

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default.

cloud/services/scalesets/scalesets.go

Lines changed: 41 additions & 18 deletions

@@ -27,6 +27,7 @@ import (
 	"github.com/Azure/go-autorest/autorest/to"
 	"github.com/go-logr/logr"
 	"github.com/pkg/errors"
+
 	"sigs.k8s.io/cluster-api-provider-azure/util/generators"
 
 	infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1alpha3"

@@ -50,6 +51,7 @@ type (
 		NeedsK8sVersionUpdate() bool
 		SaveK8sVersion()
 		ScaleSetSpec() azure.ScaleSetSpec
+		VMSSExtensionSpecs() []azure.VMSSExtensionSpec
 		SetAnnotation(string, string)
 		SetLongRunningOperationState(*infrav1.Future)
 		SetProviderID(string)

@@ -176,7 +178,6 @@ func (s *Service) createVMSS(ctx context.Context) (*infrav1.Future, error) {
 	}
 
 	vmss := result.VMSSWithoutHash
-	vmss.Tags = converters.TagsToMap(result.Tags.AddSpecVersionHashTag(result.Hash))
 	future, err := s.Client.CreateOrUpdateAsync(ctx, s.Scope.ResourceGroup(), spec.Name, vmss)
 	if err != nil {
 		return future, errors.Wrapf(err, "cannot create VMSS")

@@ -199,31 +200,25 @@ func (s *Service) patchVMSSIfNeeded(ctx context.Context, infraVMSS *infrav1exp.V
 		return nil, errors.Wrapf(err, "failed to generate scale set update parameters for %s", spec.Name)
 	}
 
-	if infraVMSS.Tags.HasMatchingSpecVersionHash(result.Hash) {
-		// The VMSS built from the AzureMachinePool spec matches the hash in the tag of the existing VMSS. This means
-		// the VMSS does not need to be patched since it has not changed.
-		//
-		// hash(AzureMachinePool.Spec)
-		//
-		// Note: if a user were to mutate the VMSS in Azure rather than through CAPZ, this hash may still match, but not
-		// reflect the state of the specification in K8s.
-		return nil, nil
-	}
-
 	vmss := result.VMSSWithoutHash
-	vmss.Tags = converters.TagsToMap(result.Tags.AddSpecVersionHashTag(result.Hash))
 	patch, err := getVMSSUpdateFromVMSS(vmss)
 	if err != nil {
 		return nil, errors.Wrapf(err, "failed to generate vmss patch for %s", spec.Name)
 	}
 
-	if s.Scope.MaxSurge() > 0 && hasModelModifyingDifferences(infraVMSS, vmss) {
+	hasModelChanges := hasModelModifyingDifferences(infraVMSS, vmss)
+	if s.Scope.MaxSurge() > 0 && hasModelChanges {
 		// surge capacity with the intention of lowering during instance reconciliation
 		patch.Sku.Capacity = to.Int64Ptr(*patch.Sku.Capacity + int64(s.Scope.MaxSurge()))
 	}
 
-	// wipe out network profile, so updates won't conflict with Cloud Provider updates
-	patch.VirtualMachineProfile.NetworkProfile = nil
+	// If there are no model changes and no increase in the replica count, do not update the VMSS.
+	// Decreases in replica count are handled by deleting AzureMachinePoolMachine instances in the MachinePoolScope.
+	if *patch.Sku.Capacity <= infraVMSS.Capacity && !hasModelChanges {
+		s.Scope.V(4).Info("nothing to update on vmss", "scale set", spec.Name, "newReplicas", *patch.Sku.Capacity, "oldReplicas", infraVMSS.Capacity, "hasChanges", hasModelChanges)
+		return nil, nil
+	}
+
 	future, err := s.UpdateAsync(ctx, s.Scope.ResourceGroup(), spec.Name, patch)
 	if err != nil {
 		if azure.ResourceConflict(err) {

@@ -332,6 +327,8 @@ func (s *Service) buildVMSSFromSpec(ctx context.Context, vmssSpec azure.ScaleSet
 		return result, err
 	}
 
+	extensions := s.generateExtensions()
+
 	vmss := compute.VirtualMachineScaleSet{
 		Location: to.StringPtr(s.Scope.Location()),
 		Sku: &compute.Sku{

@@ -381,6 +378,9 @@
 				Priority:       priority,
 				EvictionPolicy: evictionPolicy,
 				BillingProfile: billingProfile,
+				ExtensionProfile: &compute.VirtualMachineScaleSetExtensionProfile{
+					Extensions: &extensions,
+				},
 			},
 		},
 	}

@@ -476,6 +476,23 @@ func (s *Service) getVirtualMachineScaleSetIfDone(ctx context.Context, future *i
 	return converters.SDKToVMSS(vmss, vmssInstances), nil
 }
 
+func (s *Service) generateExtensions() []compute.VirtualMachineScaleSetExtension {
+	extensions := make([]compute.VirtualMachineScaleSetExtension, len(s.Scope.VMSSExtensionSpecs()))
+	for i, extensionSpec := range s.Scope.VMSSExtensionSpecs() {
+		extensions[i] = compute.VirtualMachineScaleSetExtension{
+			Name: &extensionSpec.Name,
+			VirtualMachineScaleSetExtensionProperties: &compute.VirtualMachineScaleSetExtensionProperties{
+				Publisher:          to.StringPtr(extensionSpec.Publisher),
+				Type:               to.StringPtr(extensionSpec.Name),
+				TypeHandlerVersion: to.StringPtr(extensionSpec.Version),
+				Settings:           nil,
+				ProtectedSettings:  nil,
+			},
+		}
+	}
+	return extensions
+}
+
 // generateStorageProfile generates a pointer to a compute.VirtualMachineScaleSetStorageProfile which can be utilized for VM creation.
 func (s *Service) generateStorageProfile(vmssSpec azure.ScaleSetSpec, sku resourceskus.SKU) (*compute.VirtualMachineScaleSetStorageProfile, error) {
 	storageProfile := &compute.VirtualMachineScaleSetStorageProfile{

@@ -581,9 +598,15 @@ func getVMSSUpdateFromVMSS(vmss compute.VirtualMachineScaleSet) (compute.Virtual
 	if err != nil {
 		return compute.VirtualMachineScaleSetUpdate{}, err
 	}
+
 	var update compute.VirtualMachineScaleSetUpdate
-	err = update.UnmarshalJSON(jsonData)
-	return update, err
+	if err := update.UnmarshalJSON(jsonData); err != nil {
+		return update, err
+	}
+
+	// wipe out network profile, so updates won't conflict with Cloud Provider updates
+	update.VirtualMachineProfile.NetworkProfile = nil
+	return update, nil
 }
 
 func getSecurityProfile(vmssSpec azure.ScaleSetSpec, sku resourceskus.SKU) (*compute.SecurityProfile, error) {
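
The patch gate in patchVMSSIfNeeded reduces to a single condition: update the VMSS only when the model changed or the desired capacity exceeds the current one, since capacity decreases are now handled by deleting AzureMachinePoolMachine instances. A minimal sketch of that decision; shouldPatchVMSS is a hypothetical helper name for illustration:

    package main

    import "fmt"

    // shouldPatchVMSS reflects the gate in patchVMSSIfNeeded: skip the update
    // when there are no model-modifying changes and the desired capacity does
    // not exceed the current capacity (scale-down happens via machine delete).
    func shouldPatchVMSS(hasModelChanges bool, newCapacity, oldCapacity int64) bool {
        return hasModelChanges || newCapacity > oldCapacity
    }

    func main() {
        fmt.Println(shouldPatchVMSS(false, 3, 3)) // false: nothing to update
        fmt.Println(shouldPatchVMSS(false, 2, 3)) // false: scale-down handled by deleting machines
        fmt.Println(shouldPatchVMSS(true, 3, 3))  // true: model changed (e.g. a new image)
        fmt.Println(shouldPatchVMSS(false, 4, 3)) // true: scale-up goes through the VMSS
    }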
