@@ -84,6 +84,8 @@ def __init__(self, client, subscription, metrics, **configs):
                 self.config[key] = configs[key]

         self._subscription = subscription
+        self._is_leader = False
+        self._joined_subscription = set()
         self._metadata_snapshot = self._build_metadata_snapshot(subscription, client.cluster)
         self._assignment_snapshot = None
         self._cluster = client.cluster
@@ -132,11 +134,22 @@ def protocol_type(self):

     def group_protocols(self):
         """Returns list of preferred (protocols, metadata)"""
-        topics = self._subscription.subscription
-        assert topics is not None, 'Consumer has not subscribed to topics'
+        if self._subscription.subscription is None:
+            raise Errors.IllegalStateError('Consumer has not subscribed to topics')
+        # dpkp note: I really dislike this.
+        # why? because we are using this strange method group_protocols,
+        # which is seemingly innocuous, to set internal state (_joined_subscription)
+        # that is later used to check whether metadata has changed since we joined a group
+        # but there is no guarantee that this method, group_protocols, will get called
+        # in the correct sequence or that it will only be called when we want it to be.
+        # So this really should be moved elsewhere, but I don't have the energy to
+        # work that out right now. If you read this at some later date after the mutable
+        # state has bitten you... I'm sorry! It mimics the java client, and that's the
+        # best I've got for now.
+        self._joined_subscription = set(self._subscription.subscription)
         metadata_list = []
         for assignor in self.config['assignors']:
-            metadata = assignor.metadata(topics)
+            metadata = assignor.metadata(self._joined_subscription)
             group_protocol = (assignor.name, metadata)
             metadata_list.append(group_protocol)
         return metadata_list
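
For orientation, the list that group_protocols() returns is just (assignor name, assignor metadata) pairs built from the current subscription. A minimal sketch of that shape, assuming the stock range and roundrobin assignors and made-up topic names (purely illustrative, not part of the patch):

    # illustrative only: build the protocol list the way group_protocols() does
    from kafka.coordinator.assignors.range import RangePartitionAssignor
    from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor

    joined_subscription = {'orders', 'payments'}   # hypothetical topics
    metadata_list = [
        (assignor.name, assignor.metadata(joined_subscription))
        for assignor in (RangePartitionAssignor, RoundRobinPartitionAssignor)
    ]
    # e.g. [('range', <member metadata>), ('roundrobin', <member metadata>)]

The side effect worth noticing is that _joined_subscription records the topic set at join time; need_rejoin() (further down) compares it against the live subscription to decide whether another join is needed.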
@@ -158,21 +171,29 @@ def _handle_metadata_update(self, cluster):

         # check if there are any changes to the metadata which should trigger
         # a rebalance
-        if self._subscription_metadata_changed(cluster):
-
-            if (self.config['api_version'] >= (0, 9)
-                    and self.config['group_id'] is not None):
-
-                self._subscription.mark_for_reassignment()
-
-            # If we haven't got group coordinator support,
-            # just assign all partitions locally
-            else:
-                self._subscription.assign_from_subscribed([
-                    TopicPartition(topic, partition)
-                    for topic in self._subscription.subscription
-                    for partition in self._metadata_snapshot[topic]
-                ])
+        if self._subscription.partitions_auto_assigned():
+            metadata_snapshot = self._build_metadata_snapshot(self._subscription, cluster)
+            if self._metadata_snapshot != metadata_snapshot:
+                self._metadata_snapshot = metadata_snapshot
+
+        # If we haven't got group coordinator support,
+        # just assign all partitions locally
+        if self._auto_assign_all_partitions():
+            self._subscription.assign_from_subscribed([
+                TopicPartition(topic, partition)
+                for topic in self._subscription.subscription
+                for partition in self._metadata_snapshot[topic]
+            ])
+
+    def _auto_assign_all_partitions(self):
+        # For users that use "subscribe" without group support,
+        # we will simply assign all partitions to this consumer
+        if self.config['api_version'] < (0, 9):
+            return True
+        elif self.config['group_id'] is None:
+            return True
+        else:
+            return False

     def _build_metadata_snapshot(self, subscription, cluster):
         metadata_snapshot = {}
@@ -181,16 +202,6 @@ def _build_metadata_snapshot(self, subscription, cluster):
             metadata_snapshot[topic] = set(partitions)
         return metadata_snapshot

-    def _subscription_metadata_changed(self, cluster):
-        if not self._subscription.partitions_auto_assigned():
-            return False
-
-        metadata_snapshot = self._build_metadata_snapshot(self._subscription, cluster)
-        if self._metadata_snapshot != metadata_snapshot:
-            self._metadata_snapshot = metadata_snapshot
-            return True
-        return False
-
     def _lookup_assignor(self, name):
         for assignor in self.config['assignors']:
             if assignor.name == name:
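
The snapshot check that used to live in _subscription_metadata_changed() is now inlined in _handle_metadata_update(): _build_metadata_snapshot() reduces cluster metadata to a dict of topic -> set of partition ids, and a plain inequality detects partition changes. A small illustration with made-up topic names and partition counts (not code from the patch):

    # each snapshot maps topic name -> set of known partition ids
    old_snapshot = {'orders': {0, 1, 2}}
    new_snapshot = {'orders': {0, 1, 2, 3}}   # a partition was added broker-side

    if old_snapshot != new_snapshot:
        # the coordinator stores the new snapshot; if this member is the group
        # leader, need_rejoin() will later see _assignment_snapshot differ from
        # _metadata_snapshot and trigger a fresh rebalance
        old_snapshot = new_snapshot

Because only the group leader keeps an _assignment_snapshot (see _perform_assignment below), followers no longer force a rebalance of their own when metadata drifts.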
@@ -199,12 +210,10 @@ def _lookup_assignor(self, name):

     def _on_join_complete(self, generation, member_id, protocol,
                           member_assignment_bytes):
-        # if we were the assignor, then we need to make sure that there have
-        # been no metadata updates since the rebalance begin. Otherwise, we
-        # won't rebalance again until the next metadata change
-        if self._assignment_snapshot is not None and self._assignment_snapshot != self._metadata_snapshot:
-            self._subscription.mark_for_reassignment()
-            return
+        # only the leader is responsible for monitoring for metadata changes
+        # (i.e. partition changes)
+        if not self._is_leader:
+            self._assignment_snapshot = None

         assignor = self._lookup_assignor(protocol)
         assert assignor, 'Coordinator selected invalid assignment protocol: %s' % protocol
@@ -307,6 +316,7 @@ def _perform_assignment(self, leader_id, assignment_strategy, members):
         # keep track of the metadata used for assignment so that we can check
         # after rebalance completion whether anything has changed
         self._cluster.request_update()
+        self._is_leader = True
         self._assignment_snapshot = self._metadata_snapshot

         log.debug("Performing assignment for group %s using strategy %s"
@@ -338,18 +348,32 @@ def _on_join_prepare(self, generation, member_id):
                               " for group %s failed on_partitions_revoked",
                               self._subscription.listener, self.group_id)

-        self._assignment_snapshot = None
-        self._subscription.mark_for_reassignment()
+        self._is_leader = False
+        self._subscription.reset_group_subscription()

     def need_rejoin(self):
         """Check whether the group should be rejoined

         Returns:
             bool: True if consumer should rejoin group, False otherwise
         """
-        return (self._subscription.partitions_auto_assigned() and
-                (super(ConsumerCoordinator, self).need_rejoin() or
-                 self._subscription.needs_partition_assignment))
+        if not self._subscription.partitions_auto_assigned():
+            return False
+
+        if self._auto_assign_all_partitions():
+            return False
+
+        # we need to rejoin if we performed the assignment and metadata has changed
+        if (self._assignment_snapshot is not None
+            and self._assignment_snapshot != self._metadata_snapshot):
+            return True
+
+        # we need to join if our subscription has changed since the last join
+        if (self._joined_subscription is not None
+            and self._joined_subscription != self._subscription.subscription):
+            return True
+
+        return super(ConsumerCoordinator, self).need_rejoin()

     def refresh_committed_offsets_if_needed(self):
         """Fetch committed offsets for assigned partitions."""