 from kafka.future import Future
 from kafka.metrics.stats import Avg, Count, Max, Rate
 from kafka.protocol.fetch import FetchRequest
-from kafka.protocol.message import PartialMessage
 from kafka.protocol.offset import (
     OffsetRequest, OffsetResetStrategy, UNKNOWN_OFFSET
 )
+from kafka.record import MemoryRecords
 from kafka.serializer import Deserializer
 from kafka.structs import TopicPartition, OffsetAndTimestamp
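For orientation, this is the read pattern the new `kafka.record.MemoryRecords` abstraction provides and that the rewritten `_unpack_message_set` below relies on. A minimal sketch; `raw_fetch_bytes` is a hypothetical stand-in for the raw records field of one fetch response partition:

```python
# Sketch of the MemoryRecords read loop adopted by this change.
# `raw_fetch_bytes` is a placeholder for one partition's fetched bytes.
from kafka.record import MemoryRecords

def iter_fetched(raw_fetch_bytes):
    records = MemoryRecords(raw_fetch_bytes)
    batch = records.next_batch()   # returns None once the buffer is exhausted
    while batch is not None:
        for record in batch:       # decompression and relative-offset
            yield record           # resolution happen inside the batch
        batch = records.next_batch()
```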
@@ -355,7 +355,7 @@ def fetched_records(self, max_records=None):
 
         Raises:
             OffsetOutOfRangeError: if no subscription offset_reset_strategy
-            InvalidMessageError: if message crc validation fails (check_crcs
+            CorruptRecordException: if message crc validation fails (check_crcs
                 must be set to True)
             RecordTooLargeError: if a message is larger than the currently
                 configured max_partition_fetch_bytes
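The docstring change tracks the exception rename in `kafka.errors` (to the best of my reading, the old `InvalidMessageError` name is kept as an alias of `CorruptRecordException` for backward compatibility). A hedged sketch of caller-side handling:

```python
# Assumes kafka.errors exposes CorruptRecordException (formerly
# InvalidMessageError); `handle` is a caller-supplied callback.
from kafka.errors import CorruptRecordException

def drain(consumer, handle):
    """Consume until a CRC failure surfaces; needs check_crcs=True."""
    try:
        for message in consumer:
            handle(message)
    except CorruptRecordException:
        # the checksum stored with a record did not match its payload
        return False
    return True
```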
@@ -523,77 +523,26 @@ def _message_generator(self):
                           " the current position is %d", tp, part.fetch_offset,
                           position)
 
-    def _unpack_message_set(self, tp, messages):
+    def _unpack_message_set(self, tp, records):
         try:
-            for offset, size, msg in messages:
-                if self.config['check_crcs'] and not msg.validate_crc():
-                    raise Errors.InvalidMessageError(msg)
-                elif msg.is_compressed():
-                    # If relative offset is used, we need to decompress the entire message first to compute
-                    # the absolute offset.
-                    inner_mset = msg.decompress()
-
-                    # There should only ever be a single layer of compression
-                    if inner_mset[0][-1].is_compressed():
-                        log.warning('MessageSet at %s offset %d appears '
-                                    ' double-compressed. This should not'
-                                    ' happen -- check your producers!',
-                                    tp, offset)
-                        if self.config['skip_double_compressed_messages']:
-                            log.warning('Skipping double-compressed message at'
-                                        ' %s %d', tp, offset)
-                            continue
-
-                    if msg.magic > 0:
-                        last_offset, _, _ = inner_mset[-1]
-                        absolute_base_offset = offset - last_offset
-                    else:
-                        absolute_base_offset = -1
-
-                    for inner_offset, inner_size, inner_msg in inner_mset:
-                        if msg.magic > 0:
-                            # When magic value is greater than 0, the timestamp
-                            # of a compressed message depends on the
-                            # timestamp type of the wrapper message:
-
-                            if msg.timestamp_type == 0:  # CREATE_TIME (0)
-                                inner_timestamp = inner_msg.timestamp
-
-                            elif msg.timestamp_type == 1:  # LOG_APPEND_TIME (1)
-                                inner_timestamp = msg.timestamp
-
-                            else:
-                                raise ValueError('Unknown timestamp type: {0}'.format(msg.timestamp_type))
-                        else:
-                            inner_timestamp = msg.timestamp
-
-                        if absolute_base_offset >= 0:
-                            inner_offset += absolute_base_offset
-
-                        key = self._deserialize(
-                            self.config['key_deserializer'],
-                            tp.topic, inner_msg.key)
-                        value = self._deserialize(
-                            self.config['value_deserializer'],
-                            tp.topic, inner_msg.value)
-                        yield ConsumerRecord(tp.topic, tp.partition, inner_offset,
-                                             inner_timestamp, msg.timestamp_type,
-                                             key, value, inner_msg.crc,
-                                             len(inner_msg.key) if inner_msg.key is not None else -1,
-                                             len(inner_msg.value) if inner_msg.value is not None else -1)
-
-                else:
+            batch = records.next_batch()
+            while batch is not None:
+                for record in batch:
+                    key_size = len(record.key) if record.key is not None else -1
+                    value_size = len(record.value) if record.value is not None else -1
                     key = self._deserialize(
                         self.config['key_deserializer'],
-                        tp.topic, msg.key)
+                        tp.topic, record.key)
                     value = self._deserialize(
                         self.config['value_deserializer'],
-                        tp.topic, msg.value)
-                    yield ConsumerRecord(tp.topic, tp.partition, offset,
-                                         msg.timestamp, msg.timestamp_type,
-                                         key, value, msg.crc,
-                                         len(msg.key) if msg.key is not None else -1,
-                                         len(msg.value) if msg.value is not None else -1)
+                        tp.topic, record.value)
+                    yield ConsumerRecord(
+                        tp.topic, tp.partition, record.offset, record.timestamp,
+                        record.timestamp_type, key, value, record.checksum,
+                        key_size, value_size)
+
+                batch = records.next_batch()
 
         # If unpacking raises StopIteration, it is erroneously
         # caught by the generator. We want all exceptions to be raised
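Most of the deleted body existed to resolve v1 relative offsets by hand: a compressed wrapper message carries the absolute offset of its *last* inner record, so the base offset must be recovered by subtraction. A worked example of that arithmetic, which the `MemoryRecords` batches now perform internally:

```python
# The wrapper message (magic v1) sits at the absolute offset of its
# LAST inner record; inner records store offsets relative to the base.
wrapper_offset = 105
inner_relative = [0, 1, 2]                            # as stored on the wire
absolute_base = wrapper_offset - inner_relative[-1]   # 105 - 2 = 103
absolute = [absolute_base + rel for rel in inner_relative]
assert absolute == [103, 104, 105]                    # last == wrapper_offset
```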
@@ -848,7 +797,8 @@ def _handle_fetch_response(self, request, send_time, response):
         random.shuffle(response.topics)
         for topic, partitions in response.topics:
             random.shuffle(partitions)
-            for partition, error_code, highwater, messages in partitions:
+            for partition_data in partitions:
+                partition, error_code, highwater = partition_data[:3]
                 tp = TopicPartition(topic, partition)
                 error_type = Errors.for_code(error_code)
                 if not self._subscriptions.is_fetchable(tp):
@@ -859,6 +809,7 @@ def _handle_fetch_response(self, request, send_time, response):
 
                 elif error_type is Errors.NoError:
                     self._subscriptions.assignment[tp].highwater = highwater
+                    records = MemoryRecords(partition_data[-1])
 
                     # we are interested in this fetch only if the beginning
                     # offset (of the *request*) matches the current consumed position
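Indexing `partition_data[:3]` and `partition_data[-1]` instead of tuple unpacking keeps this loop version-agnostic: later fetch response versions insert fields (v4 adds `last_stable_offset` and `aborted_transactions`) between the highwater mark and the records blob, but the three leading fields and the trailing bytes stay put. Illustrative values only:

```python
# Hypothetical partition tuples for two fetch response versions.
pd_v0 = (0, 0, 1000, b'<records>')           # partition, error, hw, records
pd_v4 = (0, 0, 1000, 990, [], b'<records>')  # + last_stable_offset, aborted_txns

for pd in (pd_v0, pd_v4):
    partition, error_code, highwater = pd[:3]   # stable leading fields
    raw_records = pd[-1]                        # records blob is always last
    assert raw_records == b'<records>'
```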
@@ -873,29 +824,29 @@ def _handle_fetch_response(self, request, send_time, response):
                                   position)
                         continue
 
+                    if not records.has_next() and records.size_in_bytes() > 0:
+                        # we did not read a single message from a non-empty
+                        # buffer because that message's size is larger than
+                        # fetch size, in this case record this exception
+                        self._record_too_large_partitions[tp] = fetch_offset
+
                     num_bytes = 0
-                    partial = None
-                    if messages and isinstance(messages[-1][-1], PartialMessage):
-                        partial = messages.pop()
+                    message_count = 0
 
-                    if messages:
+                    if records.has_next():
                         log.debug("Adding fetched record for partition %s with"
                                   " offset %d to buffered record list", tp,
                                   position)
-                        unpacked = list(self._unpack_message_set(tp, messages))
+                        unpacked = list(self._unpack_message_set(tp, records))
                         self._records.append(self.PartitionRecords(fetch_offset, tp, unpacked))
-                        last_offset, _, _ = messages[-1]
+                        last_offset = unpacked[-1].offset
                         self._sensors.records_fetch_lag.record(highwater - last_offset)
-                        num_bytes = sum(msg[1] for msg in messages)
-                    elif partial:
-                        # we did not read a single message from a non-empty
-                        # buffer because that message's size is larger than
-                        # fetch size, in this case record this exception
-                        self._record_too_large_partitions[tp] = fetch_offset
+                        num_bytes = records.valid_bytes()
+                        message_count = len(unpacked)
 
-                    self._sensors.record_topic_fetch_metrics(topic, num_bytes, len(messages))
+                    self._sensors.record_topic_fetch_metrics(topic, num_bytes, message_count)
                     total_bytes += num_bytes
-                    total_count += len(messages)
+                    total_count += message_count
                 elif error_type in (Errors.NotLeaderForPartitionError,
                                     Errors.UnknownTopicOrPartitionError):
                     self._client.cluster.request_update()
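On the new oversized-record check: `size_in_bytes()` measures the whole fetched buffer, including a trailing batch the broker truncated at the fetch size limit, while `has_next()` (and `valid_bytes()`, now used for the byte metric) only see complete batches. So "non-empty buffer, no complete batch" means even the first record did not fit. A sketch under those assumptions:

```python
# Mirrors the check added above; `raw` stands in for one partition's
# fetched bytes.
from kafka.record import MemoryRecords

def first_record_too_large(raw):
    records = MemoryRecords(raw)
    # bytes arrived, but not even one complete batch parsed out
    return not records.has_next() and records.size_in_bytes() > 0
```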