1111
1212from kafka .common import ProduceRequest , TopicAndPartition
1313from kafka .partitioner import HashedPartitioner
14- from kafka .protocol import create_message
14+ from kafka .protocol import (
15+ CODEC_NONE , CODEC_GZIP , CODEC_SNAPPY , ALL_CODECS ,
16+ create_message , create_gzip_message , create_snappy_message ,
17+ )
1518
1619log = logging .getLogger ("kafka" )
1720
2124STOP_ASYNC_PRODUCER = - 1
2225
2326
24- def _send_upstream (queue , client , batch_time , batch_size ,
27+ def _send_upstream (queue , client , codec , batch_time , batch_size ,
2528 req_acks , ack_timeout ):
2629 """
2730 Listen on the queue for a specified number of messages or till
@@ -62,7 +65,14 @@ def _send_upstream(queue, client, batch_time, batch_size,
6265
6366 # Send collected requests upstream
6467 reqs = []
65- for topic_partition , messages in msgset .items ():
68+ for topic_partition , msg in msgset .items ():
69+ if codec == CODEC_GZIP :
70+ messages = [create_gzip_message (msg )]
71+ elif codec == CODEC_SNAPPY :
72+ messages = [create_snappy_message (msg )]
73+ else :
74+ messages = [create_message (m ) for m in msg ]
75+
6676 req = ProduceRequest (topic_partition .topic ,
6777 topic_partition .partition ,
6878 messages )
@@ -102,6 +112,7 @@ class Producer(object):
102112 def __init__ (self , client , async = False ,
103113 req_acks = ACK_AFTER_LOCAL_WRITE ,
104114 ack_timeout = DEFAULT_ACK_TIMEOUT ,
115+ codec = None ,
105116 batch_send = False ,
106117 batch_send_every_n = BATCH_SEND_MSG_COUNT ,
107118 batch_send_every_t = BATCH_SEND_DEFAULT_INTERVAL ):
@@ -119,11 +130,17 @@ def __init__(self, client, async=False,
119130 self .req_acks = req_acks
120131 self .ack_timeout = ack_timeout
121132
133+ if codec is None :
134+ codec = CODEC_NONE
135+ assert codec in ALL_CODECS
136+ self .codec = codec
137+
122138 if self .async :
123139 self .queue = Queue () # Messages are sent through this queue
124140 self .proc = Process (target = _send_upstream ,
125141 args = (self .queue ,
126142 self .client .copy (),
143+ self .codec ,
127144 batch_send_every_t ,
128145 batch_send_every_n ,
129146 self .req_acks ,
@@ -139,11 +156,16 @@ def send_messages(self, topic, partition, *msg):
139156 """
140157 if self .async :
141158 for m in msg :
142- self .queue .put ((TopicAndPartition (topic , partition ),
143- create_message (m )))
159+ self .queue .put ((TopicAndPartition (topic , partition ), m ))
144160 resp = []
145161 else :
146- messages = [create_message (m ) for m in msg ]
162+ if self .codec == CODEC_GZIP :
163+ messages = [create_gzip_message (msg )]
164+ elif self .codec == CODEC_SNAPPY :
165+ messages = [create_snappy_message (msg )]
166+ else :
167+ messages = [create_message (m ) for m in msg ]
168+
147169 req = ProduceRequest (topic , partition , messages )
148170 try :
149171 resp = self .client .send_produce_request ([req ], acks = self .req_acks ,
@@ -168,7 +190,7 @@ def stop(self, timeout=1):
168190
169191class SimpleProducer (Producer ):
170192 """
171- A simple, round-robbin producer. Each message goes to exactly one partition
193+ A simple, round-robin producer. Each message goes to exactly one partition
172194
173195 Params:
174196 client - The Kafka client instance to use
@@ -189,14 +211,15 @@ class SimpleProducer(Producer):
189211 def __init__ (self , client , async = False ,
190212 req_acks = Producer .ACK_AFTER_LOCAL_WRITE ,
191213 ack_timeout = Producer .DEFAULT_ACK_TIMEOUT ,
214+ codec = None ,
192215 batch_send = False ,
193216 batch_send_every_n = BATCH_SEND_MSG_COUNT ,
194217 batch_send_every_t = BATCH_SEND_DEFAULT_INTERVAL ,
195218 random_start = False ):
196219 self .partition_cycles = {}
197220 self .random_start = random_start
198221 super (SimpleProducer , self ).__init__ (client , async , req_acks ,
199- ack_timeout , batch_send ,
222+ ack_timeout , codec , batch_send ,
200223 batch_send_every_n ,
201224 batch_send_every_t )
202225
@@ -241,6 +264,7 @@ class KeyedProducer(Producer):
241264 def __init__ (self , client , partitioner = None , async = False ,
242265 req_acks = Producer .ACK_AFTER_LOCAL_WRITE ,
243266 ack_timeout = Producer .DEFAULT_ACK_TIMEOUT ,
267+ codec = None ,
244268 batch_send = False ,
245269 batch_send_every_n = BATCH_SEND_MSG_COUNT ,
246270 batch_send_every_t = BATCH_SEND_DEFAULT_INTERVAL ):
@@ -250,7 +274,7 @@ def __init__(self, client, partitioner=None, async=False,
250274 self .partitioners = {}
251275
252276 super (KeyedProducer , self ).__init__ (client , async , req_acks ,
253- ack_timeout , batch_send ,
277+ ack_timeout , codec , batch_send ,
254278 batch_send_every_n ,
255279 batch_send_every_t )
256280
0 commit comments