77the programming language and can be used from within Python programs.
88Martin von Loewis' work[1] helps considerably in this regard.
99
10- There's one problem though; xgettext is the program that scans source code
11- looking for message strings, but it groks only C (or C++). Python
12- introduces a few wrinkles, such as dual quoting characters, triple quoted
13- strings, and raw strings. xgettext understands none of this.
14-
15- Enter pygettext, which uses Python's standard tokenize module to scan
16- Python source code, generating .pot files identical to what GNU xgettext[2]
17- generates for C and C++ code. From there, the standard GNU tools can be
18- used.
10+ pygettext uses Python's standard tokenize module to scan Python source
11+ code, generating .pot files identical to what GNU xgettext[2] generates
12+ for C and C++ code. From there, the standard GNU tools can be used.
1913
2014A word about marking Python strings as candidates for translation. GNU
2115xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
4135option arguments is broken, and in these cases, pygettext just defines
4236additional switches.
4337
38+ NOTE: The only public interface of pygettext is its command-line
39+ interface. The internal API is subject to change without notice.
40+
4441Usage: pygettext [options] inputfile ...
4542
4643Options:
@@ -328,12 +325,6 @@ def add_location(self, filename, lineno, msgid_plural=None, *, is_docstring=Fals
328325 self .is_docstring |= is_docstring
329326
330327
331- def key_for (msgid , msgctxt = None ):
332- if msgctxt is not None :
333- return (msgctxt , msgid )
334- return msgid
335-
336-
337328class TokenEater :
338329 def __init__ (self , options ):
339330 self .__options = options
@@ -354,6 +345,10 @@ def __call__(self, ttype, tstring, stup, etup, line):
354345## file=sys.stderr)
355346 self .__state (ttype , tstring , stup [0 ])
356347
348+ @property
349+ def messages (self ):
350+ return self .__messages
351+
357352 def __waiting (self , ttype , tstring , lineno ):
358353 opts = self .__options
359354 # Do docstring extractions, if enabled
@@ -513,7 +508,7 @@ def __addentry(self, msg, lineno=None, *, is_docstring=False):
513508 lineno = self .__lineno
514509 msgctxt = msg .get ('msgctxt' )
515510 msgid_plural = msg .get ('msgid_plural' )
516- key = key_for (msgid , msgctxt )
511+ key = self . _key_for (msgid , msgctxt )
517512 if key in self .__messages :
518513 self .__messages [key ].add_location (
519514 self .__curfile ,
@@ -530,6 +525,12 @@ def __addentry(self, msg, lineno=None, *, is_docstring=False):
530525 is_docstring = is_docstring ,
531526 )
532527
528+ @staticmethod
529+ def _key_for (msgid , msgctxt = None ):
530+ if msgctxt is not None :
531+ return (msgctxt , msgid )
532+ return msgid
533+
533534 def warn_unexpected_token (self , token ):
534535 print ((
535536 '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
@@ -543,58 +544,58 @@ def set_filename(self, filename):
543544 self .__curfile = filename
544545 self .__freshmodule = 1
545546
546- def write (self , fp ):
547- options = self .__options
548- timestamp = time .strftime ('%Y-%m-%d %H:%M%z' )
549- encoding = fp .encoding if fp .encoding else 'UTF-8'
550- print (pot_header % {'time' : timestamp , 'version' : __version__ ,
551- 'charset' : encoding ,
552- 'encoding' : '8bit' }, file = fp )
553-
554- # Sort locations within each message by filename and lineno
555- sorted_keys = [
556- (key , sorted (msg .locations ))
557- for key , msg in self .__messages .items ()
558- ]
559- # Sort messages by locations
560- # For example, a message with locations [('test.py', 1), ('test.py', 2)] will
561- # appear before a message with locations [('test.py', 1), ('test.py', 3)]
562- sorted_keys .sort (key = itemgetter (1 ))
563-
564- for key , locations in sorted_keys :
565- msg = self .__messages [key ]
566- if options .writelocations :
567- # location comments are different b/w Solaris and GNU:
568- if options .locationstyle == options .SOLARIS :
569- for location in locations :
570- print (f'# File: { location .filename } , line: { location .lineno } ' , file = fp )
571- elif options .locationstyle == options .GNU :
572- # fit as many locations on one line, as long as the
573- # resulting line length doesn't exceed 'options.width'
574- locline = '#:'
575- for location in locations :
576- s = f' { location .filename } :{ location .lineno } '
577- if len (locline ) + len (s ) <= options .width :
578- locline = locline + s
579- else :
580- print (locline , file = fp )
581- locline = f'#:{ s } '
582- if len (locline ) > 2 :
547+
548+ def write_pot_file (messages , options , fp ):
549+ timestamp = time .strftime ('%Y-%m-%d %H:%M%z' )
550+ encoding = fp .encoding if fp .encoding else 'UTF-8'
551+ print (pot_header % {'time' : timestamp , 'version' : __version__ ,
552+ 'charset' : encoding ,
553+ 'encoding' : '8bit' }, file = fp )
554+
555+ # Sort locations within each message by filename and lineno
556+ sorted_keys = [
557+ (key , sorted (msg .locations ))
558+ for key , msg in messages .items ()
559+ ]
560+ # Sort messages by locations
561+ # For example, a message with locations [('test.py', 1), ('test.py', 2)] will
562+ # appear before a message with locations [('test.py', 1), ('test.py', 3)]
563+ sorted_keys .sort (key = itemgetter (1 ))
564+
565+ for key , locations in sorted_keys :
566+ msg = messages [key ]
567+ if options .writelocations :
568+ # location comments are different b/w Solaris and GNU:
569+ if options .locationstyle == options .SOLARIS :
570+ for location in locations :
571+ print (f'# File: { location .filename } , line: { location .lineno } ' , file = fp )
572+ elif options .locationstyle == options .GNU :
573+ # fit as many locations on one line, as long as the
574+ # resulting line length doesn't exceed 'options.width'
575+ locline = '#:'
576+ for location in locations :
577+ s = f' { location .filename } :{ location .lineno } '
578+ if len (locline ) + len (s ) <= options .width :
579+ locline = locline + s
580+ else :
583581 print (locline , file = fp )
584- if msg .is_docstring :
585- # If the entry was gleaned out of a docstring, then add a
586- # comment stating so. This is to aid translators who may wish
587- # to skip translating some unimportant docstrings.
588- print ('#, docstring' , file = fp )
589- if msg .msgctxt is not None :
590- print ('msgctxt' , normalize (msg .msgctxt , encoding ), file = fp )
591- print ('msgid' , normalize (msg .msgid , encoding ), file = fp )
592- if msg .msgid_plural is not None :
593- print ('msgid_plural' , normalize (msg .msgid_plural , encoding ), file = fp )
594- print ('msgstr[0] ""' , file = fp )
595- print ('msgstr[1] ""\n ' , file = fp )
596- else :
597- print ('msgstr ""\n ' , file = fp )
582+ locline = f'#:{ s } '
583+ if len (locline ) > 2 :
584+ print (locline , file = fp )
585+ if msg .is_docstring :
586+ # If the entry was gleaned out of a docstring, then add a
587+ # comment stating so. This is to aid translators who may wish
588+ # to skip translating some unimportant docstrings.
589+ print ('#, docstring' , file = fp )
590+ if msg .msgctxt is not None :
591+ print ('msgctxt' , normalize (msg .msgctxt , encoding ), file = fp )
592+ print ('msgid' , normalize (msg .msgid , encoding ), file = fp )
593+ if msg .msgid_plural is not None :
594+ print ('msgid_plural' , normalize (msg .msgid_plural , encoding ), file = fp )
595+ print ('msgstr[0] ""' , file = fp )
596+ print ('msgstr[1] ""\n ' , file = fp )
597+ else :
598+ print ('msgstr ""\n ' , file = fp )
598599
599600
600601def main ():
@@ -752,7 +753,7 @@ class Options:
752753 fp = open (options .outfile , 'w' )
753754 closep = 1
754755 try :
755- eater .write ( fp )
756+ write_pot_file ( eater .messages , options , fp )
756757 finally :
757758 if closep :
758759 fp .close ()
0 commit comments