import argparse
import fileinput
55
def parse_args(argv=None):
    """Parse the command-line arguments of the converter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what the script entry point relies on.

    Returns:
        argparse.Namespace: ``files`` is the (possibly empty) list of input
        paths and ``output_predicate`` is ``True`` when ``-p`` was given.
    """
    parser = argparse.ArgumentParser(
        prog="ssyn2es.py", description="convert Sudachi synonyms to Solr format")
    parser.add_argument('files', metavar='FILE', nargs='*',
                        help='files to read, if empty, stdin is used')
    parser.add_argument('-p', '--output-predicate', action='store_true',
                        help='if set, output predicates')
    return parser.parse_args(argv)
1117
18+
def load_synonyms(files, output_predicate):
    """Read comma-separated synonym records and group them by group id.

    Each non-blank line is split on ``","`` and only the first nine fields
    are used: field 0 is the group id, fields 1 and 2 are flags, and field 8
    is the term that gets stored.

    Args:
        files: Paths handed to ``fileinput.input``; an empty list makes
            fileinput read from stdin.
        output_predicate: When false, records whose field 1 equals ``"2"``
            are dropped (presumably these are the predicate entries that the
            ``-p`` option enables -- confirm against the dictionary format).

    Returns:
        dict: Maps group id to a two-element list ``[plain, flagged]`` where
        ``flagged`` collects the terms whose field 2 is ``"1"`` and ``plain``
        collects all other kept terms, both in input order.

    Raises:
        IndexError: If a non-blank line has fewer than nine fields.
    """
    synonyms = {}
    # Named ``lines`` so the builtin ``input`` is not shadowed.
    with fileinput.input(files=files) as lines:
        for raw in lines:
            record = raw.strip()
            if not record:
                continue
            entry = record.split(",")[0:9]
            flag = entry[2]
            # Field 2 == "2" is always skipped; field 1 == "2" only survives
            # when predicates were requested.
            if flag == "2" or (not output_predicate and entry[1] == "2"):
                continue
            # The group is created only after the skip checks, so a group
            # made up solely of skipped records never appears in the result.
            group = synonyms.setdefault(entry[0], [[], []])
            group[1 if flag == "1" else 0].append(entry[8])

    return synonyms
33+
34+
def dump_synonyms(synonyms, file=None):
    """Print one synonym rule per group, ordered by group id.

    Args:
        synonyms: Mapping of group id to ``[plain, flagged]`` term lists, as
            built by the loader in this file.
        file: Stream forwarded to ``print``; ``None`` means ``sys.stdout``.

    Output per group:
        * ``flagged`` empty: ``a,b,c`` is printed, but only when ``plain``
          has more than one term (a single term has nothing to map to).
        * ``flagged`` non-empty: ``a,b=>a,b,x`` is printed, i.e. the plain
          terms map to plain + flagged terms. Nothing is printed when
          ``plain`` is empty because there would be no left-hand side.
    """
    for group_id in sorted(synonyms):
        plain, flagged = synonyms[group_id]
        if not flagged:
            if len(plain) > 1:
                print(",".join(plain), file=file)
        elif plain:
            # ``flagged`` is known to be non-empty in this branch, so only
            # the left-hand side needs checking.
            print(",".join(plain) + "=>" + ",".join(plain + flagged),
                  file=file)
46+
47+
def main():
    """Run the converter: parse arguments, load the synonyms, print them.

    Input files (or stdin when none are given) and the predicate switch come
    from the command line; the result is written to standard output.
    """
    args = parse_args()
    synonyms = load_synonyms(args.files, args.output_predicate)
    dump_synonyms(synonyms)
3253
3354
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# 0 commit comments