@@ -59,11 +59,12 @@ def inject_label_in_text(row, text_name, tokenized_label_task, constant_outside)
                 else:
                     string += f"{token.text}{whitespaces}"
             else:
+                label_trimmed = label[2:]  # remove B- and I-
                 if close_multitoken_label:
-                    string += f"{token.text}]({label[2:]}){whitespaces}"
+                    string += f"{token.text}]({label_trimmed}){whitespaces}"
                     close_multitoken_label = False
                 else:
-                    string += f"[{token.text}]({label[2:]}){whitespaces}"
+                    string += f"[{token.text}]({label_trimmed}){whitespaces}"
         else:
             string += f"{token.text}{whitespaces}"
     return string
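The first hunk only factors the `label[2:]` slice into a `label_trimmed` variable; the surrounding logic is what turns BIO token labels into Rasa's markdown-style entity syntax. A minimal, self-contained sketch of that idea follows (the token data, the `OUTSIDE` constant, and the helper name are invented for illustration and are not the repository's actual implementation):

```python
from typing import List


def bio_to_rasa_markdown(tokens: List[str], labels: List[str], outside: str = "OUTSIDE") -> str:
    """Wrap BIO-labelled token spans in Rasa's [span](entity) markdown syntax."""
    parts = []
    for idx, (token, label) in enumerate(zip(tokens, labels)):
        if label == outside:
            parts.append(token)
            continue
        label_trimmed = label[2:]  # "B-city" / "I-city" -> "city"
        next_label = labels[idx + 1] if idx + 1 < len(labels) else outside
        piece = f"[{token}" if label.startswith("B-") else token
        if not next_label.startswith("I-"):  # the entity span ends at this token
            piece += f"]({label_trimmed})"
        parts.append(piece)
    return " ".join(parts)


print(bio_to_rasa_markdown(
    ["fly", "to", "New", "York"],
    ["OUTSIDE", "OUTSIDE", "B-city", "I-city"],
))
# -> fly to [New York](city)
```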
@@ -114,6 +115,29 @@ def build_intent_yaml(
             df_sub_label[text_name].tolist()
         )
         nlu_list.append(OrderedDict(intent=label, examples=literal(literal_string)))
+
+    if tokenized_label_task is not None:
+
+        def flatten(xss):
+            return [x for xs in xss for x in xs]
+
+        labels = set(flatten(df[tokenized_label_task].tolist()))
+        lookup_list_names = []
+        for label in labels:
+            if label.startswith(CONSTANT_LABEL_BEGIN):
+                label_trimmed = label[2:]  # remove B-
+                lookup_list_names.append(label_trimmed)
+
+        for lookup_list in client.get_lookup_lists():
+            if lookup_list["name"] in lookup_list_names:
+                values = [entry["value"] for entry in lookup_list["terms"]]
+                literal_string = build_literal_from_iterable(values)
+                nlu_list.append(
+                    OrderedDict(
+                        lookup=lookup_list["name"], examples=literal(literal_string)
+                    )
+                )
+
     nlu_dict = OrderedDict(nlu=nlu_list)
 
     if dir_name is not None and not os.path.isdir(dir_name):
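The second hunk extends `build_intent_yaml` so that every `B-`-prefixed token label which has a matching lookup list in the project is exported as a `lookup` entry alongside the intents. A rough sketch of the structure each appended entry serialises to (the lookup data is invented, and `build_literal_from_iterable` / `literal` are approximated with a plain join):

```python
from collections import OrderedDict

# Hypothetical lookup list in the shape returned by client.get_lookup_lists()
lookup_list = {"name": "city", "terms": [{"value": "Berlin"}, {"value": "New York"}]}

values = [entry["value"] for entry in lookup_list["terms"]]
nlu_entry = OrderedDict(
    lookup=lookup_list["name"],
    examples="\n".join(f"- {value}" for value in values),  # stand-in for literal(...)
)
# Serialised into the generated nlu file, this becomes roughly:
#
#   - lookup: city
#     examples: |
#       - Berlin
#       - New York
```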