Skip to content

Commit 7468cc3

Browse files
author
Johannes Hötter
committed
adds lookups
1 parent: e9b0331 · commit: 7468cc3

File tree

1 file changed

+26
-2
lines changed

1 file changed

+26
-2
lines changed

kern/adapter/rasa.py

Lines changed: 26 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,11 +59,12 @@ def inject_label_in_text(row, text_name, tokenized_label_task, constant_outside)
59 59
else:
60 60
string += f"{token.text}{whitespaces}"
61 61
else:
62+
label_trimmed = label[2:] # remove B- and I-
62 63
if close_multitoken_label:
63-
string += f"{token.text}]({label[2:]}){whitespaces}"
64+
string += f"{token.text}]({label_trimmed}){whitespaces}"
64 65
close_multitoken_label = False
65 66
else:
66-
string += f"[{token.text}]({label[2:]}){whitespaces}"
67+
string += f"[{token.text}]({label_trimmed}){whitespaces}"
67 68
else:
68 69
string += f"{token.text}{whitespaces}"
69 70
return string
@@ -114,6 +115,29 @@ def build_intent_yaml(
114 115
df_sub_label[text_name].tolist()
115 116
)
116 117
nlu_list.append(OrderedDict(intent=label, examples=literal(literal_string)))
118+
119+
if tokenized_label_task is not None:
120+
121+
def flatten(xss):
122+
return [x for xs in xss for x in xs]
123+
124+
labels = set(flatten(df[tokenized_label_task].tolist()))
125+
lookup_list_names = []
126+
for label in labels:
127+
if label.startswith(CONSTANT_LABEL_BEGIN):
128+
label_trimmed = label[2:] # remove B-
129+
lookup_list_names.append(label_trimmed)
130+
131+
for lookup_list in client.get_lookup_lists():
132+
if lookup_list["name"] in lookup_list_names:
133+
values = [entry["value"] for entry in lookup_list["terms"]]
134+
literal_string = build_literal_from_iterable(values)
135+
nlu_list.append(
136+
OrderedDict(
137+
lookup=lookup_list["name"], examples=literal(literal_string)
138+
)
139+
)
140+
117 141
nlu_dict = OrderedDict(nlu=nlu_list)
118 142

119 143
if dir_name is not None and not os.path.isdir(dir_name):

0 commit comments

Comments
 (0)