Skip to content

Commit 680e04f

Browse files
author
Johannes Hötter
committed
adds documentation and output log
1 parent 7468cc3 commit 680e04f

File tree

1 file changed

+74
-19
lines changed

1 file changed

+74
-19
lines changed

kern/adapter/rasa.py

Lines changed: 74 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
1+
from typing import Any, List, Optional
2+
import pandas as pd
13
import yaml
2-
import os
3-
from collections import OrderedDict
4-
5-
CONSTANT_OUTSIDE = "OUTSIDE"
6-
CONSTANT_LABEL_BEGIN = "B-"
7-
CONSTANT_LABEL_INTERMEDIATE = "I-"
8-
94

5+
# https://stackoverflow.com/questions/8640959/how-can-i-control-what-scalar-form-pyyaml-uses-for-my-data
106
class literal(str):
117
pass
128

@@ -24,12 +20,44 @@ def ordered_dict_presenter(dumper, data):
2420

2521
yaml.add_representer(OrderedDict, ordered_dict_presenter)
2622

23+
import os
24+
from collections import OrderedDict
25+
from wasabi import msg
2726

28-
def build_literal_from_iterable(iterable):
27+
from kern import Client
28+
29+
CONSTANT_OUTSIDE = "OUTSIDE"
30+
CONSTANT_LABEL_BEGIN = "B-"
31+
CONSTANT_LABEL_INTERMEDIATE = "I-"
32+
33+
34+
def build_literal_from_iterable(iterable: List[Any]) -> str:
35+
"""Builds a Rasa-conformant YAML string from an iterable.
36+
37+
Args:
38+
iterable (List[Any]): List with values to be converted to a literal block.
39+
40+
Returns:
41+
str: literal block
42+
"""
2943
return "\n".join([f"- {value}" for value in iterable]) + "\n"
3044

3145

32-
def inject_label_in_text(row, text_name, tokenized_label_task, constant_outside):
46+
def inject_label_in_text(
47+
row: pd.Series, text_name: str, tokenized_label_task: str, constant_outside: str
48+
) -> str:
49+
"""Insert token labels into text.
50+
E.g. "Hello, my name is Johannes Hötter" -> "Hello, my name is [Johannes Hötter](person)"
51+
52+
Args:
53+
row (pd.Series): row of the record export dataframe
54+
text_name (str): name of the text/chat field
55+
tokenized_label_task (str): name of the label task containing token-level labels
56+
constant_outside (str): constant to be used for outside labels
57+
58+
Returns:
59+
str: injected text
60+
"""
3361
string = ""
3462
token_list = row[f"{text_name}__tokenized"]
3563

@@ -71,15 +99,31 @@ def inject_label_in_text(row, text_name, tokenized_label_task, constant_outside)
7199

72100

73101
def build_intent_yaml(
74-
client,
75-
text_name,
76-
intent_label_task,
77-
metadata_label_task=None,
78-
tokenized_label_task=None,
79-
dir_name="data",
80-
file_name="nlu.yml",
81-
constant_outside=CONSTANT_OUTSIDE,
82-
):
102+
client: Client,
103+
text_name: str,
104+
intent_label_task: str,
105+
metadata_label_task: Optional[str] = None,
106+
tokenized_label_task: Optional[str] = None,
107+
dir_name: str = "data",
108+
file_name: str = "nlu.yml",
109+
constant_outside: str = CONSTANT_OUTSIDE,
110+
version: str = "3.1",
111+
) -> None:
112+
"""Builds a Rasa NLU YAML file from your project data via the client object.
113+
114+
Args:
115+
client (Client): connected Client object for your project
116+
text_name (str): name of the text/chat field
117+
intent_label_task (str): name of the classification label with the intents
118+
metadata_label_task (Optional[str], optional): name of a metadata label task (e.g. sentiment), if you have one. Currently, only one such task can be provided. Defaults to None.
119+
tokenized_label_task (Optional[str], optional): name of a token-level label task (e.g. for entities), if you have one. Currently, only one such task can be provided. Defaults to None.
120+
dir_name (str, optional): name of your Rasa data directory. Defaults to "data".
121+
file_name (str, optional): name of the file you want to store the data to. Defaults to "nlu.yml".
122+
constant_outside (str, optional): constant to be used for outside labels in token-level tasks. Defaults to CONSTANT_OUTSIDE.
123+
version (str, optional): Rasa version. Defaults to "3.1".
124+
"""
125+
msg.info("Building training data for Rasa")
126+
msg.warn("If you haven't done so yet, please install rasa and run `rasa init`")
83127
df = client.get_record_export(tokenize=(tokenized_label_task is not None))
84128

85129
if tokenized_label_task is not None:
@@ -138,7 +182,7 @@ def flatten(xss):
138182
)
139183
)
140184

141-
nlu_dict = OrderedDict(nlu=nlu_list)
185+
nlu_dict = OrderedDict(version=version, nlu=nlu_list)
142186

143187
if dir_name is not None and not os.path.isdir(dir_name):
144188
os.mkdir(dir_name)
@@ -147,3 +191,14 @@ def flatten(xss):
147191

148192
with open(file_path, "w") as f:
149193
yaml.dump(nlu_dict, f, allow_unicode=True)
194+
msg.good(f"Saved training data to {file_path}! 🚀")
195+
msg.warn(
196+
f"Please make sure to add the project-specific files domain.yml, {os.path.join(dir_name, 'rules.yml')} and {os.path.join(dir_name, 'stories.yml')}."
197+
)
198+
msg.info("More information about these files can be found here:")
199+
msg.info(" - Domain: https://rasa.com/docs/rasa/domain")
200+
msg.info(" - Rules: https://rasa.com/docs/rasa/rules")
201+
msg.info(" - Stories: https://rasa.com/docs/rasa/stories")
202+
msg.good(
203+
"You're all set, and can now start building your conversational AI via `rasa train`! 🎉"
204+
)

0 commit comments

Comments
 (0)