From 77b7c4ac90228489e74e546a95f1542d9f935bca Mon Sep 17 00:00:00 2001 From: Svenja Date: Wed, 16 Aug 2023 14:17:53 +0200 Subject: [PATCH 1/4] New Branch for Emotionallity Detection --- .../emotionality_detection/__init__.py | 51 ++++++++++--------- .../code_snippet_common.md | 46 ++++++++++------- .../code_snippet_refinery.md | 27 ++++++---- .../emotionality_detection/config.py | 37 +++++++------- 4 files changed, 91 insertions(+), 70 deletions(-) diff --git a/classifiers/text_analysis/emotionality_detection/__init__.py b/classifiers/text_analysis/emotionality_detection/__init__.py index d9b5d286..c7dea70b 100644 --- a/classifiers/text_analysis/emotionality_detection/__init__.py +++ b/classifiers/text_analysis/emotionality_detection/__init__.py @@ -1,35 +1,36 @@ +import requests +from extractors.util.spacy import SpacySingleton from pydantic import BaseModel -from LeXmo import LeXmo INPUT_EXAMPLE = { - "text": """As Harry went inside the Chamber of Secrets, he discovered the Basilisk's layer. Before him stood Tom - Riddle, with his wand. Harry was numb for a second as if he had seen a ghost. Moments later the giant - snake attacked Harry but fortunately, Harry dodged and ran into one of the sewer lines while the serpent - followed. The Basilisk couldn't be killed with bare hands but only with a worthy weapon.""" + "text": "I did not know that you were coming! I am very glad to see you. If you would tell me sooner, I would have baked some cookies, though.", + "apiKey": "" } - class EmotionalityDetectionModel(BaseModel): - text: str + apiKey: str + text: str - class Config: - schema_example = {"example": INPUT_EXAMPLE} + class Config: + schema_example = {"example": INPUT_EXAMPLE} +def emotionality_detection(req: EmotionalityDetectionModel): + """BERT model for emotion detection""" + headers = {"Authorization": f"Bearer {req.apiKey}"} + data = {"inputs": req.text} + try: + response = requests.post("https://api-inference.huggingface.co/models/j-hartmann/emotion-english-distilroberta-base", headers=headers, json=data) + response_json = response.json() + ner_positions = [] -def emotionality_detection(request: EmotionalityDetectionModel): - """Fetches emotions from a given text""" + nlp = SpacySingleton.get_nlp("en_core_web_sm") + doc = nlp(req.text) - text = request.text - try: - emo = LeXmo.LeXmo(text) - del emo["text"] - del emo["positive"] - del emo["negative"] - unique = dict(zip(emo.values(), emo.keys())) - if len(unique) == 1: - return "Cannot determine emotion" - else: - emo = max(emo, key=emo.get) - return {"emotion": emo} - except ValueError: - return "Valid text required" + for item in response_json: + start = item["start"] + end = item["end"] + span = doc.char_span(start, end, alignment_mode="expand") + ner_positions.append((item["entity_group"], span.start, span.end)) + return {"entities": ner_positions} + except Exception as e: + return f"That didn't work. Did you provide a valid API key? Go error: {e} and message: {response_json}" \ No newline at end of file diff --git a/classifiers/text_analysis/emotionality_detection/code_snippet_common.md b/classifiers/text_analysis/emotionality_detection/code_snippet_common.md index 6b9a8153..f1ab80a3 100644 --- a/classifiers/text_analysis/emotionality_detection/code_snippet_common.md +++ b/classifiers/text_analysis/emotionality_detection/code_snippet_common.md @@ -1,26 +1,38 @@ ```python -from LeXmo import LeXmo -def emotionality_detection(text:str) -> str: - """ - @param text: text to check - @return: either 'anger', 'fear', 'anticipation', 'trust', 'surprise', 'sadness', 'joy' or 'disgust' depending on the score - """ - emo = LeXmo.LeXmo(text) - del emo["text"] - del emo["positive"] - del emo["negative"] - emo = max(emo, key=emo.get) - return emo - +import requests +import spacy + +def emotionality_detection(text, api_key): + headers = {"Authorization": f"Bearer {api_key}"} + data = {"inputs": text} + try: + response = requests.post("https://api-inference.huggingface.co/models/j-hartmann/emotion-english-distilroberta-base", headers=headers, json=data) + response_json = response.json() + ner_positions = [] + + nlp = spacy.load("en_core_web_sm") + doc = nlp(text) + + for item in response_json: + start = item["start"] + end = item["end"] + span = doc.char_span(start, end, alignment_mode="expand") + ner_positions.append((item["entity_group"], span.start, span.end)) + return ner_positions + except Exception as e: + return f"That didn't work. Did you provide a valid API key? Go error: {e} and message: {response_json}" + # ↑ necessary bricks function # ----------------------------------------------------------------------------------------- # ↓ example implementation -def example_integration(): - texts = ["I am scared to continue.", "Oh my goodness it was the best evening ever, hype!"] - for text in texts: - print(f"\"{text}\" has emotion: {emotionality_detection(text)}") +def emotionality_detection(): + hf_api_key = "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" + texts = ["What a great day to go to the beach.", "Sorry to hear that. CAn I help you?", "Why the hell would you do that?"] + for text in texts: + output = emotion_detection(text, api_key=hf_api_key) + print(output) example_integration() ``` \ No newline at end of file diff --git a/classifiers/text_analysis/emotionality_detection/code_snippet_refinery.md b/classifiers/text_analysis/emotionality_detection/code_snippet_refinery.md index 6fa2e458..b7a98a3f 100644 --- a/classifiers/text_analysis/emotionality_detection/code_snippet_refinery.md +++ b/classifiers/text_analysis/emotionality_detection/code_snippet_refinery.md @@ -1,16 +1,21 @@ ```python -#expects labeling task to have labels ["anger", "fear", "anticipation", "trust", "surprise", "sadness", "joy", "disgust"] -from LeXmo import LeXmo +import requests -ATTRIBUTE: str = "text" # only text attributes +ATTRIBUTE: str = "text" +API_KEY: str = "" def emotionality_detection(record): - text = record[ATTRIBUTE].text # SpaCy document, hence we need to call .text to get the string - emo = LeXmo.LeXmo(text) - del emo["text"] - del emo["positive"] - del emo["negative"] - emo = max(emo, key=emo.get) - - return emo + headers = {"Authorization": f"Bearer {API_KEY}"} + data = {"inputs": record[ATTRIBUTE].text} + try: + response = requests.post("https://api-inference.huggingface.co/models/j-hartmann/emotion-english-distilroberta-base", headers=headers, json=data) + response_json = response.json() + ner_positions = [] + for item in response_json: + start = item["start"] + end = item["end"] + span = record[ATTRIBUTE].char_span(start, end, alignment_mode="expand") + yield item["entity_group"], span.start, span.end + except Exception as e: + return f"That didn't work. Did you provide a valid API key? Go error: {e} and message: {response_json}" ``` \ No newline at end of file diff --git a/classifiers/text_analysis/emotionality_detection/config.py b/classifiers/text_analysis/emotionality_detection/config.py index a7b4571b..c9247b9e 100644 --- a/classifiers/text_analysis/emotionality_detection/config.py +++ b/classifiers/text_analysis/emotionality_detection/config.py @@ -19,28 +19,31 @@ def get_config(): "text_analysis", ], # first entry should be parent directory # bricks integrator information - integrator_inputs={ + integrator_inputs={ "name": "emotionality_detection", "refineryDataType": RefineryDataType.TEXT.value, - "globalComment": "Only for english text.\nWorks best with longer texts since scores for each word are accumulated.", "outputs": [ - "anger", - "fear", - "anticipation", - "trust", - "surprise", - "sadness", - "joy", - "disgust", + "anger", + "disgust", + "fear", + "joy", + "sadness", + "neutral", + "suprise" ], "variables": { - "ATTRIBUTE": { - "selectionType": SelectionType.CHOICE.value, - "addInfo": [ + "API_KEY": { + "selectionType": SelectionType.STRING.value, + "defaultValue": "", + "addInfo": [BricksVariableType.GENERIC_STRING.value], + }, + "ATTRIBUTE": { + "selectionType": SelectionType.CHOICE.value, + "addInfo": [ BricksVariableType.ATTRIBUTE.value, BricksVariableType.GENERIC_STRING.value, - ], - } + ], + }, }, - }, - ) + }, +) \ No newline at end of file From 9cb441c83424fe6b6968423996b1f2a146b152f0 Mon Sep 17 00:00:00 2001 From: Svenja Date: Mon, 28 Aug 2023 12:32:16 +0200 Subject: [PATCH 2/4] new code --- .../code_snippet_common.md | 25 +++++++++---------- .../code_snippet_refinery.md | 16 ++++++------ 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/classifiers/text_analysis/emotionality_detection/code_snippet_common.md b/classifiers/text_analysis/emotionality_detection/code_snippet_common.md index f1ab80a3..54274eec 100644 --- a/classifiers/text_analysis/emotionality_detection/code_snippet_common.md +++ b/classifiers/text_analysis/emotionality_detection/code_snippet_common.md @@ -1,7 +1,6 @@ ```python import requests -import spacy def emotionality_detection(text, api_key): headers = {"Authorization": f"Bearer {api_key}"} @@ -9,17 +8,17 @@ def emotionality_detection(text, api_key): try: response = requests.post("https://api-inference.huggingface.co/models/j-hartmann/emotion-english-distilroberta-base", headers=headers, json=data) response_json = response.json() - ner_positions = [] - nlp = spacy.load("en_core_web_sm") - doc = nlp(text) + # flatten the list of lists + flat_list = [item for sublist in response_json for item in sublist] - for item in response_json: - start = item["start"] - end = item["end"] - span = doc.char_span(start, end, alignment_mode="expand") - ner_positions.append((item["entity_group"], span.start, span.end)) - return ner_positions + # find the item with the highest score + max_item = max(flat_list, key=lambda x: x["score"]) + + # retrieve the label of the item with the highest score + max_label = max_item["label"] + + return max_label except Exception as e: return f"That didn't work. Did you provide a valid API key? Go error: {e} and message: {response_json}" @@ -27,11 +26,11 @@ def emotionality_detection(text, api_key): # ----------------------------------------------------------------------------------------- # ↓ example implementation -def emotionality_detection(): - hf_api_key = "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" +def example_integration(): + hf_api_key = "" texts = ["What a great day to go to the beach.", "Sorry to hear that. CAn I help you?", "Why the hell would you do that?"] for text in texts: - output = emotion_detection(text, api_key=hf_api_key) + output = emotionality_detection(text, api_key=hf_api_key) print(output) example_integration() diff --git a/classifiers/text_analysis/emotionality_detection/code_snippet_refinery.md b/classifiers/text_analysis/emotionality_detection/code_snippet_refinery.md index b7a98a3f..31148b97 100644 --- a/classifiers/text_analysis/emotionality_detection/code_snippet_refinery.md +++ b/classifiers/text_analysis/emotionality_detection/code_snippet_refinery.md @@ -4,18 +4,16 @@ import requests ATTRIBUTE: str = "text" API_KEY: str = "" -def emotionality_detection(record): +def emotionality_detection(text, api_key): headers = {"Authorization": f"Bearer {API_KEY}"} data = {"inputs": record[ATTRIBUTE].text} - try: + try: response = requests.post("https://api-inference.huggingface.co/models/j-hartmann/emotion-english-distilroberta-base", headers=headers, json=data) response_json = response.json() - ner_positions = [] - for item in response_json: - start = item["start"] - end = item["end"] - span = record[ATTRIBUTE].char_span(start, end, alignment_mode="expand") - yield item["entity_group"], span.start, span.end + flat_list = [item for sublist in response_json for item in sublist] + max_item = max(flat_list, key=lambda x: x["score"]) + max_label = max_item["label"] + return max_label except Exception as e: - return f"That didn't work. Did you provide a valid API key? Go error: {e} and message: {response_json}" + return f"That didn't work. Did you provide a valid API key? Go error: {e} and message: {response_json}" ``` \ No newline at end of file From c6d2255d5280e9bf331120f85ed09760671ceec0 Mon Sep 17 00:00:00 2001 From: Svenja Date: Mon, 28 Aug 2023 12:51:24 +0200 Subject: [PATCH 3/4] new init --- .../emotionality_detection/__init__.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/classifiers/text_analysis/emotionality_detection/__init__.py b/classifiers/text_analysis/emotionality_detection/__init__.py index c7dea70b..f19fbefe 100644 --- a/classifiers/text_analysis/emotionality_detection/__init__.py +++ b/classifiers/text_analysis/emotionality_detection/__init__.py @@ -15,22 +15,22 @@ class Config: schema_example = {"example": INPUT_EXAMPLE} def emotionality_detection(req: EmotionalityDetectionModel): - """BERT model for emotion detection""" - headers = {"Authorization": f"Bearer {req.apiKey}"} + """huggingface model for emotion detection""" + headers = {"Authorization": f"Bearer {req.api_key}"} data = {"inputs": req.text} try: response = requests.post("https://api-inference.huggingface.co/models/j-hartmann/emotion-english-distilroberta-base", headers=headers, json=data) response_json = response.json() - ner_positions = [] - nlp = SpacySingleton.get_nlp("en_core_web_sm") - doc = nlp(req.text) + # flatten the list of lists + flat_list = [item for sublist in response_json for item in sublist] - for item in response_json: - start = item["start"] - end = item["end"] - span = doc.char_span(start, end, alignment_mode="expand") - ner_positions.append((item["entity_group"], span.start, span.end)) - return {"entities": ner_positions} + # find the item with the highest score + max_item = max(flat_list, key=lambda x: x["score"]) + + # retrieve the label of the item with the highest score + max_label = max_item["label"] + + return max_label except Exception as e: return f"That didn't work. Did you provide a valid API key? Go error: {e} and message: {response_json}" \ No newline at end of file From f2bd8eca2781cec8c8454850bc45a5ee67b6faec Mon Sep 17 00:00:00 2001 From: Svenja Date: Mon, 28 Aug 2023 14:18:35 +0200 Subject: [PATCH 4/4] change init --- classifiers/text_analysis/emotionality_detection/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classifiers/text_analysis/emotionality_detection/__init__.py b/classifiers/text_analysis/emotionality_detection/__init__.py index f19fbefe..ff1a1abb 100644 --- a/classifiers/text_analysis/emotionality_detection/__init__.py +++ b/classifiers/text_analysis/emotionality_detection/__init__.py @@ -16,7 +16,7 @@ class Config: def emotionality_detection(req: EmotionalityDetectionModel): """huggingface model for emotion detection""" - headers = {"Authorization": f"Bearer {req.api_key}"} + headers = {"Authorization": f"Bearer {req.apiKey}"} data = {"inputs": req.text} try: response = requests.post("https://api-inference.huggingface.co/models/j-hartmann/emotion-english-distilroberta-base", headers=headers, json=data)