Skip to content

Commit 59cfb4d

Browse files
Add auto-merge workflow
- Add toxicity-check.yml workflow with check_toxicity.py script for automatic content moderation from Roshanjossey#108 (resolves Roshanjossey#27)
- Refactor toxicity-check.yml and check_toxicity.py to use the GitHub CLI and gemma2-9b-it served by Groq (per Roshanjossey#27 (comment)), respectively
- Rename toxicity-check.yml and check_toxicity.py to auto-pr-merge.yml and check_pr.py, respectively

Co-authored-by: harlanenciso112 <harsanenciso@gmail.com>
1 parent 9f19f8c commit 59cfb4d

File tree

2 files changed

+138
-0
lines changed

2 files changed

+138
-0
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
name: Auto-merge PRs
on:
  pull_request_target:
    types: [opened, synchronize, reopened]
    paths:
      # Triggers when at least one changed file is under contributors/;
      # the PR may still touch other paths, which is checked further below.
      - "contributors/**"

# NOTE(review): this workflow runs on pull_request_target (base-repo token and
# secrets) but checks out and executes code from the PR head
# (scripts/check_pr.py). A PR can modify that script. Confirm this is
# acceptable, or run the checker from the base branch against the PR's files.

jobs:
  auto-merge:
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}
          fetch-depth: 2 # check_pr.py diffs HEAD^..HEAD, so the parent commit is needed
          # Do not leave the write-scoped token in .git/config while PR code runs;
          # later steps authenticate through GH_TOKEN instead.
          persist-credentials: false

      - name: Get a list of files changed in the pull request
        run: |
          # `gh pr view --json files` returns {"files": [{"path": ...}, ...]}.
          # Newlines are folded to spaces so the value is a single GITHUB_ENV line.
          FILES_CHANGED=$(gh pr view "$PR_URL" --json files --jq '.files[].path' | tr '\n' ' ')
          echo "FILES_CHANGED=$FILES_CHANGED" >> "$GITHUB_ENV"
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_URL: ${{ github.event.pull_request.html_url }}

      - name: Check if PR modifies only contributors/{username}.html
        id: only_contributors_check
        uses: actions/github-script@v6
        with:
          script: |
            const username = context.payload.pull_request.user.login;
            const expected = `contributors/${username}.html`;
            // Environment variable names are case-sensitive on the Linux runner.
            const filesChanged = (process.env.FILES_CHANGED || '').trim();
            console.log(`Comparing "${filesChanged}" to "${expected}"`);
            const onlyContributors = filesChanged === expected;
            core.setOutput('only_contributors', onlyContributors);

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: "3.x"

      - name: Install dependencies
        run: pip install bs4 groq

      - name: Check PR contents
        env:
          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
        run: python scripts/check_pr.py

      - name: Auto-merge PR if not flagged
        if: success() && steps.only_contributors_check.outputs.only_contributors == 'true'
        # --match-head-commit refuses the merge if the PR head moved after the
        # commit that was actually checked out and checked above.
        run: gh pr merge --merge --match-head-commit "$HEAD_SHA" "$PR_URL"
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_URL: ${{ github.event.pull_request.html_url }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha }}

      - name: Create comment if not flagged
        if: success() && steps.only_contributors_check.outputs.only_contributors == 'true'
        run: |
          echo "👏 You've successfully submitted a PR, and it's been automatically merged!" | tee comment.txt

      - name: Create comment if flagged
        # failure() is true when any earlier step failed, including a non-zero
        # exit from check_pr.py.
        if: failure()
        run: |
          echo "🚩 Your PR was flagged. Please review and appropriately modify your PR; if this flag is in error, wait for the maintainer to review it." | tee comment.txt

      - name: Create comment if something other than contributors/{username}.html was modified
        if: success() && steps.only_contributors_check.outputs.only_contributors != 'true'
        run: |
          # Separate echo calls: a plain bash `echo` would print "\n" literally.
          {
            echo "👏 You've successfully submitted a PR! It contains changes that require review by the maintainer before merging."
            echo "Files changed:"
            echo "$FILES_CHANGED"
          } | tee comment.txt

      - name: Post comment on PR
        if: always()
        uses: thollander/actions-comment-pull-request@v2
        with:
          filePath: comment.txt

scripts/check_pr.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import json
2+
import os
3+
import sys
4+
5+
from bs4 import BeautifulSoup
6+
from groq import Groq
7+
8+
9+
# Shared Groq API client, created once at import time.
# NOTE(review): presumably picks up GROQ_API_KEY from the environment (the
# workflow exports it for this script) — confirm against the Groq SDK.
client = Groq()

# Identifier of the Groq-served model used for moderation.
MODEL = "gemma2-9b-it"

# Instructions sent as the system message: the model must answer with a JSON
# object holding a "reason" string and a boolean "flag".
SYSTEM_PROMPT = (
    "Determine whether or not the given string contains any offensive material.\n"
    "Respond with true if the string contains any offensive material and false if it contains no offensive material.\n"
    'Respond in json format with a field "reason" set to an explanation and "flag" set to true or false.\n'
)
16+
17+
18+
def text_is_toxic(text: str) -> bool:
    """
    Analyze toxicity of text using an LLM served by Groq.

    The text is wrapped in triple quotes and sent as the user message, with
    ``SYSTEM_PROMPT`` as the system message. The model is asked for a JSON
    object whose ``"flag"`` field says whether the text is offensive.

    Returns:
        True if the model flagged the text, False if it did not. If the
        reply cannot be interpreted (invalid or truncated JSON, missing
        ``"flag"``, or a value that is neither a boolean nor the strings
        "true"/"false"), the text is treated as flagged so that an
        unreadable answer never results in an automatic pass.
    """
    result = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "system",
                "content": SYSTEM_PROMPT,
            },
            {
                "role": "user",
                "content": f"'''\n{text}\n'''",
            },
        ],
        temperature=0,
        max_completion_tokens=128,  # Limit output to ~512 characters
        response_format={"type": "json_object"},
    ).choices[0].message.content
    print("[DEBUG] File content:", text.replace("\n", "\\n"))
    print("[DEBUG] Model response:", result)

    try:
        flag = json.loads(result)["flag"]
    except (TypeError, ValueError, KeyError):
        # TypeError: no content / non-object JSON; ValueError: invalid JSON
        # (e.g. cut off by the token limit); KeyError: no "flag" field.
        print("[DEBUG] Could not read \"flag\" from model response; treating as flagged")
        return True

    if isinstance(flag, bool):
        return flag
    if isinstance(flag, str):
        # Models sometimes emit the boolean as a string; a bare "false"
        # would otherwise be truthy and flag a clean file.
        normalized = flag.strip().lower()
        if normalized in ("true", "false"):
            return normalized == "true"

    print("[DEBUG] Unexpected \"flag\" value; treating as flagged")
    return True
41+
42+
43+
def file_is_toxic(file_path: str) -> bool:
    """
    Analyze the file path and content for toxicity.

    The file is parsed as HTML and only its extracted text is checked. The
    path itself is checked first; the content is only sent to the model if
    the path was not flagged.

    Args:
        file_path: Path of the file to check, readable as UTF-8 text.

    Returns:
        A truthy value if either the path or the extracted text is flagged.
    """
    # Decode explicitly as UTF-8 rather than relying on the runner's locale.
    with open(file_path, encoding="utf-8") as file:
        file_content = BeautifulSoup(
            file.read(),  # Read the HTML file
            "html.parser",
        ).get_text()  # Extract the text from the HTML file
    return text_is_toxic(file_path) or text_is_toxic(file_content)
53+
54+
55+
if __name__ == "__main__":
    import subprocess

    # List the files touched by the most recent commit. check=True makes a
    # failing git command abort the script with a non-zero status instead of
    # silently yielding an empty list (which would let the check pass).
    # -z emits NUL-separated, unquoted paths, so names containing spaces or
    # non-ASCII characters survive intact.
    diff = subprocess.run(
        ["git", "diff", "--name-only", "-z", "HEAD^", "HEAD"],
        check=True,
        capture_output=True,
        encoding="utf-8",
    )
    changed_paths = [path for path in diff.stdout.split("\0") if path]

    toxic = False
    for file_path in changed_paths:  # For each file in the diff,
        # Deleted files no longer exist in the working tree and are skipped.
        if os.path.exists(file_path) and file_is_toxic(file_path):
            print(f"🚩 Flagged {file_path}")
            toxic = True  # Keep going so every flagged file is reported

    sys.exit(1 if toxic else 0)  # Exit with a non-zero status code if toxic

0 commit comments

Comments
 (0)