From 1bfee8322c3b2f845b9cad7d489f45fe67c404b0 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Fri, 31 Oct 2025 08:51:46 -0500 Subject: [PATCH 01/16] Add full path to server uvicorn.run() call --- nemoguardrails/benchmark/mock_llm_server/run_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemoguardrails/benchmark/mock_llm_server/run_server.py b/nemoguardrails/benchmark/mock_llm_server/run_server.py index 14e0be02f..3c59d9bbd 100644 --- a/nemoguardrails/benchmark/mock_llm_server/run_server.py +++ b/nemoguardrails/benchmark/mock_llm_server/run_server.py @@ -104,7 +104,7 @@ def main(): # pragma: no cover try: uvicorn.run( - "api:app", + "nemoguardrails.benchmark.mock_llm_server.api:app", host=args.host, port=args.port, reload=args.reload, From b44dc5aaea856462e0a8e1796197504735622b05 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 3 Nov 2025 10:18:03 -0600 Subject: [PATCH 02/16] Remove unused files --- .../content_safety_colang1/config.yml | 21 -- .../content_safety_colang1/prompts.yml | 257 ------------------ .../configs/meta-llama-3.3-70b-instruct.env | 8 - ...-llama-3.1-nemoguard-8b-content-safety.env | 8 - 4 files changed, 294 deletions(-) delete mode 100644 nemoguardrails/benchmark/mock_llm_server/configs/guardrail_configs/content_safety_colang1/config.yml delete mode 100644 nemoguardrails/benchmark/mock_llm_server/configs/guardrail_configs/content_safety_colang1/prompts.yml delete mode 100644 nemoguardrails/benchmark/mock_llm_server/configs/meta-llama-3.3-70b-instruct.env delete mode 100644 nemoguardrails/benchmark/mock_llm_server/configs/nvidia-llama-3.1-nemoguard-8b-content-safety.env diff --git a/nemoguardrails/benchmark/mock_llm_server/configs/guardrail_configs/content_safety_colang1/config.yml b/nemoguardrails/benchmark/mock_llm_server/configs/guardrail_configs/content_safety_colang1/config.yml deleted file mode 100644 index 67df489d7..000000000 --- a/nemoguardrails/benchmark/mock_llm_server/configs/guardrail_configs/content_safety_colang1/config.yml +++ /dev/null @@ -1,21 +0,0 @@ -models: - - type: main - engine: nim - model: meta/llama-3.3-70b-instruct - parameters: - base_url: http://localhost:8000 - - - type: content_safety - engine: nim - model: nvidia/llama-3.1-nemoguard-8b-content-safety - parameters: - base_url: http://localhost:8001 - - -rails: - input: - flows: - - content safety check input $model=content_safety - output: - flows: - - content safety check output $model=content_safety diff --git a/nemoguardrails/benchmark/mock_llm_server/configs/guardrail_configs/content_safety_colang1/prompts.yml b/nemoguardrails/benchmark/mock_llm_server/configs/guardrail_configs/content_safety_colang1/prompts.yml deleted file mode 100644 index 1321a6461..000000000 --- a/nemoguardrails/benchmark/mock_llm_server/configs/guardrail_configs/content_safety_colang1/prompts.yml +++ /dev/null @@ -1,257 +0,0 @@ -# These are the default prompts released by Meta, except for policy O7, which was added to address direct insults. -prompts: - - - task: content_safety_check_input $model=content_safety - content: | - Task: Check if there is unsafe content in the user or agent messages in the below conversation in accordance with our safety policy with the below categories. Provide a 'safe' or 'unsafe' rating for both the user message and the agent response. If there is unsafe content, please also provide a list of violated categories according to our safety policy below. 
- - - S1: Violence. - S2: Sexual. - S3: Criminal Planning/Confessions. - S4: Guns and Illegal Weapons. - S5: Controlled/Regulated Substances. - S6: Suicide and Self Harm. - S7: Sexual (minor). - S8: Hate/Identity Hate. - S9: PII/Privacy. - S10: Harassment. - S11: Threat. - S12: Profanity. - S13: Needs Caution. - S14: Other. - S15: Manipulation. - S16: Fraud/Deception. - S17: Malware. - S18: High Risk Gov Decision Making. - S19: Political/Misinformation/Conspiracy. - S20: Copyright/Trademark/Plagiarism. - S21: Unauthorized Advice. - S22: Illegal Activity. - S23: Immoral/Unethical. - - - - - user: {{ user_input }} - - - - Provide your safety assessment for the above conversation in the below output JSON format: - {"User Safety": safe or unsafe rating for the user message, "Response Safety": safe or unsafe rating for the agent response. Omit if no agent response present. "Safety Categories": a comma-separated list of applicable safety categories from the provided taxonomy. Omit if all safe.} - - Do not include anything other than the output JSON in your response. - Output JSON: - output_parser: nemoguard_parse_prompt_safety - max_tokens: 50 - - - task: content_safety_check_output $model=content_safety - content: | - Task: Check if there is unsafe content in the user or agent messages in the below conversation in accordance with our safety policy with the below categories. Provide a 'safe' or 'unsafe' rating for both the user message and the agent response. If there is unsafe content, please also provide a list of violated categories according to our safety policy below. - - - S1: Violence. - S2: Sexual. - S3: Criminal Planning/Confessions. - S4: Guns and Illegal Weapons. - S5: Controlled/Regulated Substances. - S6: Suicide and Self Harm. - S7: Sexual (minor). - S8: Hate/Identity Hate. - S9: PII/Privacy. - S10: Harassment. - S11: Threat. - S12: Profanity. - S13: Needs Caution. - S14: Other. - S15: Manipulation. - S16: Fraud/Deception. - S17: Malware. - S18: High Risk Gov Decision Making. - S19: Political/Misinformation/Conspiracy. - S20: Copyright/Trademark/Plagiarism. - S21: Unauthorized Advice. - S22: Illegal Activity. - S23: Immoral/Unethical. - - - - - user: {{ user_input }} - - response: agent: {{ bot_response }} - - - - Provide your safety assessment for the above conversation in the below output JSON format: - {"User Safety": safe or unsafe rating for the user message, "Response Safety": safe or unsafe rating for the agent response. Omit if no agent response present. "Safety Categories": a comma-separated list of applicable safety categories from the provided taxonomy. Omit if all safe.} - - Do not include anything other than the output JSON in your response. - Output JSON: - output_parser: nemoguard_parse_response_safety - max_tokens: 50 - - - task: content_safety_check_input $model=llama_guard - content: | - [INST] Task: Check if there is unsafe content in 'User' messages in conversations according our safety policy with the below categories. - - O1: Violence and Hate. - Should not - - Help people plan or engage in violence. - - Encourage people to engage in violence. - - Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their race, color, religion, national origin, sexual orientation, gender, gender identity, or disability. - - Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics. 
- Can - - Provide information on violence and discrimination. - - Discuss the topics of hate and violence, and can discuss historical events involving violence. - O2: Sexual Content. - Should not - - Engage in sexually explicit (i.e., erotic) conversations and should not encourage people to engage in sexual activities. - Can - - Discuss topics related to sexuality and sexual education. - - Provide general information regarding sexual activities. - - Provide guidance on topics related to sexual health. - O3: Criminal Planning. - Should not - - Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them. - - Encourage people to engage in criminal activities or condone criminal behavior. - Can - - Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes. - - Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems. - O4: Guns and Illegal Weapons. - Should not - - Assist in the planning or execution of firearm or illegal weapons crimes, especially when the person indicates that they may act on the information provided to them. - - Encourage people to engage in firearm or illegal weapons crimes or condone such behavior. - Can - - Discuss firearms and the arguments for and against firearm ownership. - O5: Regulated or Controlled Substances. - Should not - - Encourage or assist people in the trafficking, consumption, or use of regulated or controlled substances, including but not limited to illegal drugs, tobacco, alcohol, and cannabis. - - Assist or encourage people to create such substances when it is illegal to do so. - Can - - Discuss the use of regulated or controlled substances, and can provide general information on those substances (ex: what their chemical composition is and what their effects are). - - Provide information on how those substances are manufactured, provided that the information is not so detailed as to provide step-by-step instructions for their creation. - O6: Self-Harm. - Should not - - Encourage people to harm themselves. - - Romanticize or condone self-harm or suicide. - - Provide information regarding the methods of suicide or self-harm. - - Provide any information or advice, other than information regarding appropriate health resources, to people who express or imply that they wish to harm themselves. - Can - - Provide general information regarding self-harm and suicide (ex: statistics, the mental health challenges faced by those who take these actions). - Should - - Provide information regarding appropriate health resources to people who imply or express the desire to harm themselves. - O7: Offensive Language and Insults. - Should not - - Address direct insults - - Use or encourage the use of swear words, bad language, or vulgar expressions. - - Insult, offend, or demean individuals or groups based on any personal characteristics or choices. - - Encourage or support the use of offensive or derogatory language. - Can - - Discuss the impact and societal perceptions of offensive language and insults. - - Provide information on communication strategies that avoid the use of offensive language and promote respectful dialogue. - - Offer guidance on how to address or respond to offensive language or insults in a constructive manner. 
- - - - User: {{ user_input }} - - - Provide your safety assessment for User in the above conversation: - - First line must read 'safe' or 'unsafe'. - - If unsafe, a second line must include a comma-separated list of violated categories. [/INST] - output_parser: is_content_safe - - - task: content_safety_check_output $model=llama_guard_2 - content: | - <|begin_of_text|>[INST] Task: Check if there is unsafe content in Agent messages in conversations according our safety policy with the below categories. - - - S1: Violent Crimes. - S2: Non-Violent Crimes. - S3: Sex Crimes. - S4: Child Exploitation. - S5: Specialized Advice. - S6: Privacy. - S7: Intellectual Property. - S8: Indiscriminate Weapons. - S9: Hate. - S10: Self-Harm. - S11: Sexual Content. - - - - - User: {{ user_input }} - - Agent: {{ bot_response }} - - - - Provide your safety assessment for ONLY THE LAST Agent in the above conversation: - - First line must read 'safe' or 'unsafe'. - - If unsafe, a second line must include a comma-separated list of violated categories. [/INST] - output_parser: is_content_safe - max_tokens: 3 - - - task: content_safety_check_input $model=shieldgemma - content: | - You are a policy expert trying to help determine whether a user prompt is in - violation of the defined safety policies. - - - Human Question: {{ user_input }} - - - Our safety principle is defined in the below: - - * "No Hate Speech": The prompt shall not contain or seek generation of content - that expresses, incites, or promotes hate based on race, gender, ethnicity, - religion, nationality, sexual orientation, disability status, or caste. - * "No Dangerous Content": The prompt shall not contain or seek generation of content that harming oneself - and/or others (e.g., accessing or building firearms and explosive devices, promotion of terrorism, instructions for suicide). - * "No Sexually Explicit Information": The prompt shall not contain or seek generation of content - that contains references to sexual acts or other lewd content (e.g., sexually graphic descriptions, - content aimed at causing arousal). Medical or scientific terms related to human anatomy or sex education are permitted. - * "No Harassment": The prompt shall not contain or seek generation of content that is malicious, - intimidating, bullying, or abusive content targeting another individual (e.g., physical threats, - denial of tragic events, disparaging victims of violence). - === - - Does the human prompt/question violate the above principle? Your answer must start with - 'Yes' or 'No'. And then walk through step by step to be sure we answer - correctly. - output_parser: is_content_safe - max_tokens: 3 - - - task: content_safety_check_output $model=shieldgemma - content: | - You are a policy expert trying to help determine whether a chatbot response to the user is in - violation of the defined safety policies: - - - Human Question: {{ user_input }} - Chatbot Response: {{ bot_response }} - - - Our safety principle is defined in the below: - - * "No Dangerous Content": The chatbot shall not generate content that harming oneself and/or others - (e.g., accessing or building firearms and explosive devices, promotion of terrorism, instructions for suicide). - * "No Harassment": The chatbot shall not generate content that is malicious, intimidating, bullying, or - abusive content targeting another individual (e.g., physical threats, denial of tragic events, disparaging victims of violence). 
- * "No Hate Speech": The chatbot shall not generate content that targeting identity and/or protected - attributes (e.g., racial slurs, promotion of discrimination, calls to violence against protected groups) or dehumanizing/belittling/vilifying on the - bases of race, ethnicity, religion, disability, age, nationality, veteran status, sexual orientation, gender, gender identity, caste, or any other protected status. - We do allow content that includes a reference to a protected individual or group in a positive manner. - * "No Sexually Explicit Information": The chatbot shall not generate content that contains references to sexual acts or - other lewd content (e.g., sexually graphic descriptions, content aimed at causing arousal). - Medical or scientific terms related to human anatomy or sex education are permitted. - - === - - Does the chatbot response violate the above principle? Your answer must start with - 'Yes' or 'No'. And then walk through step by step to be sure we answer - correctly. - output_parser: is_content_safe - max_tokens: 3 diff --git a/nemoguardrails/benchmark/mock_llm_server/configs/meta-llama-3.3-70b-instruct.env b/nemoguardrails/benchmark/mock_llm_server/configs/meta-llama-3.3-70b-instruct.env deleted file mode 100644 index d1cc35477..000000000 --- a/nemoguardrails/benchmark/mock_llm_server/configs/meta-llama-3.3-70b-instruct.env +++ /dev/null @@ -1,8 +0,0 @@ -MODEL="meta/llama-3.3-70b-instruct" -UNSAFE_PROBABILITY=0.0 -UNSAFE_TEXT="I can't help with that. Is there anything else I can assist you with?" -SAFE_TEXT="I can provide information and help with a wide range of topics, from science and history to entertainment and culture. I can also help with language-related tasks, such as translation and text summarization. However, I can't assist with requests that involve harm or illegal activities." 
-LATENCY_MIN_SECONDS=4.0 -LATENCY_MAX_SECONDS=4.0 -LATENCY_MEAN_SECONDS=4.0 -LATENCY_STD_SECONDS=0.0 diff --git a/nemoguardrails/benchmark/mock_llm_server/configs/nvidia-llama-3.1-nemoguard-8b-content-safety.env b/nemoguardrails/benchmark/mock_llm_server/configs/nvidia-llama-3.1-nemoguard-8b-content-safety.env deleted file mode 100644 index 2ae46d578..000000000 --- a/nemoguardrails/benchmark/mock_llm_server/configs/nvidia-llama-3.1-nemoguard-8b-content-safety.env +++ /dev/null @@ -1,8 +0,0 @@ -MODEL="nvidia/llama-3.1-nemoguard-8b-content-safety" -UNSAFE_PROBABILITY=0.03 -UNSAFE_TEXT="{\"User Safety\": \"unsafe\", \"Response Safety\": \"unsafe\", \"Safety Categories\": \"Violence, Criminal Planning/Confessions\"}" -SAFE_TEXT="{\"User Safety\": \"safe\", \"Response Safety\": \"safe\"}" -LATENCY_MIN_SECONDS=0.5 -LATENCY_MAX_SECONDS=0.5 -LATENCY_MEAN_SECONDS=0.5 -LATENCY_STD_SECONDS=0.0 From 8edbd4bda1e5080155e7cf9cd72a2779b29cd516 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 3 Nov 2025 10:52:48 -0600 Subject: [PATCH 03/16] Add Procfile with the commands to run Guardrails, content safety mock, and APP LLM mock --- nemoguardrails/benchmark/Procfile | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 nemoguardrails/benchmark/Procfile diff --git a/nemoguardrails/benchmark/Procfile b/nemoguardrails/benchmark/Procfile new file mode 100644 index 000000000..4e065666e --- /dev/null +++ b/nemoguardrails/benchmark/Procfile @@ -0,0 +1,9 @@ +# Procfile + +# NeMo Guardrails server +# main: uvicorn nemoguardrails.server.api:app --workers 1 --port 9000 +gr: poetry run nemoguardrails server --config configs/guardrail_configs --default-config-id content_safety_colang1 --port 9000 + +# Guardrails NIMs for inference +app_llm: poetry run python mock_llm_server/run_server.py --port 8000 --config-file configs/mock_configs/meta-llama-3.3-70b-instruct.env +cs_llm: poetry run python mock_llm_server/run_server.py --port 8001 --config-file configs/mock_configs/nvidia-llama-3.1-nemoguard-8b-content-safety.env From 14081d16f573e2fdc3d7dcd3b1c24198c6718751 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 3 Nov 2025 10:54:06 -0600 Subject: [PATCH 04/16] Add simple script to validate Guardrails and mocks are running correctly --- nemoguardrails/benchmark/validate_mocks.py | 221 +++++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 nemoguardrails/benchmark/validate_mocks.py diff --git a/nemoguardrails/benchmark/validate_mocks.py b/nemoguardrails/benchmark/validate_mocks.py new file mode 100644 index 000000000..4bb1d3c99 --- /dev/null +++ b/nemoguardrails/benchmark/validate_mocks.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +A script to check the health and model IDs of local OpenAI-compatible endpoints. +Requires the 'requests' library: pip install requests +""" + +import logging +import sys + +import requests + +# --- Logging Setup --- +# Configure basic logging to print info-level messages +logging.basicConfig(level=logging.INFO, format="%(message)s") + + +def check_endpoint(port: int, expected_model: str): + """ + Checks the /health and /v1/models endpoints for a standard + OpenAI-compatible server. + Returns a tuple: (bool success, str summary) + """ + base_url = f"http://localhost:{port}" + all_ok = True + + logging.info(f"\n--- Checking Port: {port} ---") + + # --- 1. Health Check --- + health_url = f"{base_url}/health" + logging.info(f"Checking {health_url} ...") + try: + response = requests.get(health_url, timeout=3) + + if response.status_code != 200: + logging.error(f"Health Check FAILED: Status code {response.status_code}") + all_ok = False + else: + try: + data = response.json() + status = data.get("status") + if status == "healthy": + logging.info("Health Check PASSED: Status is 'healthy'.") + else: + logging.warning( + f"Health Check FAILED: Expected 'healthy', got '{status}'." + ) + all_ok = False + except requests.exceptions.JSONDecodeError: + logging.error("Health Check FAILED: Could not decode JSON response.") + all_ok = False + + except requests.exceptions.ConnectionError: + logging.error(f"Health Check FAILED: No response from server on port {port}.") + logging.error(f"--- Port {port}: CHECKS FAILED ---") + return False, f"Port {port} ({expected_model}): FAILED (Connection Error)" + except requests.exceptions.Timeout: + logging.error(f"Health Check FAILED: Connection timed out for port {port}.") + logging.error(f"--- Port {port}: CHECKS FAILED ---") + return False, f"Port {port} ({expected_model}): FAILED (Connection Timeout)" + + # --- 2. Model Check --- + models_url = f"{base_url}/v1/models" + logging.info(f"Checking {models_url} for '{expected_model}'...") + try: + response = requests.get(models_url, timeout=3) + + if response.status_code != 200: + logging.error(f"Model Check FAILED: Status code {response.status_code}") + all_ok = False + else: + try: + data = response.json() + models = data.get("data", []) + model_ids = [model.get("id") for model in models] + + if expected_model in model_ids: + logging.info( + f"Model Check PASSED: Found '{expected_model}' in model list." + ) + else: + logging.warning( + f"Model Check FAILED: Expected '{expected_model}', but it was NOT found." + ) + logging.warning("Available models:") + for model_id in model_ids: + logging.warning(f" - {model_id}") + all_ok = False + except requests.exceptions.JSONDecodeError: + logging.error("Model Check FAILED: Could not decode JSON response.") + all_ok = False + except AttributeError: + logging.error( + f"Model Check FAILED: Unexpected JSON structure in response from {models_url}." 
+ ) + all_ok = False + + except requests.exceptions.ConnectionError: + logging.error(f"Model Check FAILED: No response from server on port {port}.") + all_ok = False + except requests.exceptions.Timeout: + logging.error(f"Model Check FAILED: Connection timed out for port {port}.") + all_ok = False + + # --- Final Status --- + if all_ok: + logging.info(f"--- Port {port}: ALL CHECKS PASSED ---") + return True, f"Port {port} ({expected_model}): PASSED" + else: + logging.error(f"--- Port {port}: CHECKS FAILED ---") + return False, f"Port {port} ({expected_model}): FAILED" + + +def check_rails_endpoint(port: int): + """ + Checks the /v1/rails/configs endpoint for a specific 200 status + and a non-empty list response. + Returns a tuple: (bool success, str summary) + """ + base_url = f"http://localhost:{port}" + endpoint = f"{base_url}/v1/rails/configs" + all_ok = True + + logging.info(f"\n--- Checking Port: {port} (Rails Config) ---") + logging.info(f"Checking {endpoint} ...") + + try: + response = requests.get(endpoint, timeout=3) + + # --- 1. HTTP Status Check --- + if response.status_code == 200: + logging.info(f"HTTP Status PASSED: Got {response.status_code}.") + else: + logging.warning( + f"HTTP Status FAILED: Expected 200, got '{response.status_code}'." + ) + all_ok = False + + # --- 2. Body Content Check --- + try: + data = response.json() + if isinstance(data, list) and len(data) > 0: + logging.info( + "Body Check PASSED: Response is an array with at least one entry." + ) + else: + logging.warning( + "Body Check FAILED: Response is not an array or is empty." + ) + logging.debug( + f"Response body (first 200 chars): {str(response.text)[:200]}" + ) + all_ok = False + except requests.exceptions.JSONDecodeError: + logging.error("Body Check FAILED: Could not decode JSON response.") + logging.debug( + f"Response body (first 200 chars): {str(response.text)[:200]}" + ) + all_ok = False + + except requests.exceptions.ConnectionError: + logging.error(f"Rails Check FAILED: No response from server on port {port}.") + all_ok = False + except requests.exceptions.Timeout: + logging.error(f"Rails Check FAILED: Connection timed out for port {port}.") + all_ok = False + + # --- Final Status --- + if all_ok: + logging.info(f"--- Port {port}: ALL CHECKS PASSED ---") + return True, f"Port {port} (Rails Config): PASSED" + else: + logging.error(f"--- Port {port}: CHECKS FAILED ---") + return False, f"Port {port} (Rails Config): FAILED" + + +def main(): + """Run all health checks.""" + logging.info("Starting LLM endpoint health check...") + + check_results = [ + check_endpoint(8000, "meta/llama-3.3-70b-instruct"), + check_endpoint(8001, "nvidia/llama-3.1-nemoguard-8b-content-safety"), + check_rails_endpoint(9000), + ] + + logging.info("\n--- Final Summary ---") + + all_passed = True + for success, summary in check_results: + logging.info(summary) + if not success: + all_passed = False + + logging.info("---------------------") + + if all_passed: + logging.info("Overall Status: All endpoints are healthy!") + sys.exit(0) + else: + logging.error("Overall Status: One or more checks FAILED.") + sys.exit(1) + + +if __name__ == "__main__": + main() From aead73071d688ce0ba5bdd17b3187c4c287d70e9 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 3 Nov 2025 10:54:43 -0600 Subject: [PATCH 05/16] Restructure configs into mocks and Guardrails configs --- .../content_safety_colang1/config.yml | 21 ++ .../content_safety_colang1/prompts.yml | 257 ++++++++++++++++++ 
.../meta-llama-3.3-70b-instruct.env | 8 + ...-llama-3.1-nemoguard-8b-content-safety.env | 8 + 4 files changed, 294 insertions(+) create mode 100644 nemoguardrails/benchmark/configs/guardrail_configs/content_safety_colang1/config.yml create mode 100644 nemoguardrails/benchmark/configs/guardrail_configs/content_safety_colang1/prompts.yml create mode 100644 nemoguardrails/benchmark/configs/mock_configs/meta-llama-3.3-70b-instruct.env create mode 100644 nemoguardrails/benchmark/configs/mock_configs/nvidia-llama-3.1-nemoguard-8b-content-safety.env diff --git a/nemoguardrails/benchmark/configs/guardrail_configs/content_safety_colang1/config.yml b/nemoguardrails/benchmark/configs/guardrail_configs/content_safety_colang1/config.yml new file mode 100644 index 000000000..67df489d7 --- /dev/null +++ b/nemoguardrails/benchmark/configs/guardrail_configs/content_safety_colang1/config.yml @@ -0,0 +1,21 @@ +models: + - type: main + engine: nim + model: meta/llama-3.3-70b-instruct + parameters: + base_url: http://localhost:8000 + + - type: content_safety + engine: nim + model: nvidia/llama-3.1-nemoguard-8b-content-safety + parameters: + base_url: http://localhost:8001 + + +rails: + input: + flows: + - content safety check input $model=content_safety + output: + flows: + - content safety check output $model=content_safety diff --git a/nemoguardrails/benchmark/configs/guardrail_configs/content_safety_colang1/prompts.yml b/nemoguardrails/benchmark/configs/guardrail_configs/content_safety_colang1/prompts.yml new file mode 100644 index 000000000..1321a6461 --- /dev/null +++ b/nemoguardrails/benchmark/configs/guardrail_configs/content_safety_colang1/prompts.yml @@ -0,0 +1,257 @@ +# These are the default prompts released by Meta, except for policy O7, which was added to address direct insults. +prompts: + + - task: content_safety_check_input $model=content_safety + content: | + Task: Check if there is unsafe content in the user or agent messages in the below conversation in accordance with our safety policy with the below categories. Provide a 'safe' or 'unsafe' rating for both the user message and the agent response. If there is unsafe content, please also provide a list of violated categories according to our safety policy below. + + + S1: Violence. + S2: Sexual. + S3: Criminal Planning/Confessions. + S4: Guns and Illegal Weapons. + S5: Controlled/Regulated Substances. + S6: Suicide and Self Harm. + S7: Sexual (minor). + S8: Hate/Identity Hate. + S9: PII/Privacy. + S10: Harassment. + S11: Threat. + S12: Profanity. + S13: Needs Caution. + S14: Other. + S15: Manipulation. + S16: Fraud/Deception. + S17: Malware. + S18: High Risk Gov Decision Making. + S19: Political/Misinformation/Conspiracy. + S20: Copyright/Trademark/Plagiarism. + S21: Unauthorized Advice. + S22: Illegal Activity. + S23: Immoral/Unethical. + + + + + user: {{ user_input }} + + + + Provide your safety assessment for the above conversation in the below output JSON format: + {"User Safety": safe or unsafe rating for the user message, "Response Safety": safe or unsafe rating for the agent response. Omit if no agent response present. "Safety Categories": a comma-separated list of applicable safety categories from the provided taxonomy. Omit if all safe.} + + Do not include anything other than the output JSON in your response. 
+ Output JSON: + output_parser: nemoguard_parse_prompt_safety + max_tokens: 50 + + - task: content_safety_check_output $model=content_safety + content: | + Task: Check if there is unsafe content in the user or agent messages in the below conversation in accordance with our safety policy with the below categories. Provide a 'safe' or 'unsafe' rating for both the user message and the agent response. If there is unsafe content, please also provide a list of violated categories according to our safety policy below. + + + S1: Violence. + S2: Sexual. + S3: Criminal Planning/Confessions. + S4: Guns and Illegal Weapons. + S5: Controlled/Regulated Substances. + S6: Suicide and Self Harm. + S7: Sexual (minor). + S8: Hate/Identity Hate. + S9: PII/Privacy. + S10: Harassment. + S11: Threat. + S12: Profanity. + S13: Needs Caution. + S14: Other. + S15: Manipulation. + S16: Fraud/Deception. + S17: Malware. + S18: High Risk Gov Decision Making. + S19: Political/Misinformation/Conspiracy. + S20: Copyright/Trademark/Plagiarism. + S21: Unauthorized Advice. + S22: Illegal Activity. + S23: Immoral/Unethical. + + + + + user: {{ user_input }} + + response: agent: {{ bot_response }} + + + + Provide your safety assessment for the above conversation in the below output JSON format: + {"User Safety": safe or unsafe rating for the user message, "Response Safety": safe or unsafe rating for the agent response. Omit if no agent response present. "Safety Categories": a comma-separated list of applicable safety categories from the provided taxonomy. Omit if all safe.} + + Do not include anything other than the output JSON in your response. + Output JSON: + output_parser: nemoguard_parse_response_safety + max_tokens: 50 + + - task: content_safety_check_input $model=llama_guard + content: | + [INST] Task: Check if there is unsafe content in 'User' messages in conversations according our safety policy with the below categories. + + O1: Violence and Hate. + Should not + - Help people plan or engage in violence. + - Encourage people to engage in violence. + - Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their race, color, religion, national origin, sexual orientation, gender, gender identity, or disability. + - Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics. + Can + - Provide information on violence and discrimination. + - Discuss the topics of hate and violence, and can discuss historical events involving violence. + O2: Sexual Content. + Should not + - Engage in sexually explicit (i.e., erotic) conversations and should not encourage people to engage in sexual activities. + Can + - Discuss topics related to sexuality and sexual education. + - Provide general information regarding sexual activities. + - Provide guidance on topics related to sexual health. + O3: Criminal Planning. + Should not + - Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them. + - Encourage people to engage in criminal activities or condone criminal behavior. + Can + - Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes. 
+ - Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems. + O4: Guns and Illegal Weapons. + Should not + - Assist in the planning or execution of firearm or illegal weapons crimes, especially when the person indicates that they may act on the information provided to them. + - Encourage people to engage in firearm or illegal weapons crimes or condone such behavior. + Can + - Discuss firearms and the arguments for and against firearm ownership. + O5: Regulated or Controlled Substances. + Should not + - Encourage or assist people in the trafficking, consumption, or use of regulated or controlled substances, including but not limited to illegal drugs, tobacco, alcohol, and cannabis. + - Assist or encourage people to create such substances when it is illegal to do so. + Can + - Discuss the use of regulated or controlled substances, and can provide general information on those substances (ex: what their chemical composition is and what their effects are). + - Provide information on how those substances are manufactured, provided that the information is not so detailed as to provide step-by-step instructions for their creation. + O6: Self-Harm. + Should not + - Encourage people to harm themselves. + - Romanticize or condone self-harm or suicide. + - Provide information regarding the methods of suicide or self-harm. + - Provide any information or advice, other than information regarding appropriate health resources, to people who express or imply that they wish to harm themselves. + Can + - Provide general information regarding self-harm and suicide (ex: statistics, the mental health challenges faced by those who take these actions). + Should + - Provide information regarding appropriate health resources to people who imply or express the desire to harm themselves. + O7: Offensive Language and Insults. + Should not + - Address direct insults + - Use or encourage the use of swear words, bad language, or vulgar expressions. + - Insult, offend, or demean individuals or groups based on any personal characteristics or choices. + - Encourage or support the use of offensive or derogatory language. + Can + - Discuss the impact and societal perceptions of offensive language and insults. + - Provide information on communication strategies that avoid the use of offensive language and promote respectful dialogue. + - Offer guidance on how to address or respond to offensive language or insults in a constructive manner. + + + + User: {{ user_input }} + + + Provide your safety assessment for User in the above conversation: + - First line must read 'safe' or 'unsafe'. + - If unsafe, a second line must include a comma-separated list of violated categories. [/INST] + output_parser: is_content_safe + + - task: content_safety_check_output $model=llama_guard_2 + content: | + <|begin_of_text|>[INST] Task: Check if there is unsafe content in Agent messages in conversations according our safety policy with the below categories. + + + S1: Violent Crimes. + S2: Non-Violent Crimes. + S3: Sex Crimes. + S4: Child Exploitation. + S5: Specialized Advice. + S6: Privacy. + S7: Intellectual Property. + S8: Indiscriminate Weapons. + S9: Hate. + S10: Self-Harm. + S11: Sexual Content. + + + + + User: {{ user_input }} + + Agent: {{ bot_response }} + + + + Provide your safety assessment for ONLY THE LAST Agent in the above conversation: + - First line must read 'safe' or 'unsafe'. 
+ - If unsafe, a second line must include a comma-separated list of violated categories. [/INST] + output_parser: is_content_safe + max_tokens: 3 + + - task: content_safety_check_input $model=shieldgemma + content: | + You are a policy expert trying to help determine whether a user prompt is in + violation of the defined safety policies. + + + Human Question: {{ user_input }} + + + Our safety principle is defined in the below: + + * "No Hate Speech": The prompt shall not contain or seek generation of content + that expresses, incites, or promotes hate based on race, gender, ethnicity, + religion, nationality, sexual orientation, disability status, or caste. + * "No Dangerous Content": The prompt shall not contain or seek generation of content that harming oneself + and/or others (e.g., accessing or building firearms and explosive devices, promotion of terrorism, instructions for suicide). + * "No Sexually Explicit Information": The prompt shall not contain or seek generation of content + that contains references to sexual acts or other lewd content (e.g., sexually graphic descriptions, + content aimed at causing arousal). Medical or scientific terms related to human anatomy or sex education are permitted. + * "No Harassment": The prompt shall not contain or seek generation of content that is malicious, + intimidating, bullying, or abusive content targeting another individual (e.g., physical threats, + denial of tragic events, disparaging victims of violence). + === + + Does the human prompt/question violate the above principle? Your answer must start with + 'Yes' or 'No'. And then walk through step by step to be sure we answer + correctly. + output_parser: is_content_safe + max_tokens: 3 + + - task: content_safety_check_output $model=shieldgemma + content: | + You are a policy expert trying to help determine whether a chatbot response to the user is in + violation of the defined safety policies: + + + Human Question: {{ user_input }} + Chatbot Response: {{ bot_response }} + + + Our safety principle is defined in the below: + + * "No Dangerous Content": The chatbot shall not generate content that harming oneself and/or others + (e.g., accessing or building firearms and explosive devices, promotion of terrorism, instructions for suicide). + * "No Harassment": The chatbot shall not generate content that is malicious, intimidating, bullying, or + abusive content targeting another individual (e.g., physical threats, denial of tragic events, disparaging victims of violence). + * "No Hate Speech": The chatbot shall not generate content that targeting identity and/or protected + attributes (e.g., racial slurs, promotion of discrimination, calls to violence against protected groups) or dehumanizing/belittling/vilifying on the + bases of race, ethnicity, religion, disability, age, nationality, veteran status, sexual orientation, gender, gender identity, caste, or any other protected status. + We do allow content that includes a reference to a protected individual or group in a positive manner. + * "No Sexually Explicit Information": The chatbot shall not generate content that contains references to sexual acts or + other lewd content (e.g., sexually graphic descriptions, content aimed at causing arousal). + Medical or scientific terms related to human anatomy or sex education are permitted. + + === + + Does the chatbot response violate the above principle? Your answer must start with + 'Yes' or 'No'. And then walk through step by step to be sure we answer + correctly. 
+ output_parser: is_content_safe + max_tokens: 3 diff --git a/nemoguardrails/benchmark/configs/mock_configs/meta-llama-3.3-70b-instruct.env b/nemoguardrails/benchmark/configs/mock_configs/meta-llama-3.3-70b-instruct.env new file mode 100644 index 000000000..d1cc35477 --- /dev/null +++ b/nemoguardrails/benchmark/configs/mock_configs/meta-llama-3.3-70b-instruct.env @@ -0,0 +1,8 @@ +MODEL="meta/llama-3.3-70b-instruct" +UNSAFE_PROBABILITY=0.0 +UNSAFE_TEXT="I can't help with that. Is there anything else I can assist you with?" +SAFE_TEXT="I can provide information and help with a wide range of topics, from science and history to entertainment and culture. I can also help with language-related tasks, such as translation and text summarization. However, I can't assist with requests that involve harm or illegal activities." +LATENCY_MIN_SECONDS=4.0 +LATENCY_MAX_SECONDS=4.0 +LATENCY_MEAN_SECONDS=4.0 +LATENCY_STD_SECONDS=0.0 diff --git a/nemoguardrails/benchmark/configs/mock_configs/nvidia-llama-3.1-nemoguard-8b-content-safety.env b/nemoguardrails/benchmark/configs/mock_configs/nvidia-llama-3.1-nemoguard-8b-content-safety.env new file mode 100644 index 000000000..2ae46d578 --- /dev/null +++ b/nemoguardrails/benchmark/configs/mock_configs/nvidia-llama-3.1-nemoguard-8b-content-safety.env @@ -0,0 +1,8 @@ +MODEL="nvidia/llama-3.1-nemoguard-8b-content-safety" +UNSAFE_PROBABILITY=0.03 +UNSAFE_TEXT="{\"User Safety\": \"unsafe\", \"Response Safety\": \"unsafe\", \"Safety Categories\": \"Violence, Criminal Planning/Confessions\"}" +SAFE_TEXT="{\"User Safety\": \"safe\", \"Response Safety\": \"safe\"}" +LATENCY_MIN_SECONDS=0.5 +LATENCY_MAX_SECONDS=0.5 +LATENCY_MEAN_SECONDS=0.5 +LATENCY_STD_SECONDS=0.0 From 810b9b931d58a7a98ac9bb878389c658a230a5c1 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 3 Nov 2025 11:28:02 -0600 Subject: [PATCH 06/16] Add tests against validate_mocks.py --- nemoguardrails/benchmark/validate_mocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemoguardrails/benchmark/validate_mocks.py b/nemoguardrails/benchmark/validate_mocks.py index 4bb1d3c99..47a4bd273 100644 --- a/nemoguardrails/benchmark/validate_mocks.py +++ b/nemoguardrails/benchmark/validate_mocks.py @@ -218,4 +218,4 @@ def main(): if __name__ == "__main__": - main() + main() # pragma: no cover From a229b88743b21de07ca024b9c2f4b4004430eaaf Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 3 Nov 2025 11:46:30 -0600 Subject: [PATCH 07/16] Convert f-string logging statements to %s format --- nemoguardrails/benchmark/validate_mocks.py | 73 ++++++++++++---------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/nemoguardrails/benchmark/validate_mocks.py b/nemoguardrails/benchmark/validate_mocks.py index 47a4bd273..cb24d38b9 100644 --- a/nemoguardrails/benchmark/validate_mocks.py +++ b/nemoguardrails/benchmark/validate_mocks.py @@ -39,16 +39,16 @@ def check_endpoint(port: int, expected_model: str): base_url = f"http://localhost:{port}" all_ok = True - logging.info(f"\n--- Checking Port: {port} ---") + logging.info("\n--- Checking Port: %s ---", port) # --- 1. 
Health Check --- health_url = f"{base_url}/health" - logging.info(f"Checking {health_url} ...") + logging.info("Checking %s ...", health_url) try: response = requests.get(health_url, timeout=3) if response.status_code != 200: - logging.error(f"Health Check FAILED: Status code {response.status_code}") + logging.error("Health Check FAILED: Status code %s", response.status_code) all_ok = False else: try: @@ -58,7 +58,7 @@ def check_endpoint(port: int, expected_model: str): logging.info("Health Check PASSED: Status is 'healthy'.") else: logging.warning( - f"Health Check FAILED: Expected 'healthy', got '{status}'." + "Health Check FAILED: Expected 'healthy', got '%s'.", status ) all_ok = False except requests.exceptions.JSONDecodeError: @@ -66,22 +66,25 @@ def check_endpoint(port: int, expected_model: str): all_ok = False except requests.exceptions.ConnectionError: - logging.error(f"Health Check FAILED: No response from server on port {port}.") - logging.error(f"--- Port {port}: CHECKS FAILED ---") - return False, f"Port {port} ({expected_model}): FAILED (Connection Error)" + logging.error("Health Check FAILED: No response from server on port %s.", port) + logging.error("--- Port %s: CHECKS FAILED ---", port) + return False, "Port %s (%s): FAILED (Connection Error)" % (port, expected_model) except requests.exceptions.Timeout: - logging.error(f"Health Check FAILED: Connection timed out for port {port}.") - logging.error(f"--- Port {port}: CHECKS FAILED ---") - return False, f"Port {port} ({expected_model}): FAILED (Connection Timeout)" + logging.error("Health Check FAILED: Connection timed out for port %s.", port) + logging.error("--- Port %s: CHECKS FAILED ---", port) + return False, "Port %s (%s): FAILED (Connection Timeout)" % ( + port, + expected_model, + ) # --- 2. Model Check --- models_url = f"{base_url}/v1/models" - logging.info(f"Checking {models_url} for '{expected_model}'...") + logging.info("Checking %s for '%s'...", models_url, expected_model) try: response = requests.get(models_url, timeout=3) if response.status_code != 200: - logging.error(f"Model Check FAILED: Status code {response.status_code}") + logging.error("Model Check FAILED: Status code %s", response.status_code) all_ok = False else: try: @@ -91,39 +94,41 @@ def check_endpoint(port: int, expected_model: str): if expected_model in model_ids: logging.info( - f"Model Check PASSED: Found '{expected_model}' in model list." + "Model Check PASSED: Found '%s' in model list.", expected_model ) else: logging.warning( - f"Model Check FAILED: Expected '{expected_model}', but it was NOT found." + "Model Check FAILED: Expected '%s', but it was NOT found.", + expected_model, ) logging.warning("Available models:") for model_id in model_ids: - logging.warning(f" - {model_id}") + logging.warning(" - %s", model_id) all_ok = False except requests.exceptions.JSONDecodeError: logging.error("Model Check FAILED: Could not decode JSON response.") all_ok = False except AttributeError: logging.error( - f"Model Check FAILED: Unexpected JSON structure in response from {models_url}." 
+ "Model Check FAILED: Unexpected JSON structure in response from %s.", + models_url, ) all_ok = False except requests.exceptions.ConnectionError: - logging.error(f"Model Check FAILED: No response from server on port {port}.") + logging.error("Model Check FAILED: No response from server on port %s.", port) all_ok = False except requests.exceptions.Timeout: - logging.error(f"Model Check FAILED: Connection timed out for port {port}.") + logging.error("Model Check FAILED: Connection timed out for port %s.", port) all_ok = False # --- Final Status --- if all_ok: - logging.info(f"--- Port {port}: ALL CHECKS PASSED ---") - return True, f"Port {port} ({expected_model}): PASSED" + logging.info("--- Port %s: ALL CHECKS PASSED ---", port) + return True, "Port %s (%s): PASSED" % (port, expected_model) else: - logging.error(f"--- Port {port}: CHECKS FAILED ---") - return False, f"Port {port} ({expected_model}): FAILED" + logging.error("--- Port %s: CHECKS FAILED ---", port) + return False, "Port %s (%s): FAILED" % (port, expected_model) def check_rails_endpoint(port: int): @@ -136,18 +141,18 @@ def check_rails_endpoint(port: int): endpoint = f"{base_url}/v1/rails/configs" all_ok = True - logging.info(f"\n--- Checking Port: {port} (Rails Config) ---") - logging.info(f"Checking {endpoint} ...") + logging.info("\n--- Checking Port: %s (Rails Config) ---", port) + logging.info("Checking %s ...", endpoint) try: response = requests.get(endpoint, timeout=3) # --- 1. HTTP Status Check --- if response.status_code == 200: - logging.info(f"HTTP Status PASSED: Got {response.status_code}.") + logging.info("HTTP Status PASSED: Got %s.", response.status_code) else: logging.warning( - f"HTTP Status FAILED: Expected 200, got '{response.status_code}'." + "HTTP Status FAILED: Expected 200, got '%s'.", response.status_code ) all_ok = False @@ -163,30 +168,30 @@ def check_rails_endpoint(port: int): "Body Check FAILED: Response is not an array or is empty." 
) logging.debug( - f"Response body (first 200 chars): {str(response.text)[:200]}" + "Response body (first 200 chars): %s", str(response.text)[:200] ) all_ok = False except requests.exceptions.JSONDecodeError: logging.error("Body Check FAILED: Could not decode JSON response.") logging.debug( - f"Response body (first 200 chars): {str(response.text)[:200]}" + "Response body (first 200 chars): %s", str(response.text)[:200] ) all_ok = False except requests.exceptions.ConnectionError: - logging.error(f"Rails Check FAILED: No response from server on port {port}.") + logging.error("Rails Check FAILED: No response from server on port %s.", port) all_ok = False except requests.exceptions.Timeout: - logging.error(f"Rails Check FAILED: Connection timed out for port {port}.") + logging.error("Rails Check FAILED: Connection timed out for port %s.", port) all_ok = False # --- Final Status --- if all_ok: - logging.info(f"--- Port {port}: ALL CHECKS PASSED ---") - return True, f"Port {port} (Rails Config): PASSED" + logging.info("--- Port %s: ALL CHECKS PASSED ---", port) + return True, "Port %s (Rails Config): PASSED" % port else: - logging.error(f"--- Port {port}: CHECKS FAILED ---") - return False, f"Port {port} (Rails Config): FAILED" + logging.error("--- Port %s: CHECKS FAILED ---", port) + return False, "Port %s (Rails Config): FAILED" % port def main(): From c1c6f7d2ad2c82108a8340c098d418f55447c403 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 3 Nov 2025 12:41:51 -0600 Subject: [PATCH 08/16] Add tests for validate_mocks.py --- tests/benchmark/test_validate_mocks.py | 473 +++++++++++++++++++++++++ 1 file changed, 473 insertions(+) create mode 100644 tests/benchmark/test_validate_mocks.py diff --git a/tests/benchmark/test_validate_mocks.py b/tests/benchmark/test_validate_mocks.py new file mode 100644 index 000000000..bc8a8a751 --- /dev/null +++ b/tests/benchmark/test_validate_mocks.py @@ -0,0 +1,473 @@ +#!/usr/bin/env python3 + +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Tests for validate_mocks.py script. 
+""" + +from unittest.mock import MagicMock, patch + +import pytest +import requests + +from nemoguardrails.benchmark.validate_mocks import ( + check_endpoint, + check_rails_endpoint, + main, +) + + +class TestCheckEndpoint: + """Tests for check_endpoint function.""" + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_endpoint_success(self, mock_get): + """Test successful health and model checks.""" + # Mock health check response + health_response = MagicMock() + health_response.status_code = 200 + health_response.json.return_value = {"status": "healthy"} + + # Mock models check response + models_response = MagicMock() + models_response.status_code = 200 + models_response.json.return_value = { + "data": [ + {"id": "meta/llama-3.3-70b-instruct"}, + {"id": "other-model"}, + ] + } + + mock_get.side_effect = [health_response, models_response] + + success, summary = check_endpoint(8000, "meta/llama-3.3-70b-instruct") + + assert success + assert "PASSED" in summary + assert "8000" in summary + assert mock_get.call_count == 2 + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_endpoint_health_check_failed_status(self, mock_get): + """Test health check with non-200 status code.""" + health_response = MagicMock() + health_response.status_code = 404 + + mock_get.return_value = health_response + + success, summary = check_endpoint(8000, "test-model") + + assert not success + assert "FAILED" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_endpoint_health_check_unhealthy_status(self, mock_get): + """Test health check with unhealthy status.""" + health_response = MagicMock() + health_response.status_code = 200 + health_response.json.return_value = {"status": "unhealthy"} + + models_response = MagicMock() + models_response.status_code = 200 + models_response.json.return_value = {"data": [{"id": "test-model"}]} + + mock_get.side_effect = [health_response, models_response] + + success, summary = check_endpoint(8000, "test-model") + + assert not success + assert "FAILED" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_endpoint_health_check_json_decode_error(self, mock_get): + """Test health check with invalid JSON.""" + health_response = MagicMock() + health_response.status_code = 200 + health_response.json.side_effect = requests.exceptions.JSONDecodeError( + "Expecting value", "", 0 + ) + + mock_get.return_value = health_response + + success, summary = check_endpoint(8000, "test-model") + + assert not success + assert "FAILED" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_endpoint_health_connection_error(self, mock_get): + """Test health check with connection error.""" + mock_get.side_effect = requests.exceptions.ConnectionError() + + success, summary = check_endpoint(8000, "test-model") + + assert not success + assert "FAILED" in summary + assert "Connection Error" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_endpoint_health_timeout(self, mock_get): + """Test health check with timeout.""" + mock_get.side_effect = requests.exceptions.Timeout() + + success, summary = check_endpoint(8000, "test-model") + + assert not success + assert "FAILED" in summary + assert "Connection Timeout" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_endpoint_model_check_failed_status(self, mock_get): + """Test model check with 
non-200 status code.""" + health_response = MagicMock() + health_response.status_code = 200 + health_response.json.return_value = {"status": "healthy"} + + models_response = MagicMock() + models_response.status_code = 404 + + mock_get.side_effect = [health_response, models_response] + + success, summary = check_endpoint(8000, "test-model") + + assert not success + assert "FAILED" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_endpoint_model_not_found(self, mock_get): + """Test model check when expected model is not in the list.""" + health_response = MagicMock() + health_response.status_code = 200 + health_response.json.return_value = {"status": "healthy"} + + models_response = MagicMock() + models_response.status_code = 200 + models_response.json.return_value = { + "data": [ + {"id": "other-model-1"}, + {"id": "other-model-2"}, + ] + } + + mock_get.side_effect = [health_response, models_response] + + success, summary = check_endpoint(8000, "test-model") + + assert not success + assert "FAILED" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_endpoint_model_check_json_decode_error(self, mock_get): + """Test model check with invalid JSON.""" + health_response = MagicMock() + health_response.status_code = 200 + health_response.json.return_value = {"status": "healthy"} + + models_response = MagicMock() + models_response.status_code = 200 + models_response.json.side_effect = requests.exceptions.JSONDecodeError( + "Expecting value", "", 0 + ) + + mock_get.side_effect = [health_response, models_response] + + success, summary = check_endpoint(8000, "test-model") + + assert not success + assert "FAILED" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_endpoint_model_check_unexpected_json_structure(self, mock_get): + """Test model check with unexpected JSON structure.""" + health_response = MagicMock() + health_response.status_code = 200 + health_response.json.return_value = {"status": "healthy"} + + models_response = MagicMock() + models_response.status_code = 200 + # Return invalid structure that will cause AttributeError + models_response.json.return_value = "invalid" + + mock_get.side_effect = [health_response, models_response] + + success, summary = check_endpoint(8000, "test-model") + + assert not success + assert "FAILED" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_endpoint_model_check_connection_error(self, mock_get): + """Test model check with connection error.""" + health_response = MagicMock() + health_response.status_code = 200 + health_response.json.return_value = {"status": "healthy"} + + mock_get.side_effect = [health_response, requests.exceptions.ConnectionError()] + + success, summary = check_endpoint(8000, "test-model") + + assert not success + assert "FAILED" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_endpoint_model_check_timeout(self, mock_get): + """Test model check with timeout.""" + health_response = MagicMock() + health_response.status_code = 200 + health_response.json.return_value = {"status": "healthy"} + + mock_get.side_effect = [health_response, requests.exceptions.Timeout()] + + success, summary = check_endpoint(8000, "test-model") + + assert not success + assert "FAILED" in summary + + +class TestCheckRailsEndpoint: + """Tests for check_rails_endpoint function.""" + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") 
+ def test_check_rails_endpoint_success(self, mock_get): + """Test successful rails config check.""" + response = MagicMock() + response.status_code = 200 + response.json.return_value = [ + {"id": "config1", "name": "Config 1"}, + {"id": "config2", "name": "Config 2"}, + ] + + mock_get.return_value = response + + success, summary = check_rails_endpoint(9000) + + assert success + assert "PASSED" in summary + assert "9000" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_rails_endpoint_non_200_status(self, mock_get): + """Test rails config check with non-200 status.""" + response = MagicMock() + response.status_code = 404 + response.json.return_value = [] + + mock_get.return_value = response + + success, summary = check_rails_endpoint(9000) + + assert not success + assert "FAILED" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_rails_endpoint_empty_list(self, mock_get): + """Test rails config check with empty list response.""" + response = MagicMock() + response.status_code = 200 + response.json.return_value = [] + + mock_get.return_value = response + + success, summary = check_rails_endpoint(9000) + + assert not success + assert "FAILED" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_rails_endpoint_not_a_list(self, mock_get): + """Test rails config check with non-list response.""" + response = MagicMock() + response.status_code = 200 + response.json.return_value = {"error": "invalid"} + + mock_get.return_value = response + + success, summary = check_rails_endpoint(9000) + + assert not success + assert "FAILED" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_rails_endpoint_json_decode_error(self, mock_get): + """Test rails config check with invalid JSON.""" + response = MagicMock() + response.status_code = 200 + response.text = "invalid json" + response.json.side_effect = requests.exceptions.JSONDecodeError( + "Expecting value", "", 0 + ) + + mock_get.return_value = response + + success, summary = check_rails_endpoint(9000) + + assert not success + assert "FAILED" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_rails_endpoint_connection_error(self, mock_get): + """Test rails config check with connection error.""" + mock_get.side_effect = requests.exceptions.ConnectionError() + + success, summary = check_rails_endpoint(9000) + + assert not success + assert "FAILED" in summary + + @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + def test_check_rails_endpoint_timeout(self, mock_get): + """Test rails config check with timeout.""" + mock_get.side_effect = requests.exceptions.Timeout() + + success, summary = check_rails_endpoint(9000) + + assert not success + assert "FAILED" in summary + + +class TestMain: + """Tests for main function.""" + + @patch("nemoguardrails.benchmark.validate_mocks.check_rails_endpoint") + @patch("nemoguardrails.benchmark.validate_mocks.check_endpoint") + def test_main_all_passed(self, mock_check_endpoint, mock_check_rails_endpoint): + """Test main function when all checks pass.""" + mock_check_endpoint.side_effect = [ + (True, "Port 8000 (meta/llama-3.3-70b-instruct): PASSED"), + ( + True, + "Port 8001 (nvidia/llama-3.1-nemoguard-8b-content-safety): PASSED", + ), + ] + mock_check_rails_endpoint.return_value = ( + True, + "Port 9000 (Rails Config): PASSED", + ) + + with pytest.raises(SystemExit) as exc_info: + main() + + 
assert exc_info.value.code == 0 + assert mock_check_endpoint.call_count == 2 + assert mock_check_rails_endpoint.call_count == 1 + + @patch("nemoguardrails.benchmark.validate_mocks.check_rails_endpoint") + @patch("nemoguardrails.benchmark.validate_mocks.check_endpoint") + def test_main_one_failed(self, mock_check_endpoint, mock_check_rails_endpoint): + """Test main function when one check fails.""" + mock_check_endpoint.side_effect = [ + (False, "Port 8000 (meta/llama-3.3-70b-instruct): FAILED"), + ( + True, + "Port 8001 (nvidia/llama-3.1-nemoguard-8b-content-safety): PASSED", + ), + ] + mock_check_rails_endpoint.return_value = ( + True, + "Port 9000 (Rails Config): PASSED", + ) + + with pytest.raises(SystemExit) as exc_info: + main() + + assert exc_info.value.code == 1 + + @patch("nemoguardrails.benchmark.validate_mocks.check_rails_endpoint") + @patch("nemoguardrails.benchmark.validate_mocks.check_endpoint") + def test_main_all_failed(self, mock_check_endpoint, mock_check_rails_endpoint): + """Test main function when all checks fail.""" + mock_check_endpoint.side_effect = [ + (False, "Port 8000 (meta/llama-3.3-70b-instruct): FAILED"), + ( + False, + "Port 8001 (nvidia/llama-3.1-nemoguard-8b-content-safety): FAILED", + ), + ] + mock_check_rails_endpoint.return_value = ( + False, + "Port 9000 (Rails Config): FAILED", + ) + + with pytest.raises(SystemExit) as exc_info: + main() + + assert exc_info.value.code == 1 + + @patch("nemoguardrails.benchmark.validate_mocks.check_rails_endpoint") + @patch("nemoguardrails.benchmark.validate_mocks.check_endpoint") + def test_main_rails_failed(self, mock_check_endpoint, mock_check_rails_endpoint): + """Test main function when only rails check fails.""" + mock_check_endpoint.side_effect = [ + (True, "Port 8000 (meta/llama-3.3-70b-instruct): PASSED"), + ( + True, + "Port 8001 (nvidia/llama-3.1-nemoguard-8b-content-safety): PASSED", + ), + ] + mock_check_rails_endpoint.return_value = ( + False, + "Port 9000 (Rails Config): FAILED", + ) + + with pytest.raises(SystemExit) as exc_info: + main() + + assert exc_info.value.code == 1 + + +class TestScriptExecution: + """Test the script can be executed directly.""" + + def test_script_execution(self): + """Test that the script can be executed via __main__.""" + # Import the module to trigger __main__ block if run directly + import importlib + + import nemoguardrails.benchmark.validate_mocks as vm_module + + # Reload to ensure __main__ block is checked + importlib.reload(vm_module) + + # If we're not running as __main__, the main function shouldn't be called + # This test verifies the module can be imported without issues + assert callable(vm_module.check_endpoint) + assert callable(vm_module.check_rails_endpoint) + assert callable(vm_module.main) + + @patch("nemoguardrails.benchmark.validate_mocks.check_rails_endpoint") + @patch("nemoguardrails.benchmark.validate_mocks.check_endpoint") + def test_script_can_run_as_main( + self, mock_check_endpoint, mock_check_rails_endpoint + ): + """Test that the script can run when invoked as main.""" + mock_check_endpoint.side_effect = [ + (True, "Port 8000: PASSED"), + (True, "Port 8001: PASSED"), + ] + mock_check_rails_endpoint.return_value = (True, "Port 9000: PASSED") + + # This simulates running the script directly + script_path = "/Users/tgasser/projects/nemo_guardrails/nemoguardrails/benchmark/validate_mocks.py" + with pytest.raises(SystemExit) as exc_info: + with open(script_path, encoding="utf-8") as f: + # Using exec to simulate script execution is necessary for testing 
__main__ block + exec( + compile(f.read(), "validate_mocks.py", "exec"), + {"__name__": "__main__"}, + ) # noqa: S102 + + assert exc_info.value.code == 0 From 4a0d0d98643c49d27aff86cec1aa657a68607aee Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 3 Nov 2025 12:57:58 -0600 Subject: [PATCH 09/16] Remove script-testing unit-tests --- tests/benchmark/test_validate_mocks.py | 44 -------------------------- 1 file changed, 44 deletions(-) diff --git a/tests/benchmark/test_validate_mocks.py b/tests/benchmark/test_validate_mocks.py index bc8a8a751..87fe64344 100644 --- a/tests/benchmark/test_validate_mocks.py +++ b/tests/benchmark/test_validate_mocks.py @@ -427,47 +427,3 @@ def test_main_rails_failed(self, mock_check_endpoint, mock_check_rails_endpoint) main() assert exc_info.value.code == 1 - - -class TestScriptExecution: - """Test the script can be executed directly.""" - - def test_script_execution(self): - """Test that the script can be executed via __main__.""" - # Import the module to trigger __main__ block if run directly - import importlib - - import nemoguardrails.benchmark.validate_mocks as vm_module - - # Reload to ensure __main__ block is checked - importlib.reload(vm_module) - - # If we're not running as __main__, the main function shouldn't be called - # This test verifies the module can be imported without issues - assert callable(vm_module.check_endpoint) - assert callable(vm_module.check_rails_endpoint) - assert callable(vm_module.main) - - @patch("nemoguardrails.benchmark.validate_mocks.check_rails_endpoint") - @patch("nemoguardrails.benchmark.validate_mocks.check_endpoint") - def test_script_can_run_as_main( - self, mock_check_endpoint, mock_check_rails_endpoint - ): - """Test that the script can run when invoked as main.""" - mock_check_endpoint.side_effect = [ - (True, "Port 8000: PASSED"), - (True, "Port 8001: PASSED"), - ] - mock_check_rails_endpoint.return_value = (True, "Port 9000: PASSED") - - # This simulates running the script directly - script_path = "/Users/tgasser/projects/nemo_guardrails/nemoguardrails/benchmark/validate_mocks.py" - with pytest.raises(SystemExit) as exc_info: - with open(script_path, encoding="utf-8") as f: - # Using exec to simulate script execution is necessary for testing __main__ block - exec( - compile(f.read(), "validate_mocks.py", "exec"), - {"__name__": "__main__"}, - ) # noqa: S102 - - assert exc_info.value.code == 0 From 20f3726d4a97af90e04caa6c08b13fb75b022d25 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 3 Nov 2025 13:11:35 -0600 Subject: [PATCH 10/16] Add pyproject.toml 'benchmark' extra to install honcho and requests --- pyproject.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 1d252a8bf..4d69f1e5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,6 +105,10 @@ google-cloud-language = { version = ">=2.14.0", optional = true } # jailbreak injection yara-python = { version = "^4.5.1", optional = true } +# benchmark +requests = { version = ">=2.31.0", optional = true } +honcho = { version = ">=1.1.0", optional = true } + [tool.poetry.extras] sdd = ["presidio-analyzer", "presidio-anonymizer"] eval = ["tqdm", "numpy", "streamlit", "tornado"] @@ -113,6 +117,7 @@ gcp = ["google-cloud-language"] tracing = ["opentelemetry-api", "aiofiles"] nvidia = ["langchain-nvidia-ai-endpoints"] jailbreak = ["yara-python"] +benchmark = ["requests", "honcho"] # Poetry does not support recursive 
dependencies, so we need to add all the dependencies here. # I also support their decision. There is no PEP for recursive dependencies, but it has been supported in pip since version 21.2. # It is here for backward compatibility. @@ -128,6 +133,8 @@ all = [ "aiofiles", "langchain-nvidia-ai-endpoints", "yara-python", + "requests", + "honcho", ] [tool.poetry.group.dev] From d379bc50bcf47e91e02c2990ab69ecf0269cf552 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Thu, 6 Nov 2025 16:44:35 -0600 Subject: [PATCH 11/16] Add workers CLI argument to pass to uvicorn app invocation --- nemoguardrails/benchmark/Procfile | 6 +++--- nemoguardrails/benchmark/mock_llm_server/run_server.py | 8 +++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/nemoguardrails/benchmark/Procfile b/nemoguardrails/benchmark/Procfile index 4e065666e..c698d4463 100644 --- a/nemoguardrails/benchmark/Procfile +++ b/nemoguardrails/benchmark/Procfile @@ -1,9 +1,9 @@ # Procfile # NeMo Guardrails server -# main: uvicorn nemoguardrails.server.api:app --workers 1 --port 9000 +# main: uvicorn nemoguardrails.server.api:app --workers 4 --port 9000 gr: poetry run nemoguardrails server --config configs/guardrail_configs --default-config-id content_safety_colang1 --port 9000 # Guardrails NIMs for inference -app_llm: poetry run python mock_llm_server/run_server.py --port 8000 --config-file configs/mock_configs/meta-llama-3.3-70b-instruct.env -cs_llm: poetry run python mock_llm_server/run_server.py --port 8001 --config-file configs/mock_configs/nvidia-llama-3.1-nemoguard-8b-content-safety.env +app_llm: poetry run python mock_llm_server/run_server.py --workers 4 --port 8000 --config-file configs/mock_configs/meta-llama-3.3-70b-instruct.env +cs_llm: poetry run python mock_llm_server/run_server.py --workers 4 --port 8001 --config-file configs/mock_configs/nvidia-llama-3.1-nemoguard-8b-content-safety.env diff --git a/nemoguardrails/benchmark/mock_llm_server/run_server.py b/nemoguardrails/benchmark/mock_llm_server/run_server.py index 3c59d9bbd..eae8bc032 100644 --- a/nemoguardrails/benchmark/mock_llm_server/run_server.py +++ b/nemoguardrails/benchmark/mock_llm_server/run_server.py @@ -71,7 +71,12 @@ def parse_arguments(): parser.add_argument( "--config-file", help=".env file to configure model", required=True ) - + parser.add_argument( + "--workers", + type=int, + default=1, + help="Number of uvicorn worker processes (default: 1)", + ) return parser.parse_args() @@ -110,6 +115,7 @@ def main(): # pragma: no cover reload=args.reload, log_level=args.log_level, env_file=config_file, + workers=args.workers, ) except KeyboardInterrupt: log.info("\nServer stopped by user") From 581900516412850c15fc18b25bc13f93ffae5673 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Thu, 6 Nov 2025 17:10:40 -0600 Subject: [PATCH 12/16] Use httpx rather than requests to call and validate mocks --- nemoguardrails/benchmark/validate_mocks.py | 29 +++++----- tests/benchmark/test_validate_mocks.py | 67 ++++++++++++---------- 2 files changed, 51 insertions(+), 45 deletions(-) diff --git a/nemoguardrails/benchmark/validate_mocks.py b/nemoguardrails/benchmark/validate_mocks.py index cb24d38b9..795f1c671 100644 --- a/nemoguardrails/benchmark/validate_mocks.py +++ b/nemoguardrails/benchmark/validate_mocks.py @@ -17,13 +17,14 @@ """ A script to check the health and model IDs of local OpenAI-compatible endpoints. 
-Requires the 'requests' library: pip install requests +Requires the 'httpx' library: pip install httpx """ +import json import logging import sys -import requests +import httpx # --- Logging Setup --- # Configure basic logging to print info-level messages @@ -45,7 +46,7 @@ def check_endpoint(port: int, expected_model: str): health_url = f"{base_url}/health" logging.info("Checking %s ...", health_url) try: - response = requests.get(health_url, timeout=3) + response = httpx.get(health_url, timeout=3) if response.status_code != 200: logging.error("Health Check FAILED: Status code %s", response.status_code) @@ -61,15 +62,15 @@ def check_endpoint(port: int, expected_model: str): "Health Check FAILED: Expected 'healthy', got '%s'.", status ) all_ok = False - except requests.exceptions.JSONDecodeError: + except json.JSONDecodeError: logging.error("Health Check FAILED: Could not decode JSON response.") all_ok = False - except requests.exceptions.ConnectionError: + except httpx.ConnectError: logging.error("Health Check FAILED: No response from server on port %s.", port) logging.error("--- Port %s: CHECKS FAILED ---", port) return False, "Port %s (%s): FAILED (Connection Error)" % (port, expected_model) - except requests.exceptions.Timeout: + except httpx.TimeoutException: logging.error("Health Check FAILED: Connection timed out for port %s.", port) logging.error("--- Port %s: CHECKS FAILED ---", port) return False, "Port %s (%s): FAILED (Connection Timeout)" % ( @@ -81,7 +82,7 @@ def check_endpoint(port: int, expected_model: str): models_url = f"{base_url}/v1/models" logging.info("Checking %s for '%s'...", models_url, expected_model) try: - response = requests.get(models_url, timeout=3) + response = httpx.get(models_url, timeout=3) if response.status_code != 200: logging.error("Model Check FAILED: Status code %s", response.status_code) @@ -105,7 +106,7 @@ def check_endpoint(port: int, expected_model: str): for model_id in model_ids: logging.warning(" - %s", model_id) all_ok = False - except requests.exceptions.JSONDecodeError: + except json.JSONDecodeError: logging.error("Model Check FAILED: Could not decode JSON response.") all_ok = False except AttributeError: @@ -115,10 +116,10 @@ def check_endpoint(port: int, expected_model: str): ) all_ok = False - except requests.exceptions.ConnectionError: + except httpx.ConnectError: logging.error("Model Check FAILED: No response from server on port %s.", port) all_ok = False - except requests.exceptions.Timeout: + except httpx.TimeoutException: logging.error("Model Check FAILED: Connection timed out for port %s.", port) all_ok = False @@ -145,7 +146,7 @@ def check_rails_endpoint(port: int): logging.info("Checking %s ...", endpoint) try: - response = requests.get(endpoint, timeout=3) + response = httpx.get(endpoint, timeout=3) # --- 1. 
HTTP Status Check --- if response.status_code == 200: @@ -171,17 +172,17 @@ def check_rails_endpoint(port: int): "Response body (first 200 chars): %s", str(response.text)[:200] ) all_ok = False - except requests.exceptions.JSONDecodeError: + except json.JSONDecodeError: logging.error("Body Check FAILED: Could not decode JSON response.") logging.debug( "Response body (first 200 chars): %s", str(response.text)[:200] ) all_ok = False - except requests.exceptions.ConnectionError: + except httpx.ConnectError: logging.error("Rails Check FAILED: No response from server on port %s.", port) all_ok = False - except requests.exceptions.Timeout: + except httpx.TimeoutException: logging.error("Rails Check FAILED: Connection timed out for port %s.", port) all_ok = False diff --git a/tests/benchmark/test_validate_mocks.py b/tests/benchmark/test_validate_mocks.py index 87fe64344..d8a86c1fa 100644 --- a/tests/benchmark/test_validate_mocks.py +++ b/tests/benchmark/test_validate_mocks.py @@ -19,10 +19,11 @@ Tests for validate_mocks.py script. """ +import json from unittest.mock import MagicMock, patch +import httpx import pytest -import requests from nemoguardrails.benchmark.validate_mocks import ( check_endpoint, @@ -34,7 +35,7 @@ class TestCheckEndpoint: """Tests for check_endpoint function.""" - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_endpoint_success(self, mock_get): """Test successful health and model checks.""" # Mock health check response @@ -61,7 +62,7 @@ def test_check_endpoint_success(self, mock_get): assert "8000" in summary assert mock_get.call_count == 2 - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_endpoint_health_check_failed_status(self, mock_get): """Test health check with non-200 status code.""" health_response = MagicMock() @@ -74,7 +75,7 @@ def test_check_endpoint_health_check_failed_status(self, mock_get): assert not success assert "FAILED" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_endpoint_health_check_unhealthy_status(self, mock_get): """Test health check with unhealthy status.""" health_response = MagicMock() @@ -92,12 +93,12 @@ def test_check_endpoint_health_check_unhealthy_status(self, mock_get): assert not success assert "FAILED" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_endpoint_health_check_json_decode_error(self, mock_get): """Test health check with invalid JSON.""" health_response = MagicMock() health_response.status_code = 200 - health_response.json.side_effect = requests.exceptions.JSONDecodeError( + health_response.json.side_effect = json.JSONDecodeError( "Expecting value", "", 0 ) @@ -108,10 +109,10 @@ def test_check_endpoint_health_check_json_decode_error(self, mock_get): assert not success assert "FAILED" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_endpoint_health_connection_error(self, mock_get): """Test health check with connection error.""" - mock_get.side_effect = requests.exceptions.ConnectionError() + mock_get.side_effect = httpx.ConnectError("Connection failed") success, summary = check_endpoint(8000, "test-model") @@ -119,10 +120,10 @@ def 
test_check_endpoint_health_connection_error(self, mock_get): assert "FAILED" in summary assert "Connection Error" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_endpoint_health_timeout(self, mock_get): """Test health check with timeout.""" - mock_get.side_effect = requests.exceptions.Timeout() + mock_get.side_effect = httpx.TimeoutException("Request timed out") success, summary = check_endpoint(8000, "test-model") @@ -130,7 +131,7 @@ def test_check_endpoint_health_timeout(self, mock_get): assert "FAILED" in summary assert "Connection Timeout" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_endpoint_model_check_failed_status(self, mock_get): """Test model check with non-200 status code.""" health_response = MagicMock() @@ -147,7 +148,7 @@ def test_check_endpoint_model_check_failed_status(self, mock_get): assert not success assert "FAILED" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_endpoint_model_not_found(self, mock_get): """Test model check when expected model is not in the list.""" health_response = MagicMock() @@ -170,7 +171,7 @@ def test_check_endpoint_model_not_found(self, mock_get): assert not success assert "FAILED" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_endpoint_model_check_json_decode_error(self, mock_get): """Test model check with invalid JSON.""" health_response = MagicMock() @@ -179,7 +180,7 @@ def test_check_endpoint_model_check_json_decode_error(self, mock_get): models_response = MagicMock() models_response.status_code = 200 - models_response.json.side_effect = requests.exceptions.JSONDecodeError( + models_response.json.side_effect = json.JSONDecodeError( "Expecting value", "", 0 ) @@ -190,7 +191,7 @@ def test_check_endpoint_model_check_json_decode_error(self, mock_get): assert not success assert "FAILED" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_endpoint_model_check_unexpected_json_structure(self, mock_get): """Test model check with unexpected JSON structure.""" health_response = MagicMock() @@ -209,28 +210,34 @@ def test_check_endpoint_model_check_unexpected_json_structure(self, mock_get): assert not success assert "FAILED" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_endpoint_model_check_connection_error(self, mock_get): """Test model check with connection error.""" health_response = MagicMock() health_response.status_code = 200 health_response.json.return_value = {"status": "healthy"} - mock_get.side_effect = [health_response, requests.exceptions.ConnectionError()] + mock_get.side_effect = [ + health_response, + httpx.ConnectError("Connection failed"), + ] success, summary = check_endpoint(8000, "test-model") assert not success assert "FAILED" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_endpoint_model_check_timeout(self, mock_get): """Test model check with timeout.""" health_response = MagicMock() health_response.status_code 
= 200 health_response.json.return_value = {"status": "healthy"} - mock_get.side_effect = [health_response, requests.exceptions.Timeout()] + mock_get.side_effect = [ + health_response, + httpx.TimeoutException("Request timed out"), + ] success, summary = check_endpoint(8000, "test-model") @@ -241,7 +248,7 @@ def test_check_endpoint_model_check_timeout(self, mock_get): class TestCheckRailsEndpoint: """Tests for check_rails_endpoint function.""" - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_rails_endpoint_success(self, mock_get): """Test successful rails config check.""" response = MagicMock() @@ -259,7 +266,7 @@ def test_check_rails_endpoint_success(self, mock_get): assert "PASSED" in summary assert "9000" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_rails_endpoint_non_200_status(self, mock_get): """Test rails config check with non-200 status.""" response = MagicMock() @@ -273,7 +280,7 @@ def test_check_rails_endpoint_non_200_status(self, mock_get): assert not success assert "FAILED" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_rails_endpoint_empty_list(self, mock_get): """Test rails config check with empty list response.""" response = MagicMock() @@ -287,7 +294,7 @@ def test_check_rails_endpoint_empty_list(self, mock_get): assert not success assert "FAILED" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_rails_endpoint_not_a_list(self, mock_get): """Test rails config check with non-list response.""" response = MagicMock() @@ -301,15 +308,13 @@ def test_check_rails_endpoint_not_a_list(self, mock_get): assert not success assert "FAILED" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_rails_endpoint_json_decode_error(self, mock_get): """Test rails config check with invalid JSON.""" response = MagicMock() response.status_code = 200 response.text = "invalid json" - response.json.side_effect = requests.exceptions.JSONDecodeError( - "Expecting value", "", 0 - ) + response.json.side_effect = json.JSONDecodeError("Expecting value", "", 0) mock_get.return_value = response @@ -318,20 +323,20 @@ def test_check_rails_endpoint_json_decode_error(self, mock_get): assert not success assert "FAILED" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_rails_endpoint_connection_error(self, mock_get): """Test rails config check with connection error.""" - mock_get.side_effect = requests.exceptions.ConnectionError() + mock_get.side_effect = httpx.ConnectError("Connection failed") success, summary = check_rails_endpoint(9000) assert not success assert "FAILED" in summary - @patch("nemoguardrails.benchmark.validate_mocks.requests.get") + @patch("nemoguardrails.benchmark.validate_mocks.httpx.get") def test_check_rails_endpoint_timeout(self, mock_get): """Test rails config check with timeout.""" - mock_get.side_effect = requests.exceptions.Timeout() + mock_get.side_effect = httpx.TimeoutException("Request timed out") success, summary = check_rails_endpoint(9000) From 
83b40b78942a3783ed3613138c23d6664f80ef1f Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Thu, 6 Nov 2025 17:15:50 -0600 Subject: [PATCH 13/16] Revert "Add pyproject.toml 'benchmark' extra to install honcho and requests" This reverts commit 20f3726d4a97af90e04caa6c08b13fb75b022d25. --- pyproject.toml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4d69f1e5c..1d252a8bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,10 +105,6 @@ google-cloud-language = { version = ">=2.14.0", optional = true } # jailbreak injection yara-python = { version = "^4.5.1", optional = true } -# benchmark -requests = { version = ">=2.31.0", optional = true } -honcho = { version = ">=1.1.0", optional = true } - [tool.poetry.extras] sdd = ["presidio-analyzer", "presidio-anonymizer"] eval = ["tqdm", "numpy", "streamlit", "tornado"] @@ -117,7 +113,6 @@ gcp = ["google-cloud-language"] tracing = ["opentelemetry-api", "aiofiles"] nvidia = ["langchain-nvidia-ai-endpoints"] jailbreak = ["yara-python"] -benchmark = ["requests", "honcho"] # Poetry does not support recursive dependencies, so we need to add all the dependencies here. # I also support their decision. There is no PEP for recursive dependencies, but it has been supported in pip since version 21.2. # It is here for backward compatibility. @@ -133,8 +128,6 @@ all = [ "aiofiles", "langchain-nvidia-ai-endpoints", "yara-python", - "requests", - "honcho", ] [tool.poetry.group.dev] From 8f58d118630035dec589743f657f5634356d4972 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 10 Nov 2025 07:52:22 -0600 Subject: [PATCH 14/16] Removed commented line in Procfile --- nemoguardrails/benchmark/Procfile | 1 - 1 file changed, 1 deletion(-) diff --git a/nemoguardrails/benchmark/Procfile b/nemoguardrails/benchmark/Procfile index c698d4463..f177f52be 100644 --- a/nemoguardrails/benchmark/Procfile +++ b/nemoguardrails/benchmark/Procfile @@ -1,7 +1,6 @@ # Procfile # NeMo Guardrails server -# main: uvicorn nemoguardrails.server.api:app --workers 4 --port 9000 gr: poetry run nemoguardrails server --config configs/guardrail_configs --default-config-id content_safety_colang1 --port 9000 # Guardrails NIMs for inference From d5c6038816d4bfe97ec5b0e8de3caa18abb50f4a Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 10 Nov 2025 07:56:38 -0600 Subject: [PATCH 15/16] Add README explaining core-banchmarking --- nemoguardrails/benchmark/README.md | 159 +++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 nemoguardrails/benchmark/README.md diff --git a/nemoguardrails/benchmark/README.md b/nemoguardrails/benchmark/README.md new file mode 100644 index 000000000..750fde0fe --- /dev/null +++ b/nemoguardrails/benchmark/README.md @@ -0,0 +1,159 @@ +# Guardrails Benchmarking + +NeMo Guardrails includes benchmarking tools to help users capacity-test their Guardrails applications. +Adding guardrails to an LLM-based application improves safety and security, while adding some latency. These benchmarks allow users to quantify the tradeoff between security and latency, to make data-driven decisions. +We currently have a simple testbench, which runs the Guardrails server with mocks as Guardrail and Application models. This can be used for performance-testing on a laptop without any GPUs, and run in a few minutes. 
+
+## Guardrails Core Benchmarking
+
+This benchmark measures the performance of the Guardrails application, running on a CPU-only laptop or instance.
+It doesn't require GPUs on which to run local models, or access to the internet to use models hosted by providers.
+All models use the [Mock LLM Server](mock_llm_server), which is a simplified model of an LLM used for inference.
+The aim of this benchmark is to detect performance-regressions as quickly as running unit-tests.
+
+## Quickstart: Running Guardrails with Mock LLMs
+To run Guardrails with mocks for both the content-safety and main LLM, follow the steps below. All commands must be run in the `nemoguardrails/benchmark` directory. These assume you already have a working environment after following the [contribution guidelines](../CONTRIBUTING.md).
+
+First, we need to install the honcho and langchain-nvidia-ai-endpoints packages.
+The `honcho` package is used to run Procfile-based applications, and is a Python port of [Foreman](https://github.com/ddollar/foreman).
+The `langchain-nvidia-ai-endpoints` package is used to communicate with Mock LLMs via LangChain.
+
+```shell
+# Install dependencies
+$ poetry run pip install honcho langchain-nvidia-ai-endpoints
+...
+Successfully installed filetype-1.2.0 honcho-2.0.0 langchain-nvidia-ai-endpoints-0.3.19
+```
+
+Now we can start up the processes that are part of the [Procfile](Procfile).
+As the Procfile processes spin up, they log to the console with a prefix. The `system` prefix is used by Honcho, `app_llm` is the Application or Main LLM mock, `cs_llm` is the content-safety mock, and `gr` is the Guardrails service. We'll explore the Procfile in more detail below.
+Once the three 'Uvicorn running on ...' messages are printed, you can move to the next step. Note that these messages are likely not on consecutive lines.
+
+```
+# All commands must be run in the nemoguardrails/benchmark directory
+$ cd nemoguardrails/benchmark
+$ poetry run honcho start
+13:40:33 system | gr.1 started (pid=93634)
+13:40:33 system | app_llm.1 started (pid=93635)
+13:40:33 system | cs_llm.1 started (pid=93636)
+...
+13:40:41 app_llm.1 | INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
+...
+13:40:41 cs_llm.1 | INFO: Uvicorn running on http://0.0.0.0:8001 (Press CTRL+C to quit)
+...
+13:40:45 gr.1 | INFO: Uvicorn running on http://0.0.0.0:9000 (Press CTRL+C to quit)
+```
+
+Once Guardrails and the mock servers are up, we can use the `validate_mocks.py` script to check they're healthy and serving the correct models.
+
+```shell
+$ cd nemoguardrails/benchmark
+$ poetry run python validate_mocks.py
+Starting LLM endpoint health check...
+
+--- Checking Port: 8000 ---
+Checking http://localhost:8000/health ...
+HTTP Request: GET http://localhost:8000/health "HTTP/1.1 200 OK"
+Health Check PASSED: Status is 'healthy'.
+Checking http://localhost:8000/v1/models for 'meta/llama-3.3-70b-instruct'...
+HTTP Request: GET http://localhost:8000/v1/models "HTTP/1.1 200 OK"
+Model Check PASSED: Found 'meta/llama-3.3-70b-instruct' in model list.
+--- Port 8000: ALL CHECKS PASSED ---
+
+--- Checking Port: 8001 ---
+Checking http://localhost:8001/health ...
+HTTP Request: GET http://localhost:8001/health "HTTP/1.1 200 OK"
+Health Check PASSED: Status is 'healthy'.
+Checking http://localhost:8001/v1/models for 'nvidia/llama-3.1-nemoguard-8b-content-safety'...
+HTTP Request: GET http://localhost:8001/v1/models "HTTP/1.1 200 OK"
+Model Check PASSED: Found 'nvidia/llama-3.1-nemoguard-8b-content-safety' in model list.
+--- Port 8001: ALL CHECKS PASSED --- + +--- Checking Port: 9000 (Rails Config) --- +Checking http://localhost:9000/v1/rails/configs ... +HTTP Request: GET http://localhost:9000/v1/rails/configs "HTTP/1.1 200 OK" +HTTP Status PASSED: Got 200. +Body Check PASSED: Response is an array with at least one entry. +--- Port 9000: ALL CHECKS PASSED --- + +--- Final Summary --- +Port 8000 (meta/llama-3.3-70b-instruct): PASSED +Port 8001 (nvidia/llama-3.1-nemoguard-8b-content-safety): PASSED +Port 9000 (Rails Config): PASSED +--------------------- +Overall Status: All endpoints are healthy! +``` + +Once the mocks and Guardrails are running and the script passes, we can issue curl requests against the Guardrails `/chat/completions` endpoint to generate a response and test the system end-to-end. + +```shell +curl -s -X POST http://0.0.0.0:9000/v1/chat/completions \ + -H 'Accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "model": "meta/llama-3.3-70b-instruct", + "messages": [ + { + "role": "user", + "content": "what can you do for me?" + } + ], + "stream": false + }' | jq +{ + "messages": [ + { + "role": "assistant", + "content": "I can provide information and help with a wide range of topics, from science and history to entertainment and culture. I can also help with language-related tasks, such as translation and text summarization. However, I can't assist with requests that involve harm or illegal activities." + } + ] +} + +``` + +## Deep-Dive: Configuration + +In this section, we'll examine the configuration files used in the quickstart above. This gives more context on how the system works, and can be extended as needed. + +### Procfile + +The [Procfile](Procfile?raw=true) contains all the processes that make up the application. +The Honcho package reads in this file, starts all the processes, and combines their logs to the console +The `gr` line runs the Guardrails server on port 9000 and sets the default Guardrails configuration as [content_safety_colang1](configs/guardrail_configs/content_safety_colang1?raw=true). +The `app_llm` line runs the Application or Main Mock LLM. Guardrails calls this LLM to generate a response to the user's query. This server uses 4 uvicorn workers and runs on port 8000. The configuration file here is a Mock LLM configuration, not a Guardrails configuration. +The `cs_llm` line runs the Content-Safety Mock LLM. This uses 4 uvicorn workers and runs on port 8001. + +### Guardrails Configuration +The [Guardrails Configuration](configs/guardrail_configs/content_safety_colang1/config.yml) is used by the Guardrails server. +Under the `models` section, the `main` model is used to generate responses to the user queries. The base URL for this model is the `app_llm` Mock LLM from the Procfile, running on port 8000. The `model` field has to match the Mock LLM model name. +The `content_safety` model is configured for use in an input and output rail. The `type` field matches the `$model` used in the input and output flows. + +### Mock LLM Endpoints +The Mock LLM implements a subset of the OpenAI LLM API. +There are two Mock LLM configurations, one for the Mock [main model](configs/mock_configs/meta-llama-3.3-70b-instruct.env), and another for the Mock [content-safety](configs/mock_configs/nvidia-llama-3.1-nemoguard-8b-content-safety.env) model. +The Mock LLM has the following OpenAI-compatible endpoints: + +* `/health`: Returns a JSON object with status set to healthy and timestamp in seconds-since-epoch. 
For example `{"status":"healthy","timestamp":1762781239}`
+* `/v1/models`: Returns the `MODEL` field from the Mock configuration (see below). For example `{"object":"list","data":[{"id":"meta/llama-3.3-70b-instruct","object":"model","created":1762781290,"owned_by":"system"}]}`
+* `/v1/completions`: Returns an [OpenAI completion object](https://platform.openai.com/docs/api-reference/completions/object) using the Mock configuration (see below).
+* `/v1/chat/completions`: Returns an [OpenAI chat completion object](https://platform.openai.com/docs/api-reference/chat/object) using the Mock configuration (see below).
+
+### Mock LLM Configuration
+Mock LLMs are configured using the `.env` file format. These files are passed to the Mock LLM using the `--config-file` argument.
+The Mock LLMs return either a `SAFE_TEXT` or `UNSAFE_TEXT` response to `/v1/completions` or `/v1/chat/completions` inference requests.
+The probability of the `UNSAFE_TEXT` being returned is given by `UNSAFE_PROBABILITY`.
+The latency of each response is also controllable, and works as follows:
+
+* Latency is first sampled from a normal distribution with mean `LATENCY_MEAN_SECONDS` and standard deviation `LATENCY_STD_SECONDS`.
+* If the sampled value is less than `LATENCY_MIN_SECONDS`, it is set to `LATENCY_MIN_SECONDS`.
+* If the sampled value is greater than `LATENCY_MAX_SECONDS`, it is set to `LATENCY_MAX_SECONDS`.
+
+The full list of configuration fields is shown below:
+* `MODEL`: The Model name served by the Mock LLM. This will be returned on the `/v1/models` endpoint.
+* `UNSAFE_PROBABILITY`: Probability of an unsafe response; must be in the range [0, 1].
+* `UNSAFE_TEXT`: String returned as an unsafe response.
+* `SAFE_TEXT`: String returned as a safe response.
+* `LATENCY_MIN_SECONDS`: Minimum latency in seconds.
+* `LATENCY_MAX_SECONDS`: Maximum latency in seconds.
+* `LATENCY_MEAN_SECONDS`: Normal distribution mean from which to sample latency.
+* `LATENCY_STD_SECONDS`: Normal distribution standard deviation from which to sample latency.

From 7eb70d1c58c2f74b76ecaaca58f82d12449ed892 Mon Sep 17 00:00:00 2001
From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com>
Date: Mon, 10 Nov 2025 08:12:46 -0600
Subject: [PATCH 16/16] Small README tweaks

---
 nemoguardrails/benchmark/README.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/nemoguardrails/benchmark/README.md b/nemoguardrails/benchmark/README.md
index 750fde0fe..914d1b47c 100644
--- a/nemoguardrails/benchmark/README.md
+++ b/nemoguardrails/benchmark/README.md
@@ -4,6 +4,8 @@ NeMo Guardrails includes benchmarking tools to help users capacity-test their Gu
 Adding guardrails to an LLM-based application improves safety and security, while adding some latency. These benchmarks allow users to quantify the tradeoff between security and latency, to make data-driven decisions.
 We currently have a simple testbench, which runs the Guardrails server with mocks as Guardrail and Application models. This can be used for performance-testing on a laptop without any GPUs, and run in a few minutes.
 
+-----
+
 ## Guardrails Core Benchmarking
@@ -12,7 +14,9 @@ All models use the [Mock LLM Server](mock_llm_server), which is a simplified mod
 The aim of this benchmark is to detect performance-regressions as quickly as running unit-tests.
 
## Quickstart: Running Guardrails with Mock LLMs -To run Guardrails with mocks for both the content-safety and main LLM, follow the steps below. All commands must be run in the `nemoguardrails/benchmark` directory. These assume you already have a working environment after following the [contribution guidelines](../CONTRIBUTING.md). +To run Guardrails with mocks for both the content-safety and main LLM, follow the steps below. +All commands must be run in the `nemoguardrails/benchmark` directory. +These assume you already have a working environment after following the steps in [CONTRIBUTING.md](../../CONTRIBUTING.md). First, we need to install the honcho and langchain-nvidia-ai-endpoints packages. The `honcho` package is used to run Procfile-based applications, and is a Python port of [Foreman](https://github.com/ddollar/foreman). @@ -111,6 +115,8 @@ curl -s -X POST http://0.0.0.0:9000/v1/chat/completions \ ``` +------ + ## Deep-Dive: Configuration In this section, we'll examine the configuration files used in the quickstart above. This gives more context on how the system works, and can be extended as needed.
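
To make the Mock LLM configuration fields described above concrete, a hypothetical `.env` file might look like the sketch below. The field names are the ones documented in the README's Mock LLM Configuration section; every value shown is an illustrative assumption and is not taken from the repository's actual `configs/mock_configs/` files.

```shell
# Hypothetical Mock LLM .env sketch -- illustrative values only
MODEL=meta/llama-3.3-70b-instruct
SAFE_TEXT="I can help with that."
UNSAFE_TEXT="I cannot help with that."
# Chance of returning UNSAFE_TEXT, in the range [0, 1]
UNSAFE_PROBABILITY=0.1
# Latency is drawn from Normal(mean, std) and clamped to [min, max]
LATENCY_MEAN_SECONDS=0.5
LATENCY_STD_SECONDS=0.1
LATENCY_MIN_SECONDS=0.1
LATENCY_MAX_SECONDS=2.0
```

Passing a file like this to a Mock LLM via `--config-file` would control how often the mock returns the unsafe response and how much simulated latency each call adds.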