
Commit 561679e

fix: align Makefile format command with CI and fix notebook linting
- Add --exclude src/ragas/_version.py to format and CI commands
- This matches GitHub CI behavior, which overrides pyproject.toml exclusions
- Ensures notebooks are formatted locally, preventing CI failures
- Add noqa: E402 comment to notebook for intentional sys.path modification
1 parent 44034ff commit 561679e
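
Context for the --exclude change: ruff treats a command-line --exclude as replacing, not extending, the exclude list from the config file, so CI's flag re-includes everything pyproject.toml had skipped except src/ragas/_version.py. A minimal sketch of the difference (that pyproject.toml's exclude list covers the notebooks is taken from the commit message, not shown in this diff):

# Honors pyproject.toml's exclude list, so excluded notebooks are skipped:
uv run --active ruff format src tests docs --config pyproject.toml

# CLI --exclude replaces the config list: only src/ragas/_version.py is
# skipped, so the notebooks get formatted, matching GitHub CI:
uv run --active ruff format src tests docs --exclude src/ragas/_version.py --config pyproject.toml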

File tree: 2 files changed (+25 −46 lines)

Makefile

Lines changed: 9 additions & 9 deletions
@@ -52,11 +52,11 @@ install: ## Install full dependencies with uv sync (backward compatible - modern
 format: ## Format and lint all code
 	@echo "Formatting and linting all code..."
 	@echo "(ruff format) Formatting ragas..."
-	$(Q)uv run --active ruff format src tests docs --config pyproject.toml
+	$(Q)uv run --active ruff format src tests docs --exclude src/ragas/_version.py --config pyproject.toml
 	@echo "(ruff) Auto-fixing ragas (includes import sorting and unused imports)..."
-	$(Q)uv run --active ruff check src tests docs --fix-only --config pyproject.toml
+	$(Q)uv run --active ruff check src tests docs --exclude src/ragas/_version.py --fix-only --config pyproject.toml
 	@echo "(ruff) Final linting check for ragas..."
-	$(Q)uv run --active ruff check src tests docs --config pyproject.toml
+	$(Q)uv run --active ruff check src tests docs --exclude src/ragas/_version.py --config pyproject.toml
 
 type: ## Type check all code
 	@echo "Type checking all code..."
@@ -93,8 +93,8 @@ benchmarks-test: ## Run benchmarks for ragas unit tests
 run-ci: ## Run complete CI pipeline (mirrors GitHub CI exactly)
 	@echo "Running complete CI pipeline..."
 	@echo "Format check..."
-	$(Q)uv run --active ruff format --check src tests docs --config pyproject.toml
-	$(Q)uv run --active ruff check src tests docs --config pyproject.toml
+	$(Q)uv run --active ruff format --check src tests docs --exclude src/ragas/_version.py --config pyproject.toml
+	$(Q)uv run --active ruff check src tests docs --exclude src/ragas/_version.py --config pyproject.toml
 	@echo "Type check..."
 	$(Q)$(MAKE) type
 	@echo "Unit tests..."
@@ -104,8 +104,8 @@ run-ci: ## Run complete CI pipeline (mirrors GitHub CI exactly)
 run-ci-format-check: ## Run format check in dry-run mode (like GitHub CI)
 	@echo "Running format check (dry-run, like GitHub CI)..."
 	@echo "Checking ragas formatting..."
-	$(Q)uv run --active ruff format --check src tests docs --config pyproject.toml
-	$(Q)uv run --active ruff check src docs tests --config pyproject.toml
+	$(Q)uv run --active ruff format --check src tests docs --exclude src/ragas/_version.py --config pyproject.toml
+	$(Q)uv run --active ruff check src docs tests --exclude src/ragas/_version.py --config pyproject.toml
 
 run-ci-type: ## Run type checking (matches GitHub CI)
 	@echo "Running type checking (matches GitHub CI)..."
@@ -118,8 +118,8 @@ run-ci-tests: ## Run all tests with CI options
 run-ci-fast: ## Fast CI check for quick local validation (2-3 minutes)
 	@echo "Running fast CI check for quick feedback..."
 	@echo "Format check..."
-	$(Q)uv run --active ruff format --check src tests docs --config pyproject.toml
-	$(Q)uv run --active ruff check src docs tests --config pyproject.toml
+	$(Q)uv run --active ruff format --check src tests docs --exclude src/ragas/_version.py --config pyproject.toml
+	$(Q)uv run --active ruff check src docs tests --exclude src/ragas/_version.py --config pyproject.toml
 	@echo "Core unit tests (no nbmake for speed)..."
 	$(Q)uv run --active pytest tests/unit --dist loadfile -n auto -x
 	@echo "Fast CI check completed!"

tests/e2e/metrics_migration/metric_score_diff.ipynb

Lines changed: 16 additions & 37 deletions
@@ -34,16 +34,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import asyncio\n",
-    "import time\n",
-    "from dataclasses import dataclass\n",
-    "from typing import Any, Dict, List, Tuple\n",
-    "\n",
     "import numpy as np\n",
-    "import pandas as pd\n",
     "\n",
-    "# Ragas imports\n",
-    "from ragas.dataset_schema import SingleTurnSample"
+    "# Ragas imports"
    ]
   },
   {
@@ -135,31 +128,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "✓ Setup complete\n",
-      "✓ Metric classes loaded:\n",
-      "  Legacy: LLMContextPrecisionWithReference from ragas.metrics._context_precision\n",
-      "  Modern: ContextPrecision from ragas.metrics.collections\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "import os\n",
+    "import importlib\n",
     "import sys\n",
     "from pathlib import Path\n",
-    "import importlib\n",
     "\n",
     "# Add project root to path\n",
     "project_root = Path.cwd().parent.parent.parent\n",
     "sys.path.insert(0, str(project_root))\n",
     "\n",
-    "from tests.utils import check_api_key\n",
+    "from tests.utils import check_api_key  # noqa: E402\n",
     "\n",
     "# Check for OpenAI API key\n",
     "check_api_key(\"openai\")\n",
@@ -182,7 +163,7 @@
    "LegacyMetric = load_metric_class(METRIC_CONFIG[\"legacy_import\"])\n",
    "ModernMetric = load_metric_class(METRIC_CONFIG[\"modern_import\"])\n",
    "\n",
-   "print(f\"✓ Metric classes loaded:\")\n",
+   "print(\"✓ Metric classes loaded:\")\n",
    "print(\n",
    "    f\"  Legacy: {METRIC_CONFIG['legacy_import']['class_name']} from {METRIC_CONFIG['legacy_import']['module']}\"\n",
    ")\n",
@@ -329,7 +310,7 @@
    "legacy_name = getattr(legacy_metric, \"name\", legacy_metric.__class__.__name__)\n",
    "modern_name = getattr(modern_metric, \"name\", modern_metric.__class__.__name__)\n",
    "\n",
-   "print(f\"✓ Metrics initialized:\")\n",
+   "print(\"✓ Metrics initialized:\")\n",
    "print(f\"  Legacy: {legacy_name}\")\n",
    "print(f\"  Modern: {modern_name}\")\n",
    "print(f\"  Dataset fields required: {METRIC_CONFIG['dataset_fields']}\")"
@@ -404,7 +385,7 @@
    "\n",
    "print(f\"✓ Prepared {len(amnesty_test_data)} samples for testing\")\n",
    "if amnesty_test_data:\n",
-   "    print(f\"\\nFirst sample fields:\")\n",
+   "    print(\"\\nFirst sample fields:\")\n",
    "    first_sample = amnesty_test_data[0]\n",
    "    for key, value in first_sample.items():\n",
    "        if isinstance(value, list):\n",
@@ -561,9 +542,7 @@
    }
   ],
   "source": [
-   "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
-   "import numpy as np\n",
    "\n",
    "# Get detailed DataFrame\n",
    "df_amnesty = amnesty_result.to_dataframe()\n",
@@ -586,12 +565,12 @@
    "print(\"DETAILED STATISTICAL ANALYSIS\")\n",
    "print(\"=\" * 70)\n",
    "print(f\"\\nDataset: amnesty_qa ({len(df_amnesty)} samples)\")\n",
-   "print(f\"\\nScore Statistics:\")\n",
+   "print(\"\\nScore Statistics:\")\n",
    "print(f\"  Legacy Mean: {amnesty_result.old_mean:.4f}\")\n",
    "print(f\"  New Mean: {amnesty_result.new_mean:.4f}\")\n",
    "print(f\"  Score Shift: {amnesty_result.mean_diff:+.4f}\")\n",
    "\n",
-   "print(f\"\\nDifference Statistics:\")\n",
+   "print(\"\\nDifference Statistics:\")\n",
    "print(f\"  Mean |Diff|: {df_amnesty['abs_diff'].mean():.4f}\")\n",
    "print(f\"  Std Dev: {amnesty_result.std_diff:.4f}\")\n",
    "print(f\"  Max Diff: {amnesty_result.max_diff:.4f}\")\n",
@@ -602,7 +581,7 @@
    "# For LLM-based metrics: use [0.1, 0.15, 0.2, 0.25, 0.3]\n",
    "# For deterministic metrics: use [1e-10, 1e-8, 1e-6, 1e-4, 0.01]\n",
    "tolerance_levels = [0.1, 0.15, 0.2, 0.25, 0.3]\n",
-   "print(f\"\\nTolerance Analysis:\")\n",
+   "print(\"\\nTolerance Analysis:\")\n",
    "for tol in tolerance_levels:\n",
    "    within = (df_amnesty[\"abs_diff\"] < tol).sum()\n",
    "    pct = within / len(df_amnesty) * 100\n",
@@ -896,7 +875,7 @@
    "\n",
    "print(f\"✓ Prepared {len(fiqa_test_data)} samples for testing\")\n",
    "if fiqa_test_data:\n",
-   "    print(f\"\\nFirst sample fields:\")\n",
+   "    print(\"\\nFirst sample fields:\")\n",
    "    first_sample = fiqa_test_data[0]\n",
    "    for key, value in first_sample.items():\n",
    "        if isinstance(value, list):\n",
@@ -1049,12 +1028,12 @@
    "print(\"DETAILED STATISTICAL ANALYSIS\")\n",
    "print(\"=\" * 70)\n",
    "print(f\"\\nDataset: fiqa ({len(df_fiqa)} samples)\")\n",
-   "print(f\"\\nScore Statistics:\")\n",
+   "print(\"\\nScore Statistics:\")\n",
    "print(f\"  Legacy Mean: {fiqa_result.old_mean:.4f}\")\n",
    "print(f\"  New Mean: {fiqa_result.new_mean:.4f}\")\n",
    "print(f\"  Score Shift: {fiqa_result.mean_diff:+.4f}\")\n",
    "\n",
-   "print(f\"\\nDifference Statistics:\")\n",
+   "print(\"\\nDifference Statistics:\")\n",
    "print(f\"  Mean |Diff|: {df_fiqa['abs_diff'].mean():.4f}\")\n",
    "print(f\"  Std Dev: {fiqa_result.std_diff:.4f}\")\n",
    "print(f\"  Max Diff: {fiqa_result.max_diff:.4f}\")\n",
@@ -1065,7 +1044,7 @@
    "# For LLM-based metrics: use [0.1, 0.15, 0.2, 0.25, 0.3]\n",
    "# For deterministic metrics: use [1e-10, 1e-8, 1e-6, 1e-4, 0.01]\n",
    "tolerance_levels = [0.1, 0.15, 0.2, 0.25, 0.3]\n",
-   "print(f\"\\nTolerance Analysis:\")\n",
+   "print(\"\\nTolerance Analysis:\")\n",
    "for tol in tolerance_levels:\n",
    "    within = (df_fiqa[\"abs_diff\"] < tol).sum()\n",
    "    pct = within / len(df_fiqa) * 100\n",
@@ -1311,4 +1290,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
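
The diff also nulls execution_count and clears the stored cell outputs, which keeps the notebook diff-stable across runs. One common way to strip outputs (a sketch assuming nbconvert is available; the commit does not show which tool was used):

jupyter nbconvert --clear-output --inplace tests/e2e/metrics_migration/metric_score_diff.ipynb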
