3434 "metadata" : {},
3535 "outputs" : [],
3636 "source" : [
37- " import asyncio\n " ,
38- " import time\n " ,
39- " from dataclasses import dataclass\n " ,
40- " from typing import Any, Dict, List, Tuple\n " ,
41- " \n " ,
4237 " import numpy as np\n " ,
43- " import pandas as pd\n " ,
4438 " \n " ,
45- " # Ragas imports\n " ,
46- " from ragas.dataset_schema import SingleTurnSample"
39+ " # Ragas imports"
4740 ]
4841 },
4942 {
135128 },
136129 {
137130 "cell_type" : " code" ,
138- "execution_count" : 10 ,
131+ "execution_count" : null ,
139132 "metadata" : {},
140- "outputs" : [
141- {
142- "name" : " stdout" ,
143- "output_type" : " stream" ,
144- "text" : [
145- " ✓ Setup complete\n " ,
146- " ✓ Metric classes loaded:\n " ,
147- " Legacy: LLMContextPrecisionWithReference from ragas.metrics._context_precision\n " ,
148- " Modern: ContextPrecision from ragas.metrics.collections\n "
149- ]
150- }
151- ],
133+ "outputs" : [],
152134 "source" : [
153- " import os \n " ,
135+ " import importlib \n " ,
154136 " import sys\n " ,
155137 " from pathlib import Path\n " ,
156- " import importlib\n " ,
157138 " \n " ,
158139 " # Add project root to path\n " ,
159140 " project_root = Path.cwd().parent.parent.parent\n " ,
160141 " sys.path.insert(0, str(project_root))\n " ,
161142 " \n " ,
162- " from tests.utils import check_api_key\n " ,
143+ " from tests.utils import check_api_key # noqa: E402 \n " ,
163144 " \n " ,
164145 " # Check for OpenAI API key\n " ,
165146 " check_api_key(\" openai\" )\n " ,
182163 " LegacyMetric = load_metric_class(METRIC_CONFIG[\" legacy_import\" ])\n " ,
183164 " ModernMetric = load_metric_class(METRIC_CONFIG[\" modern_import\" ])\n " ,
184165 " \n " ,
185- " print(f \" ✓ Metric classes loaded:\" )\n " ,
166+ " print(\" ✓ Metric classes loaded:\" )\n " ,
186167 " print(\n " ,
187168 " f\" Legacy: {METRIC_CONFIG['legacy_import']['class_name']} from {METRIC_CONFIG['legacy_import']['module']}\"\n " ,
188169 " )\n " ,
329310 " legacy_name = getattr(legacy_metric, \" name\" , legacy_metric.__class__.__name__)\n " ,
330311 " modern_name = getattr(modern_metric, \" name\" , modern_metric.__class__.__name__)\n " ,
331312 " \n " ,
332- " print(f \" ✓ Metrics initialized:\" )\n " ,
313+ " print(\" ✓ Metrics initialized:\" )\n " ,
333314 " print(f\" Legacy: {legacy_name}\" )\n " ,
334315 " print(f\" Modern: {modern_name}\" )\n " ,
335316 " print(f\" Dataset fields required: {METRIC_CONFIG['dataset_fields']}\" )"
404385 " \n " ,
405386 " print(f\" ✓ Prepared {len(amnesty_test_data)} samples for testing\" )\n " ,
406387 " if amnesty_test_data:\n " ,
407- " print(f \"\\ nFirst sample fields:\" )\n " ,
388+ " print(\"\\ nFirst sample fields:\" )\n " ,
408389 " first_sample = amnesty_test_data[0]\n " ,
409390 " for key, value in first_sample.items():\n " ,
410391 " if isinstance(value, list):\n " ,
561542 }
562543 ],
563544 "source" : [
564- " import pandas as pd\n " ,
565545 " import matplotlib.pyplot as plt\n " ,
566- " import numpy as np\n " ,
567546 " \n " ,
568547 " # Get detailed DataFrame\n " ,
569548 " df_amnesty = amnesty_result.to_dataframe()\n " ,
586565 " print(\" DETAILED STATISTICAL ANALYSIS\" )\n " ,
587566 " print(\" =\" * 70)\n " ,
588567 " print(f\"\\ nDataset: amnesty_qa ({len(df_amnesty)} samples)\" )\n " ,
589- " print(f \"\\ nScore Statistics:\" )\n " ,
568+ " print(\"\\ nScore Statistics:\" )\n " ,
590569 " print(f\" Legacy Mean: {amnesty_result.old_mean:.4f}\" )\n " ,
591570 " print(f\" New Mean: {amnesty_result.new_mean:.4f}\" )\n " ,
592571 " print(f\" Score Shift: {amnesty_result.mean_diff:+.4f}\" )\n " ,
593572 " \n " ,
594- " print(f \"\\ nDifference Statistics:\" )\n " ,
573+ " print(\"\\ nDifference Statistics:\" )\n " ,
595574 " print(f\" Mean |Diff|: {df_amnesty['abs_diff'].mean():.4f}\" )\n " ,
596575 " print(f\" Std Dev: {amnesty_result.std_diff:.4f}\" )\n " ,
597576 " print(f\" Max Diff: {amnesty_result.max_diff:.4f}\" )\n " ,
602581 " # For LLM-based metrics: use [0.1, 0.15, 0.2, 0.25, 0.3]\n " ,
603582 " # For deterministic metrics: use [1e-10, 1e-8, 1e-6, 1e-4, 0.01]\n " ,
604583 " tolerance_levels = [0.1, 0.15, 0.2, 0.25, 0.3]\n " ,
605- " print(f \"\\ nTolerance Analysis:\" )\n " ,
584+ " print(\"\\ nTolerance Analysis:\" )\n " ,
606585 " for tol in tolerance_levels:\n " ,
607586 " within = (df_amnesty[\" abs_diff\" ] < tol).sum()\n " ,
608587 " pct = within / len(df_amnesty) * 100\n " ,
896875 " \n " ,
897876 " print(f\" ✓ Prepared {len(fiqa_test_data)} samples for testing\" )\n " ,
898877 " if fiqa_test_data:\n " ,
899- " print(f \"\\ nFirst sample fields:\" )\n " ,
878+ " print(\"\\ nFirst sample fields:\" )\n " ,
900879 " first_sample = fiqa_test_data[0]\n " ,
901880 " for key, value in first_sample.items():\n " ,
902881 " if isinstance(value, list):\n " ,
10491028 " print(\" DETAILED STATISTICAL ANALYSIS\" )\n " ,
10501029 " print(\" =\" * 70)\n " ,
10511030 " print(f\"\\ nDataset: fiqa ({len(df_fiqa)} samples)\" )\n " ,
1052- " print(f \"\\ nScore Statistics:\" )\n " ,
1031+ " print(\"\\ nScore Statistics:\" )\n " ,
10531032 " print(f\" Legacy Mean: {fiqa_result.old_mean:.4f}\" )\n " ,
10541033 " print(f\" New Mean: {fiqa_result.new_mean:.4f}\" )\n " ,
10551034 " print(f\" Score Shift: {fiqa_result.mean_diff:+.4f}\" )\n " ,
10561035 " \n " ,
1057- " print(f \"\\ nDifference Statistics:\" )\n " ,
1036+ " print(\"\\ nDifference Statistics:\" )\n " ,
10581037 " print(f\" Mean |Diff|: {df_fiqa['abs_diff'].mean():.4f}\" )\n " ,
10591038 " print(f\" Std Dev: {fiqa_result.std_diff:.4f}\" )\n " ,
10601039 " print(f\" Max Diff: {fiqa_result.max_diff:.4f}\" )\n " ,
10651044 " # For LLM-based metrics: use [0.1, 0.15, 0.2, 0.25, 0.3]\n " ,
10661045 " # For deterministic metrics: use [1e-10, 1e-8, 1e-6, 1e-4, 0.01]\n " ,
10671046 " tolerance_levels = [0.1, 0.15, 0.2, 0.25, 0.3]\n " ,
1068- " print(f \"\\ nTolerance Analysis:\" )\n " ,
1047+ " print(\"\\ nTolerance Analysis:\" )\n " ,
10691048 " for tol in tolerance_levels:\n " ,
10701049 " within = (df_fiqa[\" abs_diff\" ] < tol).sum()\n " ,
10711050 " pct = within / len(df_fiqa) * 100\n " ,
13111290 },
13121291 "nbformat" : 4 ,
13131292 "nbformat_minor" : 4
1314- }
1293+ }
0 commit comments