|
5 | 5 | import os |
6 | 6 | import queue |
7 | 7 | import random |
| 8 | +import sqlite3 |
8 | 9 | import subprocess |
9 | 10 | import time |
10 | 11 | import uuid |
|
119 | 120 | from codeflash.verification.verification_utils import TestConfig |
120 | 121 |
|
121 | 122 |
|
| 123 | +def log_code_repair_to_db( |
| 124 | + code_repair_log_db: Path, |
| 125 | + optimization_id: str, |
| 126 | + trace_id: str | None = None, |
| 127 | + passed: str | None = None, |
| 128 | + faster: str | None = None, |
| 129 | +) -> None: |
| 130 | + """Log code repair data to SQLite database. |
| 131 | +
|
| 132 | + Uses upsert pattern to allow incremental logging with different columns at different places. |
| 133 | + Only non-None values will be updated; existing values are preserved. |
| 134 | + """ |
| 135 | + try: |
| 136 | + conn = sqlite3.connect(code_repair_log_db) |
| 137 | + cursor = conn.cursor() |
| 138 | + |
| 139 | + # Build dynamic upsert query based on provided columns |
| 140 | + columns = ["optimization_id"] |
| 141 | + values = [optimization_id] |
| 142 | + update_parts = ["updated_at = CURRENT_TIMESTAMP"] |
| 143 | + |
| 144 | + if trace_id is not None: |
| 145 | + columns.append("trace_id") |
| 146 | + values.append(trace_id) |
| 147 | + update_parts.append("trace_id = excluded.trace_id") |
| 148 | + |
| 149 | + if passed is not None: |
| 150 | + columns.append("passed") |
| 151 | + values.append(passed) |
| 152 | + update_parts.append("passed = excluded.passed") |
| 153 | + |
| 154 | + if faster is not None: |
| 155 | + columns.append("faster") |
| 156 | + values.append(faster) |
| 157 | + update_parts.append("faster = excluded.faster") |
| 158 | + |
| 159 | + placeholders = ", ".join(["?"] * len(values)) |
| 160 | + columns_str = ", ".join(columns) |
| 161 | + update_str = ", ".join(update_parts) |
| 162 | + |
| 163 | + cursor.execute( |
| 164 | + f""" |
| 165 | + INSERT INTO code_repair_logs_cf ({columns_str}) |
| 166 | + VALUES ({placeholders}) |
| 167 | + ON CONFLICT(optimization_id) DO UPDATE SET {update_str} |
| 168 | + """, # noqa: S608 |
| 169 | + values, |
| 170 | + ) |
| 171 | + conn.commit() |
| 172 | + conn.close() |
| 173 | + except Exception as e: |
| 174 | + sentry_sdk.capture_exception(e) |
| 175 | + logger.exception(e) |
| 176 | + |
| 177 | + |
122 | 178 | class CandidateProcessor: |
123 | 179 | """Handles candidate processing using a queue-based approach.""" |
124 | 180 |
|
@@ -249,6 +305,8 @@ def __init__( |
249 | 305 | max_workers=n_tests + 3 if self.experiment_id is None else n_tests + 4 |
250 | 306 | ) |
251 | 307 | self.optimization_review = "" |
| 308 | + # SQLite database setup for logging |
| 309 | + self.code_repair_log_db = Path(__file__).parent / "code_repair_log_cf.db" |
252 | 310 |
|
253 | 311 | def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]: |
254 | 312 | should_run_experiment = self.experiment_id is not None |
@@ -389,7 +447,19 @@ def optimize_function(self) -> Result[BestOptimization, str]: |
389 | 447 | initialization_result = self.can_be_optimized() |
390 | 448 | if not is_successful(initialization_result): |
391 | 449 | return Failure(initialization_result.failure()) |
392 | | - |
| 450 | + conn = sqlite3.connect(self.code_repair_log_db) |
| 451 | + cursor = conn.cursor() |
| 452 | + cursor.execute(""" |
| 453 | + CREATE TABLE IF NOT EXISTS code_repair_logs ( |
| 454 | + optimization_id TEXT PRIMARY KEY, |
| 455 | + trace_id TEXT, |
| 456 | + passed TEXT, |
| 457 | + faster TEXT, |
| 458 | + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, |
| 459 | + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP |
| 460 | + """) |
| 461 | + conn.commit() |
| 462 | + conn.close() |
393 | 463 | should_run_experiment, code_context, original_helper_code = initialization_result.unwrap() |
394 | 464 |
|
395 | 465 | code_print( |
@@ -540,13 +610,29 @@ def determine_best_candidate( |
540 | 610 | logger.warning( |
541 | 611 | "force_lsp|No functions were replaced in the optimized code. Skipping optimization candidate." |
542 | 612 | ) |
| 613 | + if candidate.optimization_id.endswith("cdrp"): |
| 614 | + log_code_repair_to_db( |
| 615 | + code_repair_log_db=self.code_repair_log_db, |
| 616 | + trace_id=self.function_trace_id[:-4] + exp_type, |
| 617 | + optimization_id=candidate.optimization_id, |
| 618 | + passed="no", |
| 619 | + faster="no", # this also may or may not pass |
| 620 | + ) |
543 | 621 | console.rule() |
544 | 622 | continue |
545 | 623 | except (ValueError, SyntaxError, cst.ParserSyntaxError, AttributeError) as e: |
546 | 624 | logger.error(e) |
547 | 625 | self.write_code_and_helpers( |
548 | 626 | self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path |
549 | 627 | ) |
| 628 | + if candidate.optimization_id.endswith("cdrp"): |
| 629 | + log_code_repair_to_db( |
| 630 | + code_repair_log_db=self.code_repair_log_db, |
| 631 | + trace_id=self.function_trace_id[:-4] + exp_type, |
| 632 | + optimization_id=candidate.optimization_id, |
| 633 | + passed="no", |
| 634 | + faster="no", # this also may or may not pass |
| 635 | + ) |
550 | 636 | continue |
551 | 637 | # check if this code has been evaluated before by checking the ast normalized code string |
552 | 638 | normalized_code = normalize_code(candidate.source_code.flat.strip()) |
@@ -574,6 +660,16 @@ def determine_best_candidate( |
574 | 660 | ): # new candidate has a shorter diff than the previously encountered one |
575 | 661 | ast_code_to_id[normalized_code]["shorter_source_code"] = candidate.source_code |
576 | 662 | ast_code_to_id[normalized_code]["diff_len"] = new_diff_len |
| 663 | + if candidate.optimization_id.endswith("cdrp"): |
| 664 | + log_code_repair_to_db( |
| 665 | + code_repair_log_db=self.code_repair_log_db, |
| 666 | + trace_id=self.function_trace_id[:-4] + exp_type, |
| 667 | + optimization_id=candidate.optimization_id, |
| 668 | + passed="yes" if is_correct[candidate.optimization_id] else "no", |
| 669 | + faster="yes" |
| 670 | + if speedup_ratios[candidate.optimization_id] > 0 |
| 671 | + else "no", # this also may or may not pass |
| 672 | + ) |
577 | 673 | continue |
578 | 674 | ast_code_to_id[normalized_code] = { |
579 | 675 | "optimization_id": candidate.optimization_id, |
@@ -743,6 +839,16 @@ def determine_best_candidate( |
743 | 839 | if self.args.benchmark and benchmark_tree: |
744 | 840 | console.print(benchmark_tree) |
745 | 841 | console.rule() |
| 842 | + if candidate.optimization_id.endswith("cdrp"): |
| 843 | + log_code_repair_to_db( |
| 844 | + code_repair_log_db=self.code_repair_log_db, |
| 845 | + trace_id=self.function_trace_id[:-4] + exp_type, |
| 846 | + optimization_id=candidate.optimization_id, |
| 847 | + passed="yes" if is_correct[candidate.optimization_id] else "no", |
| 848 | + faster="yes" |
| 849 | + if speedup_ratios[candidate.optimization_id] > 0 |
| 850 | + else "no", # this also may or may not pass |
| 851 | + ) |
746 | 852 | except KeyboardInterrupt as e: |
747 | 853 | logger.exception(f"Optimization interrupted: {e}") |
748 | 854 | raise |
|
0 commit comments