From d135abbefe7fab52b5fa16f9326d637172dea3e2 Mon Sep 17 00:00:00 2001 From: SeaUrc Date: Mon, 20 Oct 2025 12:39:23 -0400 Subject: [PATCH 01/10] Added kalshi bet predictor example --- .../6-kalshi-bet-predictor/.gitignore | 4 + .../6-kalshi-bet-predictor/cerebrium.toml | 23 +++ .../6-kalshi-bet-predictor/main.py | 170 ++++++++++++++++++ .../6-kalshi-bet-predictor/requirements.txt | 44 +++++ 4 files changed, 241 insertions(+) create mode 100644 8-application-demos/6-kalshi-bet-predictor/.gitignore create mode 100644 8-application-demos/6-kalshi-bet-predictor/cerebrium.toml create mode 100644 8-application-demos/6-kalshi-bet-predictor/main.py create mode 100644 8-application-demos/6-kalshi-bet-predictor/requirements.txt diff --git a/8-application-demos/6-kalshi-bet-predictor/.gitignore b/8-application-demos/6-kalshi-bet-predictor/.gitignore new file mode 100644 index 00000000..01d392ed --- /dev/null +++ b/8-application-demos/6-kalshi-bet-predictor/.gitignore @@ -0,0 +1,4 @@ +.DS_Store +.env +.venv/ +.vscode/ \ No newline at end of file diff --git a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml new file mode 100644 index 00000000..67cd4b9f --- /dev/null +++ b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml @@ -0,0 +1,23 @@ +[cerebrium.deployment] +name = "kalshi-bet-predictor" +python_version = "3.11" +docker_base_image_url = "debian:bookworm-slim" +disable_auth = true +include = ['./*', 'main.py', 'cerebrium.toml'] +exclude = ['.*'] + +[cerebrium.dependencies.paths] +pip = "requirements.txt" + +[cerebrium.hardware] +cpu = 4 +memory = 36 +compute = "ADA_L40" + +[cerebrium.scaling] +min_replicas = 1 +max_replicas = 2 +cooldown = 30 +replica_concurrency = 1 +scaling_metric = "concurrency_utilization" + diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py new file mode 100644 index 00000000..5904cf9a --- /dev/null +++ 
b/8-application-demos/6-kalshi-bet-predictor/main.py @@ -0,0 +1,170 @@ +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer +from typing import Tuple +import requests +import re +from dotenv import load_dotenv +import os +from exa_py import Exa + +def getKalshiQuestion(market_ticker)->Tuple[str,str]: + url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{market_ticker}" + try: + res = requests.get(url) + res.raise_for_status() + obj = res.json() + return obj['market']['rules_primary'] + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Error fetching Kalshi market data: {e}") + +def getKalshiOdds(market_ticker)->Tuple[str, str]: + url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{market_ticker}" + try: + res = requests.get(url) + res.raise_for_status() + obj = res.json() + return obj['market']['yes_ask'], obj['market']['no_ask'] + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Error fetching Kalshi market data: {e}") + + +class BetPredictor: + def __init__(self, model_name: str = "Qwen/Qwen3-4B-Instruct-2507"): + + self.tokenizer = AutoTokenizer.from_pretrained(model_name) + self.model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype = torch.bfloat16, + device_map="auto" + ) + + load_dotenv() + + self.exa = Exa(os.environ.get("EXA_API_KEY")) + + print(f"Loaded model {model_name}!") + + def _generate_response(self, prompt: str, max_new_tokens: int) -> str: + inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device) + input_ids_len = inputs['input_ids'].shape[-1] + + output_sequences = self.model.generate( + **inputs, + max_new_tokens=max_new_tokens, + pad_token_id=self.tokenizer.eos_token_id, + do_sample=False, + ) + + newly_generated_ids = output_sequences[0, input_ids_len:] + + response = self.tokenizer.decode(newly_generated_ids, skip_special_tokens=True).strip() + + print(f"Generated this response! 
{response}") + return response + + def convert_rules_to_question(self, rules:str) -> str: + prompt = ( + "You will receive a sentence that is a statement of the following type:" + "If , then the market resolves to Yes" + "Convert the conditional to a yes/no question" + "Your response SHOULD ONLY BE a SINGLE line consisting of the yes/no question:\n" + "Do not add ANY preamble, conclusion, or extra text.\n\n" + f"STATEMENT: {rules}\n" + ) + + raw_response = self._generate_response(prompt, max_new_tokens=400) + + return raw_response + + def get_relevant_questions(self, question: str) -> list[str]: + + prompt = ( + "Based on the following question, generate a list of 5 relevant questions " + "that one could search online to gather more information. " + "These questions should yield information that would be helpful to answering " + "the following question in an objective manner.\n\n" + "Your response SHOULD ONLY BE the following lines, in this exact format:\n" + "1. \n" + "2. \n" + "3. \n" + "4. \n" + "5. \n" + "Do not add ANY preamble, conclusion, or extra text.\n\n" + f"Question: \"{question}\"\n" + ) + + raw_response = self._generate_response(prompt, max_new_tokens=400) + + relevant_questions = [] + for line in raw_response.split('\n'): + line = line.strip() + if line and line[0].isdigit(): + clean_question = line.split('.', 1)[-1].strip() + relevant_questions.append(clean_question) + + return relevant_questions + + + def get_information(self, questions): + results = [self.exa.answer(q, text=True) for q in questions] + answers = [r.answer for r in results] + return answers + + def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]: + prompt = ( + "Analyze the provided information below to answer the given binary question. 
" + "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n" + "--- Information ---\n" + f"{information}\n\n" + "--- Question ---\n" + f"{question}\n\n" + "IMPORTANT INSTRUCTIONS:\n" + "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n" + " Yes: %, No: %, Explanation: \n" + "2. Percentages must sum to 100%.\n" + "3. Do NOT include any preamble, summary, or additional text.\n" + "4. Provide a brief but clear explanation supporting your probabilities.\n\n" + "AGAIN, Your response MUST ONLY be a single line in THIS EXACT FORMAT: Yes: %, No: %, Explanation: " + ) + + response = self._generate_response(prompt, max_new_tokens=800) + + match = re.search(r"Yes: (.*?), No: (.*?), Explanation: (.*)", response, re.DOTALL) + + if match: + yes, no, explanation = match.groups() + return yes.strip(), no.strip(), explanation.strip() + else: + raise ValueError(f"Failed to parse LLM response: {response}") + + def predict(self, question): + relevant_questions = self.get_relevant_questions(question) + answers = self.get_information(relevant_questions) + + information = "" + for i, v in enumerate(relevant_questions): + information += f"INFORMATION {i+1}: \n" + information += f"QUESTION {i+1}: {v}\n" + information += f"ANSWER {i+1}: {answers[i]} \n\n" + + yes, no, explanation = self.get_binary_answer_with_percentage(information, question) + return yes, no, explanation + + +predictor = BetPredictor() + +def predict(ticker: str): + rules = getKalshiQuestion(ticker) + question = predictor.convert_rules_to_question(rules) + + predYes, predNo, explanation = predictor.predict(question) + + realYes, realNo = getKalshiOdds(ticker) + + if realYes < predYes: # undervalued + buyYes = True + if realNo < predNo: # undervalued + buyNo = True + + return {"buy_yes":buyYes, "buy_no": buyNo, "yes": predYes, "no": predNo, "explanation": explanation} + diff --git a/8-application-demos/6-kalshi-bet-predictor/requirements.txt 
b/8-application-demos/6-kalshi-bet-predictor/requirements.txt new file mode 100644 index 00000000..328692b6 --- /dev/null +++ b/8-application-demos/6-kalshi-bet-predictor/requirements.txt @@ -0,0 +1,44 @@ +accelerate==1.10.1 +annotated-types==0.7.0 +anyio==4.11.0 +certifi==2025.10.5 +charset-normalizer==3.4.4 +distro==1.9.0 +exa-py==1.16.1 +filelock==3.20.0 +fsspec==2025.9.0 +greenlet==3.2.4 +h11==0.16.0 +hf-xet==1.1.10 +httpcore==1.0.9 +httpx==0.28.1 +huggingface-hub==0.35.3 +idna==3.11 +Jinja2==3.1.6 +jiter==0.11.1 +MarkupSafe==3.0.3 +mpmath==1.3.0 +networkx==3.5 +numpy==2.3.4 +openai==2.5.0 +packaging==25.0 +playwright==1.55.0 +psutil==7.1.0 +pydantic==2.12.3 +pydantic_core==2.41.4 +pyee==13.0.0 +PyPDF2==3.0.1 +python-dotenv==1.1.1 +PyYAML==6.0.3 +regex==2025.9.18 +requests==2.32.5 +safetensors==0.6.2 +sniffio==1.3.1 +sympy==1.14.0 +tokenizers==0.22.1 +torch==2.9.0 +tqdm==4.67.1 +transformers==4.57.1 +typing-inspection==0.4.2 +typing_extensions==4.15.0 +urllib3==2.5.0 From 07eb90a2a021c1c2940303663c8456f9250ab16b Mon Sep 17 00:00:00 2001 From: SeaUrc Date: Tue, 21 Oct 2025 22:28:47 -0400 Subject: [PATCH 02/10] fix: requirements and kalshi call and real odds comparison --- .../6-kalshi-bet-predictor/main.py | 40 ++++++++----------- .../6-kalshi-bet-predictor/requirements.txt | 13 +++--- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py index 5904cf9a..d49ef642 100644 --- a/8-application-demos/6-kalshi-bet-predictor/main.py +++ b/8-application-demos/6-kalshi-bet-predictor/main.py @@ -7,27 +7,17 @@ import os from exa_py import Exa -def getKalshiQuestion(market_ticker)->Tuple[str,str]: - url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{market_ticker}" - try: - res = requests.get(url) - res.raise_for_status() - obj = res.json() - return obj['market']['rules_primary'] - except requests.exceptions.RequestException as e: - raise 
RuntimeError(f"Error fetching Kalshi market data: {e}") -def getKalshiOdds(market_ticker)->Tuple[str, str]: +def getKalshiMarket(market_ticker)->Tuple[str,str]: url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{market_ticker}" try: res = requests.get(url) res.raise_for_status() obj = res.json() - return obj['market']['yes_ask'], obj['market']['no_ask'] + return obj except requests.exceptions.RequestException as e: raise RuntimeError(f"Error fetching Kalshi market data: {e}") - class BetPredictor: def __init__(self, model_name: str = "Qwen/Qwen3-4B-Instruct-2507"): @@ -102,8 +92,9 @@ def get_relevant_questions(self, question: str) -> list[str]: clean_question = line.split('.', 1)[-1].strip() relevant_questions.append(clean_question) - return relevant_questions + print(f"Generated relevant questions: {relevant_questions}") + return relevant_questions def get_information(self, questions): results = [self.exa.answer(q, text=True) for q in questions] @@ -124,7 +115,7 @@ def get_binary_answer_with_percentage(self, information: str, question: str) -> "2. Percentages must sum to 100%.\n" "3. Do NOT include any preamble, summary, or additional text.\n" "4. 
Provide a brief but clear explanation supporting your probabilities.\n\n" - "AGAIN, Your response MUST ONLY be a single line in THIS EXACT FORMAT: Yes: %, No: %, Explanation: " + "Again, your response MUST ONLY be a single line in THIS EXACT FORMAT: Yes: %, No: %, Explanation: " ) response = self._generate_response(prompt, max_new_tokens=800) @@ -154,17 +145,20 @@ def predict(self, question): predictor = BetPredictor() def predict(ticker: str): - rules = getKalshiQuestion(ticker) + market = getKalshiMarket(ticker) + rules = market['market']['rules_primary'] + question = predictor.convert_rules_to_question(rules) - predYes, predNo, explanation = predictor.predict(question) + pred_yes, pred_no, explanation = predictor.predict(question) - realYes, realNo = getKalshiOdds(ticker) - - if realYes < predYes: # undervalued - buyYes = True - if realNo < predNo: # undervalued - buyNo = True + pred_yes = int(pred_yes[:2]) + pred_no = int(pred_no[:2]) + + real_yes = int(market['market']['yes_ask']) + real_no = int(market['market']['no_ask']) - return {"buy_yes":buyYes, "buy_no": buyNo, "yes": predYes, "no": predNo, "explanation": explanation} + buy_yes = real_yes < pred_yes + buy_no = real_no < pred_no + return {"buy_yes":buy_yes, "buy_no": buy_no, "yes": pred_yes, "no": pred_no, "explanation": explanation} diff --git a/8-application-demos/6-kalshi-bet-predictor/requirements.txt b/8-application-demos/6-kalshi-bet-predictor/requirements.txt index 328692b6..b3d3fbd3 100644 --- a/8-application-demos/6-kalshi-bet-predictor/requirements.txt +++ b/8-application-demos/6-kalshi-bet-predictor/requirements.txt @@ -1,13 +1,13 @@ -accelerate==1.10.1 +accelerate==1.11.0 annotated-types==0.7.0 anyio==4.11.0 certifi==2025.10.5 charset-normalizer==3.4.4 distro==1.9.0 +dotenv==0.9.9 exa-py==1.16.1 filelock==3.20.0 fsspec==2025.9.0 -greenlet==3.2.4 h11==0.16.0 hf-xet==1.1.10 httpcore==1.0.9 @@ -20,17 +20,14 @@ MarkupSafe==3.0.3 mpmath==1.3.0 networkx==3.5 numpy==2.3.4 -openai==2.5.0 
+openai==2.6.0 packaging==25.0 -playwright==1.55.0 -psutil==7.1.0 +psutil==7.1.1 pydantic==2.12.3 pydantic_core==2.41.4 -pyee==13.0.0 -PyPDF2==3.0.1 python-dotenv==1.1.1 PyYAML==6.0.3 -regex==2025.9.18 +regex==2025.10.23 requests==2.32.5 safetensors==0.6.2 sniffio==1.3.1 From 669982a1164c923ccf10494110438995628b4b73 Mon Sep 17 00:00:00 2001 From: SeaUrc Date: Wed, 22 Oct 2025 23:00:12 -0400 Subject: [PATCH 03/10] add polymarket, switch to API call, create script to make parallel calls to cerebrium endpoint --- .../6-kalshi-bet-predictor/cerebrium.toml | 4 +- .../6-kalshi-bet-predictor/main.py | 270 ++++++++---------- .../6-kalshi-bet-predictor/predictor.py | 188 ++++++++++++ .../6-kalshi-bet-predictor/requirements.txt | 19 -- 4 files changed, 303 insertions(+), 178 deletions(-) create mode 100644 8-application-demos/6-kalshi-bet-predictor/predictor.py diff --git a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml index 67cd4b9f..7133c08c 100644 --- a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml +++ b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml @@ -3,7 +3,7 @@ name = "kalshi-bet-predictor" python_version = "3.11" docker_base_image_url = "debian:bookworm-slim" disable_auth = true -include = ['./*', 'main.py', 'cerebrium.toml'] +include = ['./*', 'predictor.py', 'cerebrium.toml'] exclude = ['.*'] [cerebrium.dependencies.paths] @@ -15,7 +15,7 @@ memory = 36 compute = "ADA_L40" [cerebrium.scaling] -min_replicas = 1 +min_replicas = 0 max_replicas = 2 cooldown = 30 replica_concurrency = 1 diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py index d49ef642..8a81115c 100644 --- a/8-application-demos/6-kalshi-bet-predictor/main.py +++ b/8-application-demos/6-kalshi-bet-predictor/main.py @@ -1,164 +1,120 @@ -import torch -from transformers import AutoModelForCausalLM, AutoTokenizer -from typing import Tuple +import csv 
import requests -import re -from dotenv import load_dotenv -import os -from exa_py import Exa - - -def getKalshiMarket(market_ticker)->Tuple[str,str]: - url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{market_ticker}" +from typing import Dict, List, Tuple +import asyncio +import aiohttp + +def load_markets(csv_path: str) -> List[Tuple[str, str]]: + markets = [] + with open(csv_path, 'r') as f: + reader = csv.reader(f) + next(reader) # Skip header if present + for row in reader: + if len(row) >= 2: + markets.append((row[0], row[1])) + return markets + +async def get_market_data(session: aiohttp.ClientSession, kalshi_id: str, + polymarket_slug: str, endpoint_url: str) -> Dict: + + payload = { + 'kalshi_id': kalshi_id, + 'polymarket_slug': polymarket_slug + } + try: - res = requests.get(url) - res.raise_for_status() - obj = res.json() - return obj - except requests.exceptions.RequestException as e: - raise RuntimeError(f"Error fetching Kalshi market data: {e}") - -class BetPredictor: - def __init__(self, model_name: str = "Qwen/Qwen3-4B-Instruct-2507"): - - self.tokenizer = AutoTokenizer.from_pretrained(model_name) - self.model = AutoModelForCausalLM.from_pretrained( - model_name, - torch_dtype = torch.bfloat16, - device_map="auto" - ) - - load_dotenv() - - self.exa = Exa(os.environ.get("EXA_API_KEY")) - - print(f"Loaded model {model_name}!") - - def _generate_response(self, prompt: str, max_new_tokens: int) -> str: - inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device) - input_ids_len = inputs['input_ids'].shape[-1] - - output_sequences = self.model.generate( - **inputs, - max_new_tokens=max_new_tokens, - pad_token_id=self.tokenizer.eos_token_id, - do_sample=False, - ) - - newly_generated_ids = output_sequences[0, input_ids_len:] - - response = self.tokenizer.decode(newly_generated_ids, skip_special_tokens=True).strip() - - print(f"Generated this response! 
{response}") - return response - - def convert_rules_to_question(self, rules:str) -> str: - prompt = ( - "You will receive a sentence that is a statement of the following type:" - "If , then the market resolves to Yes" - "Convert the conditional to a yes/no question" - "Your response SHOULD ONLY BE a SINGLE line consisting of the yes/no question:\n" - "Do not add ANY preamble, conclusion, or extra text.\n\n" - f"STATEMENT: {rules}\n" - ) - - raw_response = self._generate_response(prompt, max_new_tokens=400) - - return raw_response - - def get_relevant_questions(self, question: str) -> list[str]: - - prompt = ( - "Based on the following question, generate a list of 5 relevant questions " - "that one could search online to gather more information. " - "These questions should yield information that would be helpful to answering " - "the following question in an objective manner.\n\n" - "Your response SHOULD ONLY BE the following lines, in this exact format:\n" - "1. \n" - "2. \n" - "3. \n" - "4. \n" - "5. 
\n" - "Do not add ANY preamble, conclusion, or extra text.\n\n" - f"Question: \"{question}\"\n" - ) - - raw_response = self._generate_response(prompt, max_new_tokens=400) - - relevant_questions = [] - for line in raw_response.split('\n'): - line = line.strip() - if line and line[0].isdigit(): - clean_question = line.split('.', 1)[-1].strip() - relevant_questions.append(clean_question) - - print(f"Generated relevant questions: {relevant_questions}") - - return relevant_questions + async with session.post(endpoint_url, json=payload, timeout=aiohttp.ClientTimeout(total=10)) as response: + response.raise_for_status() + data = await response.json() + + kalshi_data = data['Kalshi'] + polymarket_data = data['Polymarket'] + + return { + 'kalshi_id': kalshi_id, + 'polymarket_slug': polymarket_slug, + 'kalshi_edge': kalshi_data['edge'], + 'polymarket_edge': polymarket_data['edge'], + 'kalshi_buy_yes': kalshi_data['buy_yes'], + 'kalshi_buy_no': kalshi_data['buy_no'], + 'polymarket_buy_yes': polymarket_data['buy_yes'], + 'polymarket_buy_no': polymarket_data['buy_no'], + } + except Exception as e: + print(f"Error fetching data for {kalshi_id}/{polymarket_slug}: {e}") + return None + +async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]: + markets = load_markets(csv_path) - def get_information(self, questions): - results = [self.exa.answer(q, text=True) for q in questions] - answers = [r.answer for r in results] - return answers - - def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]: - prompt = ( - "Analyze the provided information below to answer the given binary question. " - "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n" - "--- Information ---\n" - f"{information}\n\n" - "--- Question ---\n" - f"{question}\n\n" - "IMPORTANT INSTRUCTIONS:\n" - "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n" - " Yes: %, No: %, Explanation: \n" - "2. 
Percentages must sum to 100%.\n" - "3. Do NOT include any preamble, summary, or additional text.\n" - "4. Provide a brief but clear explanation supporting your probabilities.\n\n" - "Again, your response MUST ONLY be a single line in THIS EXACT FORMAT: Yes: %, No: %, Explanation: " - ) - - response = self._generate_response(prompt, max_new_tokens=800) - - match = re.search(r"Yes: (.*?), No: (.*?), Explanation: (.*)", response, re.DOTALL) - - if match: - yes, no, explanation = match.groups() - return yes.strip(), no.strip(), explanation.strip() - else: - raise ValueError(f"Failed to parse LLM response: {response}") - - def predict(self, question): - relevant_questions = self.get_relevant_questions(question) - answers = self.get_information(relevant_questions) - - information = "" - for i, v in enumerate(relevant_questions): - information += f"INFORMATION {i+1}: \n" - information += f"QUESTION {i+1}: {v}\n" - information += f"ANSWER {i+1}: {answers[i]} \n\n" + print(f"Fetching data for {len(markets)} markets all at once...") + + async with aiohttp.ClientSession() as session: + tasks = [get_market_data(session, kalshi_id, polymarket_slug, endpoint_url) + for kalshi_id, polymarket_slug in markets] - yes, no, explanation = self.get_binary_answer_with_percentage(information, question) - return yes, no, explanation - - -predictor = BetPredictor() - -def predict(ticker: str): - market = getKalshiMarket(ticker) - rules = market['market']['rules_primary'] - - question = predictor.convert_rules_to_question(rules) + results = await asyncio.gather(*tasks) - pred_yes, pred_no, explanation = predictor.predict(question) - - pred_yes = int(pred_yes[:2]) - pred_no = int(pred_no[:2]) - - real_yes = int(market['market']['yes_ask']) - real_no = int(market['market']['no_ask']) + return [r for r in results if r is not None] - buy_yes = real_yes < pred_yes - buy_no = real_no < pred_no +def compute_statistics(results: List[Dict]) -> None: + print("\n" + "="*80) + print("STATISTICS") + 
print("="*80) + + if not results: + print("No results to analyze") + return + + total_markets = len(results) + + kalshi_edges = [r['kalshi_edge'] for r in results] + total_kalshi_edge = sum(kalshi_edges) + + polymarket_edges = [r['polymarket_edge'] for r in results] + total_polymarket_edge = sum(polymarket_edges) + + kalshi_better_count = sum(1 for r in results if r['kalshi_edge'] > r['polymarket_edge']) + polymarket_better_count = sum(1 for r in results if r['polymarket_edge'] > r['kalshi_edge']) + equal_count = total_markets - kalshi_better_count - polymarket_better_count + + edge_differences = [abs(r['kalshi_edge'] - r['polymarket_edge']) for r in results] + avg_edge_difference = sum(edge_differences) / total_markets + max_edge_difference = max(edge_differences) + + # Results + print(f"\nTotal markets analyzed: {total_markets}") + print("\n" + "-"*80) + print("COMPARISON") + print("-"*80) + print(f"Markets with greater Kalshi edge: {kalshi_better_count} ({kalshi_better_count/total_markets*100:.1f}%)") + print(f"Markets with greater Polymarket edge: {polymarket_better_count} ({polymarket_better_count/total_markets*100:.1f}%)") + print(f"Markets with equal edge: {equal_count} ({equal_count/total_markets*100:.1f}%)") + print(f"\nAverage edge difference: {avg_edge_difference:.4f}") + print(f"Max edge difference: {max_edge_difference:.4f}") + + # Overall winner + print("\n" + "="*80) + if total_kalshi_edge > total_polymarket_edge: + advantage = total_kalshi_edge - total_polymarket_edge + print(f"OVERALL: Kalshi has greater total edge (+{advantage:.4f})") + elif total_polymarket_edge > total_kalshi_edge: + advantage = total_polymarket_edge - total_kalshi_edge + print(f"OVERALL: Polymarket has greater total edge (+{advantage:.4f})") + else: + print(f"OVERALL: Both platforms have equal total edge") + print("="*80) + +def main(): + CSV_PATH = 'markets.csv' + ENDPOINT_URL = 'https://{cerebrium}/predict' # Your hosted endpoint + + print("Starting async market analysis...") 
+ results = asyncio.run(analyze_markets_async(CSV_PATH, ENDPOINT_URL)) + + print(f"\nSuccessfully fetched {len(results)} markets") + + compute_statistics(results) - return {"buy_yes":buy_yes, "buy_no": buy_no, "yes": pred_yes, "no": pred_no, "explanation": explanation} +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/8-application-demos/6-kalshi-bet-predictor/predictor.py b/8-application-demos/6-kalshi-bet-predictor/predictor.py new file mode 100644 index 00000000..f1d9a4c0 --- /dev/null +++ b/8-application-demos/6-kalshi-bet-predictor/predictor.py @@ -0,0 +1,188 @@ +from typing import Tuple +import requests +import re +from dotenv import load_dotenv +import os +from exa_py import Exa +from openai import OpenAI + + +def getMarket(is_kalshi, ticker): + if is_kalshi: + url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}" # market ticker + else: + url = f"https://gamma-api.polymarket.com/markets/slug/{ticker}" # slug + try: + res = requests.get(url) + res.raise_for_status() + obj = res.json() + return obj + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Error fetching Kalshi market data: {e}") + + +class BetPredictor: + def __init__(self, model_name: str = "gpt-5-nano"): + + + load_dotenv() + exa_api_key = os.environ.get("EXA_API_KEY") + openai_api_key = os.environ.get("OPENAI_API_KEY") + + if not exa_api_key: + raise EnvironmentError("Missing EXA_API_KEY in environment variables") + if not openai_api_key: + raise EnvironmentError("Missing OPENAI_API_KEY in environment variables") + + self.exa = Exa(exa_api_key) + self.client = OpenAI(api_key=openai_api_key) + self.model_name = model_name + + print(f"Using model: {model_name}") + + def _generate_response(self, prompt: str) -> str: + + response = self.client.responses.create( + model=self.model_name, + input=prompt + ) + + output = response.output_text.strip() + print(f"Generated this response: {output}") + + return output + + def 
convert_rules_to_question(self, rules:str) -> str: + prompt = ( + "A market resolution statement usually describes the conditions under which a market would resolve to \"Yes\" or \"No\". " + "Your task is to extract and rewrite the core factual question that determines whether the market will resolve to \"Yes.\"\n" + "Input format: A sentence describing how a market will resolve.\n" + "Example: \"Will the market resolve to Yes if Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025?\"\n" + "Output format: A clear, grammatically correct factual question that reflects the underlying condition of the resolution, removing any meta-language about the market or its resolution.\n" + "The condition will start with \"if [event], then the market will resolve to \"Yes\" and you must convert it to \"Will [event]?\"\n" + "Examples:\n" + "Input: \"If Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025, then the market resolves to Yes\"\n" + "Output: \"Will Curtis Sliwa drop out of the NYC Mayoral race before Nov 4, 2025?\"\n" + "Input: \"If Donald Trump wins the 2024 U.S. presidential election, then the market resolves to Yes\"\n" + "Output: \"Will Donald Trump win the 2024 U.S. presidential election?\"\n" + "Only output the rewritten factual question with no explanations or commentary.\n" + f"STATEMENT: {rules}" + ) + + raw_response = self._generate_response(prompt) + + return raw_response + + def get_relevant_questions(self, question: str) -> list[str]: + + prompt = ( + "Based on the following question, generate a list of 5 relevant questions " + "that one could search online to gather more information. " + "These questions should yield information that would be helpful to answering " + "the following question in an objective manner.\n\n" + "Your response SHOULD ONLY BE the following lines, in this exact format:\n" + "1. \n" + "2. \n" + "3. \n" + "4. \n" + "5. 
\n" + "Do not add ANY preamble, conclusion, or extra text.\n\n" + f"Question: \"{question}\"" + ) + + raw_response = self._generate_response(prompt) + + relevant_questions = [] + for line in raw_response.split('\n'): + line = line.strip() + if line and line[0].isdigit(): + clean_question = line.split('.', 1)[-1].strip() + relevant_questions.append(clean_question) + + print(f"Generated relevant questions: {relevant_questions}") + + return relevant_questions + + + def get_information(self, questions): + results = [self.exa.answer(q, text=True) for q in questions] + answers = [r.answer for r in results] + return answers + + def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]: + prompt = ( + "Analyze the provided information below to answer the given binary question. " + "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n" + "--- Information ---\n" + f"{information}\n\n" + "--- Question ---\n" + f"{question}\n\n" + "IMPORTANT INSTRUCTIONS:\n" + "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n" + " Yes: %, No: %, Explanation: \n" + "2. Percentages must sum to 100%.\n" + "3. Do NOT include any preamble, summary, or additional text.\n" + "4. 
Provide a brief but clear explanation supporting your probabilities.\n\n" + ) + + response = self._generate_response(prompt) + + match = re.search(r"Yes: (.*?), No: (.*?), Explanation: (.*)", response, re.DOTALL) + + if match: + yes, no, explanation = match.groups() + return yes.strip(), no.strip(), explanation.strip() + else: + raise ValueError(f"Failed to parse LLM response: {response}") + + def predict(self, question): + relevant_questions = self.get_relevant_questions(question) + answers = self.get_information(relevant_questions) + + information = "" + for i, v in enumerate(relevant_questions): + information += f"INFORMATION {i+1}: \n" + information += f"QUESTION {i+1}: {v}\n" + information += f"ANSWER {i+1}: {answers[i]} \n\n" + + information.rstrip("\n") + + yes, no, explanation = self.get_binary_answer_with_percentage(information, question) + return yes, no, explanation + + +predictor = BetPredictor() + +def predictMarket(isKalshi: bool, ticker: str): + + if isKalshi: + market = getMarket(isKalshi, ticker) + rules = market['market']['rules_primary'] + print(f"Rules: {rules}") + question = predictor.convert_rules_to_question(rules) + else: + market = getMarket(isKalshi, ticker) + question = market['question'] + + print(f"Question: {question}") + + pred_yes, pred_no, explanation = predictor.predict(question) + + match_yes = re.search(r"(\d+)%", pred_yes) + match_no = re.search(r"(\d+)%", pred_no) + pred_yes = int(match_yes.group(1)) + pred_no = int(match_no.group(1)) + + real_yes = int(market['market']['yes_ask']) + real_no = int(market['market']['no_ask']) + + buy_yes = real_yes < pred_yes + buy_no = real_no < pred_no + + return {"buy_yes":buy_yes, "buy_no": buy_no, "edge": max(pred_yes-real_yes, pred_no-real_no), "yes": pred_yes, "no": pred_no, "explanation": explanation} + +def predict(kalshi_ticker, poly_slug): + kalshi_res = predictMarket(True, kalshi_ticker) + poly_res = predictMarket(False, poly_slug) + + return {"Kalshi": kalshi_res, "Polymarket": 
poly_res} \ No newline at end of file diff --git a/8-application-demos/6-kalshi-bet-predictor/requirements.txt b/8-application-demos/6-kalshi-bet-predictor/requirements.txt index b3d3fbd3..dfebee10 100644 --- a/8-application-demos/6-kalshi-bet-predictor/requirements.txt +++ b/8-application-demos/6-kalshi-bet-predictor/requirements.txt @@ -1,4 +1,3 @@ -accelerate==1.11.0 annotated-types==0.7.0 anyio==4.11.0 certifi==2025.10.5 @@ -6,36 +5,18 @@ charset-normalizer==3.4.4 distro==1.9.0 dotenv==0.9.9 exa-py==1.16.1 -filelock==3.20.0 -fsspec==2025.9.0 h11==0.16.0 -hf-xet==1.1.10 httpcore==1.0.9 httpx==0.28.1 -huggingface-hub==0.35.3 idna==3.11 -Jinja2==3.1.6 jiter==0.11.1 -MarkupSafe==3.0.3 -mpmath==1.3.0 -networkx==3.5 -numpy==2.3.4 openai==2.6.0 -packaging==25.0 -psutil==7.1.1 pydantic==2.12.3 pydantic_core==2.41.4 python-dotenv==1.1.1 -PyYAML==6.0.3 -regex==2025.10.23 requests==2.32.5 -safetensors==0.6.2 sniffio==1.3.1 -sympy==1.14.0 -tokenizers==0.22.1 -torch==2.9.0 tqdm==4.67.1 -transformers==4.57.1 typing-inspection==0.4.2 typing_extensions==4.15.0 urllib3==2.5.0 From e308d8c8dbc44031c8138f9501874929e63802a9 Mon Sep 17 00:00:00 2001 From: SeaUrc Date: Fri, 24 Oct 2025 15:28:04 -0400 Subject: [PATCH 04/10] find equiv markets --- .../6-kalshi-bet-predictor/cerebrium.toml | 8 +- .../6-kalshi-bet-predictor/compare.py | 126 ++++++++ .../find_equiv_markets.py | 203 ++++++++++++ .../6-kalshi-bet-predictor/main.py | 303 +++++++++++------- .../6-kalshi-bet-predictor/predictor.py | 188 ----------- 5 files changed, 526 insertions(+), 302 deletions(-) create mode 100644 8-application-demos/6-kalshi-bet-predictor/compare.py create mode 100644 8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py delete mode 100644 8-application-demos/6-kalshi-bet-predictor/predictor.py diff --git a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml index 7133c08c..0e73bf8d 100644 --- 
a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml +++ b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml @@ -3,7 +3,7 @@ name = "kalshi-bet-predictor" python_version = "3.11" docker_base_image_url = "debian:bookworm-slim" disable_auth = true -include = ['./*', 'predictor.py', 'cerebrium.toml'] +include = ['./*', 'main.py', 'cerebrium.toml'] exclude = ['.*'] [cerebrium.dependencies.paths] @@ -11,12 +11,12 @@ pip = "requirements.txt" [cerebrium.hardware] cpu = 4 -memory = 36 -compute = "ADA_L40" +memory = 16 +compute = "CPU" [cerebrium.scaling] min_replicas = 0 -max_replicas = 2 +max_replicas = 100 cooldown = 30 replica_concurrency = 1 scaling_metric = "concurrency_utilization" diff --git a/8-application-demos/6-kalshi-bet-predictor/compare.py b/8-application-demos/6-kalshi-bet-predictor/compare.py new file mode 100644 index 00000000..813dfc91 --- /dev/null +++ b/8-application-demos/6-kalshi-bet-predictor/compare.py @@ -0,0 +1,126 @@ +import csv +import json +from typing import Dict, List, Tuple +import asyncio +import aiohttp + +def load_markets(csv_path: str) -> List[Tuple[str, str]]: + markets = [] + with open(csv_path, 'r') as f: + reader = csv.reader(f) + for row in reader: + if len(row) >= 2: + markets.append((row[0], row[1])) + return markets + +async def get_market_data(session: aiohttp.ClientSession, kalshi_ticker: str, + polymarket_slug: str, endpoint_url: str) -> Dict: + + payload = json.dumps({ + 'kalshi_ticker': kalshi_ticker, + 'poly_slug': polymarket_slug + }) + + headers = { + 'Authorization': '', + 'Content-Type': 'application/json' + } + + try: + async with session.post(endpoint_url, headers=headers, data=payload) as response: + response.raise_for_status() + data = await response.json() + print(data) + data = data['result'] + + kalshi_data = data['kalshi'] + polymarket_data = data['polymarket'] + + return { + 'kalshi_ticker': kalshi_ticker, + 'polymarket_slug': polymarket_slug, + 'kalshi_edge': kalshi_data['edge'], + 
'polymarket_edge': polymarket_data['edge'], + 'kalshi_buy_yes': kalshi_data['buy_yes'], + 'kalshi_buy_no': kalshi_data['buy_no'], + 'polymarket_buy_yes': polymarket_data['buy_yes'], + 'polymarket_buy_no': polymarket_data['buy_no'], + } + except Exception as e: + print(f"Error fetching data for {kalshi_ticker}/{polymarket_slug}: {e}") + return None + +async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]: + markets = load_markets(csv_path) + + print(f"Fetching data for {len(markets)} markets all at once...") + + async with aiohttp.ClientSession() as session: + tasks = [get_market_data(session, kalshi_ticker, polymarket_slug, endpoint_url) + for kalshi_ticker, polymarket_slug in markets] + + results = await asyncio.gather(*tasks) + + return [r for r in results if r is not None] + +def compute_statistics(results: List[Dict]) -> None: + print("\n" + "="*80) + print("STATISTICS") + print("="*80) + + if not results: + print("No results to analyze") + return + + total_markets = len(results) + + kalshi_edges = [r['kalshi_edge'] for r in results] + total_kalshi_edge = sum(kalshi_edges) + + polymarket_edges = [r['polymarket_edge'] for r in results] + total_polymarket_edge = sum(polymarket_edges) + + kalshi_better_count = sum(1 for r in results if r['kalshi_edge'] > r['polymarket_edge']) + polymarket_better_count = sum(1 for r in results if r['polymarket_edge'] > r['kalshi_edge']) + equal_count = total_markets - kalshi_better_count - polymarket_better_count + + edge_differences = [abs(r['kalshi_edge'] - r['polymarket_edge']) for r in results] + avg_edge_difference = sum(edge_differences) / total_markets + max_edge_difference = max(edge_differences) + + print(f"\nTotal markets analyzed: {total_markets}") + print("\n" + "-"*80) + print("COMPARISON") + print("-"*80) + print(f"Markets with greater Kalshi edge: {kalshi_better_count} ({kalshi_better_count/total_markets*100:.1f}%)") + print(f"Markets with greater Polymarket edge: {polymarket_better_count} 
({polymarket_better_count/total_markets*100:.1f}%)") + print(f"Markets with equal edge: {equal_count} ({equal_count/total_markets*100:.1f}%)") + print(f"\nAverage edge difference: {avg_edge_difference:.4f}") + print(f"Max edge difference: {max_edge_difference:.4f}") + + print("\n" + "="*80) + if total_kalshi_edge > total_polymarket_edge: + advantage = total_kalshi_edge - total_polymarket_edge + print(f"OVERALL: Kalshi has greater total edge (+{advantage:.4f})") + print(f"OVERALL: Kalshi has an average edge of (+{advantage/total_markets:.4f}) per market") + elif total_polymarket_edge > total_kalshi_edge: + advantage = total_polymarket_edge - total_kalshi_edge + print(f"OVERALL: Polymarket has greater total edge (+{advantage:.4f})") + print(f"OVERALL: Polymarket has an average edge of (+{advantage/total_markets:.4f}) per market") + else: + print(f"OVERALL: Both platforms have equal total edge") + print("="*80) + +def main(): + CSV_PATH = '' + ENDPOINT_URL = '' + + print("Starting async market analysis...") + results = asyncio.run(analyze_markets_async(CSV_PATH, ENDPOINT_URL)) + + print(f"\nSuccessfully fetched {len(results)} markets") + + compute_statistics(results) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py new file mode 100644 index 00000000..e5ed8c9a --- /dev/null +++ b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py @@ -0,0 +1,203 @@ +import csv +import os +import requests +import faiss +from sentence_transformers import SentenceTransformer +from typing import List, Dict, Any + +# --- CONFIGURATION --- +SIMILARITY_THRESHOLD = 0.70 +MAX_MARKET_LIMIT = 40000 +TOP_K = 5 # number of top Polymarket markets to check for each Kalshi market +KALSHI_API_URL = "https://api.elections.kalshi.com/trade-api/v2/markets" +POLYMARKET_API_URL = "https://clob.polymarket.com/markets" +OUTPUT_FILE = 
"equivalent_markets.csv" + +# ---------------------- API FETCH FUNCTIONS ---------------------- + +def get_kalshi_markets() -> List[Dict[str, Any]]: + print("Fetching Kalshi markets...") + markets_list = [] + cursor = "" + + try: + while True: + params = {'limit': 1000} + if cursor: + params['cursor'] = cursor + + response = requests.get(KALSHI_API_URL, params=params) + response.raise_for_status() + data = response.json() + + if 'markets' not in data: + print("Error: 'markets' key not in Kalshi response.") + break + + for market in data['markets']: + if market['status'] == 'active' and market['market_type'] == 'binary': + + markets_list.append({ + 'platform': 'Kalshi', + 'title': market['title'], + 'ticker': market['ticker'], + 'url': f"https://kalshi.com/markets/{market['ticker']}", + 'event_url': f"https://kalshi.com/markets/{market['event_ticker']}", + 'close_date': market['close_time'] + }) + + cursor = data['cursor'] + print(f"{cursor} | {len(markets_list)}") + + if len(markets_list) > MAX_MARKET_LIMIT or not cursor: + break + + print(f"Found {len(markets_list)} open binary markets on Kalshi.") + return markets_list + + except requests.exceptions.RequestException as e: + print(f"Error fetching Kalshi markets: {e}") + return [] + +def get_kalshi_market(ticker): + title = requests.get(f"{KALSHI_API_URL}/{ticker}") + title = title.json() + return title['market']['title'] + +def get_polymarket_markets() -> List[Dict[str, Any]]: + print("Fetching Polymarket markets (CLOB API)...") + markets_list = [] + next_cursor = None + + try: + while True: + params = {} + if next_cursor: + params['next_cursor'] = next_cursor + + response = requests.get(POLYMARKET_API_URL, params=params) + response.raise_for_status() + data = response.json() + + market_list_page = data['data'] + if not market_list_page: + break + + for market in market_list_page: + if market.get('active') and not market.get('closed'): + markets_list.append({ + 'platform': 'Polymarket', + 'title': 
market.get('question'), + 'id': market.get('condition_id'), + 'url': f"https://polymarket.com/event/{market.get('market_slug')}", + 'close_date': market.get('end_date_iso') + }) + + next_cursor = data.get('next_cursor') + print(f"cursor {next_cursor} | {len(markets_list)}") + + if len(markets_list) > MAX_MARKET_LIMIT or not next_cursor or next_cursor == 'LTE=': + break + + print(f"Found {len(markets_list)} open markets on Polymarket.") + return markets_list + + except requests.exceptions.RequestException as e: + print(f"Error fetching Polymarket markets: {e}") + return [] + + +# ---------------------- FAISS-BASED MATCHING ---------------------- + +def find_similar_markets(kalshi_markets, polymarket_markets, threshold=0.9, top_k=TOP_K): + print("\nLoading NLP model (SentenceTransformer)...") + model = SentenceTransformer('all-MiniLM-L6-v2') + + kalshi_titles = [m['title'] for m in kalshi_markets] + poly_titles = [m['title'] for m in polymarket_markets] + + if not kalshi_titles or not poly_titles: + print("Not enough market data to compare.") + return [] + + print("Encoding market titles into embeddings...") + kalshi_embeddings = model.encode(kalshi_titles, convert_to_numpy=True, normalize_embeddings=True) + poly_embeddings = model.encode(poly_titles, convert_to_numpy=True, normalize_embeddings=True) + + print(f"Building FAISS index for {len(poly_embeddings)} Polymarket markets...") + dim = poly_embeddings.shape[1] + index = faiss.IndexFlatIP(dim) # Inner product for cosine similarity + index.add(poly_embeddings) + + print(f"Querying top {top_k} nearest Polymarket markets for each Kalshi market...") + scores, indices = index.search(kalshi_embeddings, top_k) + + potential_matches = [] + for i, kalshi_market in enumerate(kalshi_markets): + for j in range(top_k): + score = float(scores[i][j]) + if score >= threshold: + poly_market = polymarket_markets[indices[i][j]] + potential_matches.append({ + 'score': score, + 'kalshi_market': kalshi_market, + 'polymarket_market': 
poly_market + }) + if i % 100 == 0: + print(f"Processed {i}/{len(kalshi_markets)} Kalshi markets...") + + + return potential_matches + +def interactive_save(matches: List[Dict[str, Any]]): + print("\n--- Interactive Review Mode ---") + print("Press 'y' to save a match, anything else to skip.\n") + + file_exists = os.path.exists(OUTPUT_FILE) + with open(OUTPUT_FILE, "a", newline='', encoding="utf-8") as csvfile: + writer = csv.writer(csvfile) + if not file_exists: + writer.writerow(["kalshi_ticker", "polymarket_slug"]) + + for i, match in enumerate(matches): + kalshi_ticker = match['kalshi_market']['ticker'] + poly_slug = match['polymarket_market']['url'].split("event/")[1] + kalshi_title = get_kalshi_market(kalshi_ticker) + poly_title = match['polymarket_market']['title'] + score = match['score'] + + print(f"\nMatch #{i+1} (Score: {score:.4f})") + print(f"[KALSHI] {kalshi_title}") + print(f"[POLYMARKET] {poly_title}") + print(f" > Kalshi URL: {match['kalshi_market']['url']}") + print(f" > Polymarket URL:{match['polymarket_market']['url']}") + + choice = input("Save this match? (y/n): ").strip().lower() + if choice == 'y': + writer.writerow([kalshi_ticker, poly_slug]) + print("Saved.") + else: + print("Skipped.") + + print(f"\nDone. Saved matches to '{OUTPUT_FILE}'.") + +def main(): + kalshi_markets = get_kalshi_markets() + polymarket_markets = get_polymarket_markets() + + if not kalshi_markets or not polymarket_markets: + print("\nCould not fetch markets from one or both platforms. 
Exiting.") + return + + matches = find_similar_markets(kalshi_markets, polymarket_markets, SIMILARITY_THRESHOLD) + print(f"\n--- Found {len(matches)} Potential Matches ---") + + if not matches: + print("No strong matches found.") + return + + matches.sort(key=lambda x: x['score'], reverse=True) + interactive_save(matches) + +if __name__ == "__main__": + main() diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py index 8a81115c..cfa962e9 100644 --- a/8-application-demos/6-kalshi-bet-predictor/main.py +++ b/8-application-demos/6-kalshi-bet-predictor/main.py @@ -1,120 +1,203 @@ -import csv +import json +from typing import Tuple import requests -from typing import Dict, List, Tuple -import asyncio -import aiohttp - -def load_markets(csv_path: str) -> List[Tuple[str, str]]: - markets = [] - with open(csv_path, 'r') as f: - reader = csv.reader(f) - next(reader) # Skip header if present - for row in reader: - if len(row) >= 2: - markets.append((row[0], row[1])) - return markets - -async def get_market_data(session: aiohttp.ClientSession, kalshi_id: str, - polymarket_slug: str, endpoint_url: str) -> Dict: - - payload = { - 'kalshi_id': kalshi_id, - 'polymarket_slug': polymarket_slug - } - +import re +from dotenv import load_dotenv +import os +from exa_py import Exa +from openai import OpenAI + + +def getMarket(is_kalshi, ticker): + if is_kalshi: + url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}" # market ticker + else: + url = f"https://gamma-api.polymarket.com/markets/slug/{ticker}" # slug try: - async with session.post(endpoint_url, json=payload, timeout=aiohttp.ClientTimeout(total=10)) as response: - response.raise_for_status() - data = await response.json() - - kalshi_data = data['Kalshi'] - polymarket_data = data['Polymarket'] - - return { - 'kalshi_id': kalshi_id, - 'polymarket_slug': polymarket_slug, - 'kalshi_edge': kalshi_data['edge'], - 'polymarket_edge': 
polymarket_data['edge'], - 'kalshi_buy_yes': kalshi_data['buy_yes'], - 'kalshi_buy_no': kalshi_data['buy_no'], - 'polymarket_buy_yes': polymarket_data['buy_yes'], - 'polymarket_buy_no': polymarket_data['buy_no'], - } - except Exception as e: - print(f"Error fetching data for {kalshi_id}/{polymarket_slug}: {e}") - return None - -async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]: - markets = load_markets(csv_path) - - print(f"Fetching data for {len(markets)} markets all at once...") - - async with aiohttp.ClientSession() as session: - tasks = [get_market_data(session, kalshi_id, polymarket_slug, endpoint_url) - for kalshi_id, polymarket_slug in markets] + res = requests.get(url) + res.raise_for_status() + obj = res.json() + return obj + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Error fetching Kalshi market data: {e}") + + +class BetPredictor: + def __init__(self, model_name: str = "gpt-5-nano"): - results = await asyncio.gather(*tasks) - - return [r for r in results if r is not None] + load_dotenv() + + exa_api_key = os.environ.get("EXA_API_KEY") + openai_api_key = os.environ.get("OPENAI_API_KEY") + + if not exa_api_key: + raise EnvironmentError("Missing EXA_API_KEY in environment variables") + if not openai_api_key: + raise EnvironmentError("Missing OPENAI_API_KEY in environment variables") + + self.exa = Exa(exa_api_key) + self.client = OpenAI(api_key=openai_api_key) + self.model_name = model_name + + print(f"Using model: {model_name}") + + def _generate_response(self, prompt: str) -> str: + + response = self.client.responses.create( + model=self.model_name, + input=prompt + ) + + output = response.output_text.strip() + print(f"Generated this response: {output}") + + return output + + def convert_rules_to_question(self, rules:str) -> str: + prompt = ( + "A market resolution statement usually describes the conditions under which a market would resolve to \"Yes\" or \"No\". 
" + "Your task is to extract and rewrite the core factual question that determines whether the market will resolve to \"Yes.\"\n" + "Input format: A sentence describing how a market will resolve.\n" + "Example: \"Will the market resolve to Yes if Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025?\"\n" + "Output format: A clear, grammatically correct factual question that reflects the underlying condition of the resolution, removing any meta-language about the market or its resolution.\n" + "The condition will start with \"if [event], then the market will resolve to \"Yes\" and you must convert it to \"Will [event]?\"\n" + "Examples:\n" + "Input: \"If Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025, then the market resolves to Yes\"\n" + "Output: \"Will Curtis Sliwa drop out of the NYC Mayoral race before Nov 4, 2025?\"\n" + "Input: \"If Donald Trump wins the 2024 U.S. presidential election, then the market resolves to Yes\"\n" + "Output: \"Will Donald Trump win the 2024 U.S. presidential election?\"\n" + "Only output the rewritten factual question with no explanations or commentary.\n" + f"STATEMENT: {rules}" + ) + + raw_response = self._generate_response(prompt) + + return raw_response + + def get_relevant_questions(self, question: str) -> list[str]: + + prompt = ( + "Based on the following question, generate a list of 5 relevant questions " + "that one could search online to gather more information. " + "These questions should yield information that would be helpful to answering " + "the following question in an objective manner.\n\n" + "Your response SHOULD ONLY BE the following lines, in this exact format:\n" + "1. \n" + "2. \n" + "3. \n" + "4. \n" + "5. 
\n" + "Do not add ANY preamble, conclusion, or extra text.\n\n" + f"Question: \"{question}\"" + ) + + raw_response = self._generate_response(prompt) + + relevant_questions = [] + for line in raw_response.split('\n'): + line = line.strip() + if line and line[0].isdigit(): + clean_question = line.split('.', 1)[-1].strip() + relevant_questions.append(clean_question) + + print(f"Generated relevant questions: {relevant_questions}") + + return relevant_questions -def compute_statistics(results: List[Dict]) -> None: - print("\n" + "="*80) - print("STATISTICS") - print("="*80) - - if not results: - print("No results to analyze") - return - - total_markets = len(results) - - kalshi_edges = [r['kalshi_edge'] for r in results] - total_kalshi_edge = sum(kalshi_edges) - - polymarket_edges = [r['polymarket_edge'] for r in results] - total_polymarket_edge = sum(polymarket_edges) - - kalshi_better_count = sum(1 for r in results if r['kalshi_edge'] > r['polymarket_edge']) - polymarket_better_count = sum(1 for r in results if r['polymarket_edge'] > r['kalshi_edge']) - equal_count = total_markets - kalshi_better_count - polymarket_better_count - - edge_differences = [abs(r['kalshi_edge'] - r['polymarket_edge']) for r in results] - avg_edge_difference = sum(edge_differences) / total_markets - max_edge_difference = max(edge_differences) - - # Results - print(f"\nTotal markets analyzed: {total_markets}") - print("\n" + "-"*80) - print("COMPARISON") - print("-"*80) - print(f"Markets with greater Kalshi edge: {kalshi_better_count} ({kalshi_better_count/total_markets*100:.1f}%)") - print(f"Markets with greater Polymarket edge: {polymarket_better_count} ({polymarket_better_count/total_markets*100:.1f}%)") - print(f"Markets with equal edge: {equal_count} ({equal_count/total_markets*100:.1f}%)") - print(f"\nAverage edge difference: {avg_edge_difference:.4f}") - print(f"Max edge difference: {max_edge_difference:.4f}") - # Overall winner - print("\n" + "="*80) - if total_kalshi_edge > 
total_polymarket_edge: - advantage = total_kalshi_edge - total_polymarket_edge - print(f"OVERALL: Kalshi has greater total edge (+{advantage:.4f})") - elif total_polymarket_edge > total_kalshi_edge: - advantage = total_polymarket_edge - total_kalshi_edge - print(f"OVERALL: Polymarket has greater total edge (+{advantage:.4f})") - else: - print(f"OVERALL: Both platforms have equal total edge") - print("="*80) + def get_information(self, questions): + results = [self.exa.answer(q, text=True) for q in questions] + answers = [r.answer for r in results] + return answers + + def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]: + prompt = ( + "Analyze the provided information below to answer the given binary question. " + "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n" + "--- Information ---\n" + f"{information}\n\n" + "--- Question ---\n" + f"{question}\n\n" + "IMPORTANT INSTRUCTIONS:\n" + "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n" + " Yes: %, No: %, Explanation: \n" + "2. Percentages must sum to 100%.\n" + "3. Do NOT include any preamble, summary, or additional text.\n" + "4. 
Provide a brief but clear explanation supporting your probabilities.\n\n" + ) + + response = self._generate_response(prompt) + + match = re.search(r"Yes: (.*?), No: (.*?), Explanation: (.*)", response, re.DOTALL) -def main(): - CSV_PATH = 'markets.csv' - ENDPOINT_URL = 'https://{cerebrium}/predict' # Your hosted endpoint + if match: + yes, no, explanation = match.groups() + return yes.strip(), no.strip(), explanation.strip() + else: + raise ValueError(f"Failed to parse LLM response: {response}") + + def evaluate(self, question): + relevant_questions = self.get_relevant_questions(question) + answers = self.get_information(relevant_questions) + + information = "" + for i, v in enumerate(relevant_questions): + information += f"INFORMATION {i+1}: \n" + information += f"QUESTION {i+1}: {v}\n" + information += f"ANSWER {i+1}: {answers[i]} \n\n" + + information.rstrip("\n") + + yes, no, explanation = self.get_binary_answer_with_percentage(information, question) + return yes, no, explanation + + +predictor = BetPredictor() + +def predict(kalshi_ticker, poly_slug): + kalshi_market = getMarket(True, kalshi_ticker) + poly_market = getMarket(False, poly_slug) + question = poly_market['question'] # we use polymarket because they have direct question + + kalshi_real_yes = float(kalshi_market['market']['yes_ask']) + kalshi_real_no = float(kalshi_market['market']['no_ask']) - print("Starting async market analysis...") - results = asyncio.run(analyze_markets_async(CSV_PATH, ENDPOINT_URL)) + poly_values = json.loads(poly_market['outcomePrices']) + poly_real_yes, poly_real_no = [float(v) for v in poly_values] + + print(f"Question: {question}") - print(f"\nSuccessfully fetched {len(results)} markets") + pred_yes, pred_no, explanation = predictor.evaluate(question) + + match_yes = re.search(r"(\d+)%", pred_yes) + match_no = re.search(r"(\d+)%", pred_no) + pred_yes = float(match_yes.group(1)) + pred_no = float(match_no.group(1)) + + kalshi_real_yes = 
float(kalshi_market['market']['yes_ask']) + kalshi_real_no = float(kalshi_market['market']['no_ask']) - compute_statistics(results) + poly_values = json.loads(poly_market['outcomePrices']) + poly_real_yes, poly_real_no = [float(v)*100 for v in poly_values] + + kalshi_buy_yes = kalshi_real_yes < pred_yes + kalshi_buy_no = kalshi_real_no < pred_no + + poly_buy_yes = poly_real_yes < pred_yes + poly_buy_no = poly_real_no < pred_no -if __name__ == "__main__": - main() \ No newline at end of file + return { + "kalshi": { + "buy_yes":kalshi_buy_yes, + "buy_no": kalshi_buy_no, + "edge": max(pred_yes-kalshi_real_yes, pred_no-kalshi_real_no), + }, + "polymarket": { + "buy_yes":poly_buy_yes, + "buy_no": poly_buy_no, + "edge": max(pred_yes-poly_real_yes, pred_no-poly_real_no), + }, + "yes": pred_yes, + "no": pred_no, + "explanation": explanation + } \ No newline at end of file diff --git a/8-application-demos/6-kalshi-bet-predictor/predictor.py b/8-application-demos/6-kalshi-bet-predictor/predictor.py deleted file mode 100644 index f1d9a4c0..00000000 --- a/8-application-demos/6-kalshi-bet-predictor/predictor.py +++ /dev/null @@ -1,188 +0,0 @@ -from typing import Tuple -import requests -import re -from dotenv import load_dotenv -import os -from exa_py import Exa -from openai import OpenAI - - -def getMarket(is_kalshi, ticker): - if is_kalshi: - url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}" # market ticker - else: - url = f"https://gamma-api.polymarket.com/markets/slug/{ticker}" # slug - try: - res = requests.get(url) - res.raise_for_status() - obj = res.json() - return obj - except requests.exceptions.RequestException as e: - raise RuntimeError(f"Error fetching Kalshi market data: {e}") - - -class BetPredictor: - def __init__(self, model_name: str = "gpt-5-nano"): - - - load_dotenv() - exa_api_key = os.environ.get("EXA_API_KEY") - openai_api_key = os.environ.get("OPENAI_API_KEY") - - if not exa_api_key: - raise EnvironmentError("Missing EXA_API_KEY in 
environment variables") - if not openai_api_key: - raise EnvironmentError("Missing OPENAI_API_KEY in environment variables") - - self.exa = Exa(exa_api_key) - self.client = OpenAI(api_key=openai_api_key) - self.model_name = model_name - - print(f"Using model: {model_name}") - - def _generate_response(self, prompt: str) -> str: - - response = self.client.responses.create( - model=self.model_name, - input=prompt - ) - - output = response.output_text.strip() - print(f"Generated this response: {output}") - - return output - - def convert_rules_to_question(self, rules:str) -> str: - prompt = ( - "A market resolution statement usually describes the conditions under which a market would resolve to \"Yes\" or \"No\". " - "Your task is to extract and rewrite the core factual question that determines whether the market will resolve to \"Yes.\"\n" - "Input format: A sentence describing how a market will resolve.\n" - "Example: \"Will the market resolve to Yes if Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025?\"\n" - "Output format: A clear, grammatically correct factual question that reflects the underlying condition of the resolution, removing any meta-language about the market or its resolution.\n" - "The condition will start with \"if [event], then the market will resolve to \"Yes\" and you must convert it to \"Will [event]?\"\n" - "Examples:\n" - "Input: \"If Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025, then the market resolves to Yes\"\n" - "Output: \"Will Curtis Sliwa drop out of the NYC Mayoral race before Nov 4, 2025?\"\n" - "Input: \"If Donald Trump wins the 2024 U.S. presidential election, then the market resolves to Yes\"\n" - "Output: \"Will Donald Trump win the 2024 U.S. 
presidential election?\"\n" - "Only output the rewritten factual question with no explanations or commentary.\n" - f"STATEMENT: {rules}" - ) - - raw_response = self._generate_response(prompt) - - return raw_response - - def get_relevant_questions(self, question: str) -> list[str]: - - prompt = ( - "Based on the following question, generate a list of 5 relevant questions " - "that one could search online to gather more information. " - "These questions should yield information that would be helpful to answering " - "the following question in an objective manner.\n\n" - "Your response SHOULD ONLY BE the following lines, in this exact format:\n" - "1. \n" - "2. \n" - "3. \n" - "4. \n" - "5. \n" - "Do not add ANY preamble, conclusion, or extra text.\n\n" - f"Question: \"{question}\"" - ) - - raw_response = self._generate_response(prompt) - - relevant_questions = [] - for line in raw_response.split('\n'): - line = line.strip() - if line and line[0].isdigit(): - clean_question = line.split('.', 1)[-1].strip() - relevant_questions.append(clean_question) - - print(f"Generated relevant questions: {relevant_questions}") - - return relevant_questions - - - def get_information(self, questions): - results = [self.exa.answer(q, text=True) for q in questions] - answers = [r.answer for r in results] - return answers - - def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]: - prompt = ( - "Analyze the provided information below to answer the given binary question. " - "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n" - "--- Information ---\n" - f"{information}\n\n" - "--- Question ---\n" - f"{question}\n\n" - "IMPORTANT INSTRUCTIONS:\n" - "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n" - " Yes: %, No: %, Explanation: \n" - "2. Percentages must sum to 100%.\n" - "3. Do NOT include any preamble, summary, or additional text.\n" - "4. 
Provide a brief but clear explanation supporting your probabilities.\n\n" - ) - - response = self._generate_response(prompt) - - match = re.search(r"Yes: (.*?), No: (.*?), Explanation: (.*)", response, re.DOTALL) - - if match: - yes, no, explanation = match.groups() - return yes.strip(), no.strip(), explanation.strip() - else: - raise ValueError(f"Failed to parse LLM response: {response}") - - def predict(self, question): - relevant_questions = self.get_relevant_questions(question) - answers = self.get_information(relevant_questions) - - information = "" - for i, v in enumerate(relevant_questions): - information += f"INFORMATION {i+1}: \n" - information += f"QUESTION {i+1}: {v}\n" - information += f"ANSWER {i+1}: {answers[i]} \n\n" - - information.rstrip("\n") - - yes, no, explanation = self.get_binary_answer_with_percentage(information, question) - return yes, no, explanation - - -predictor = BetPredictor() - -def predictMarket(isKalshi: bool, ticker: str): - - if isKalshi: - market = getMarket(isKalshi, ticker) - rules = market['market']['rules_primary'] - print(f"Rules: {rules}") - question = predictor.convert_rules_to_question(rules) - else: - market = getMarket(isKalshi, ticker) - question = market['question'] - - print(f"Question: {question}") - - pred_yes, pred_no, explanation = predictor.predict(question) - - match_yes = re.search(r"(\d+)%", pred_yes) - match_no = re.search(r"(\d+)%", pred_no) - pred_yes = int(match_yes.group(1)) - pred_no = int(match_no.group(1)) - - real_yes = int(market['market']['yes_ask']) - real_no = int(market['market']['no_ask']) - - buy_yes = real_yes < pred_yes - buy_no = real_no < pred_no - - return {"buy_yes":buy_yes, "buy_no": buy_no, "edge": max(pred_yes-real_yes, pred_no-real_no), "yes": pred_yes, "no": pred_no, "explanation": explanation} - -def predict(kalshi_ticker, poly_slug): - kalshi_res = predictMarket(True, kalshi_ticker) - poly_res = predictMarket(False, poly_slug) - - return {"Kalshi": kalshi_res, "Polymarket": 
poly_res} \ No newline at end of file From be71caf2462db4d7af0d3594fb8fd3098a7b81aa Mon Sep 17 00:00:00 2001 From: SeaUrc Date: Fri, 24 Oct 2025 15:34:34 -0400 Subject: [PATCH 05/10] minor tweaks --- .../6-kalshi-bet-predictor/compare.py | 5 ++-- .../find_equiv_markets.py | 27 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/8-application-demos/6-kalshi-bet-predictor/compare.py b/8-application-demos/6-kalshi-bet-predictor/compare.py index 813dfc91..2facddf5 100644 --- a/8-application-demos/6-kalshi-bet-predictor/compare.py +++ b/8-application-demos/6-kalshi-bet-predictor/compare.py @@ -8,6 +8,7 @@ def load_markets(csv_path: str) -> List[Tuple[str, str]]: markets = [] with open(csv_path, 'r') as f: reader = csv.reader(f) + next(reader) # skip header for row in reader: if len(row) >= 2: markets.append((row[0], row[1])) @@ -112,8 +113,8 @@ def compute_statistics(results: List[Dict]) -> None: print("="*80) def main(): - CSV_PATH = '' - ENDPOINT_URL = '' + CSV_PATH = '' + ENDPOINT_URL = '' print("Starting async market analysis...") results = asyncio.run(analyze_markets_async(CSV_PATH, ENDPOINT_URL)) diff --git a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py index e5ed8c9a..a534dfc3 100644 --- a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py +++ b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py @@ -5,21 +5,20 @@ from sentence_transformers import SentenceTransformer from typing import List, Dict, Any -# --- CONFIGURATION --- -SIMILARITY_THRESHOLD = 0.70 -MAX_MARKET_LIMIT = 40000 +# --- Config --- +SIMILARITY_THRESHOLD = 0.70 # threshold for cosine simlarity +MAX_MARKET_LIMIT = 40000 # max number of active & open markets to gather TOP_K = 5 # number of top Polymarket markets to check for each Kalshi market KALSHI_API_URL = "https://api.elections.kalshi.com/trade-api/v2/markets" POLYMARKET_API_URL = 
"https://clob.polymarket.com/markets" -OUTPUT_FILE = "equivalent_markets.csv" +OUTPUT_FILE = "markets.csv" -# ---------------------- API FETCH FUNCTIONS ---------------------- +# ---------------------- API Fetch Functions ---------------------- def get_kalshi_markets() -> List[Dict[str, Any]]: print("Fetching Kalshi markets...") markets_list = [] cursor = "" - try: while True: params = {'limit': 1000} @@ -47,7 +46,7 @@ def get_kalshi_markets() -> List[Dict[str, Any]]: }) cursor = data['cursor'] - print(f"{cursor} | {len(markets_list)}") + print(f"Found {len(markets_list)} active and open markets") if len(markets_list) > MAX_MARKET_LIMIT or not cursor: break @@ -65,7 +64,7 @@ def get_kalshi_market(ticker): return title['market']['title'] def get_polymarket_markets() -> List[Dict[str, Any]]: - print("Fetching Polymarket markets (CLOB API)...") + print("Fetching Polymarket markets...") markets_list = [] next_cursor = None @@ -94,7 +93,7 @@ def get_polymarket_markets() -> List[Dict[str, Any]]: }) next_cursor = data.get('next_cursor') - print(f"cursor {next_cursor} | {len(markets_list)}") + print(f"Found {len(markets_list)} active and open markets") if len(markets_list) > MAX_MARKET_LIMIT or not next_cursor or next_cursor == 'LTE=': break @@ -107,10 +106,10 @@ def get_polymarket_markets() -> List[Dict[str, Any]]: return [] -# ---------------------- FAISS-BASED MATCHING ---------------------- +# ---------------------- Matching ---------------------- def find_similar_markets(kalshi_markets, polymarket_markets, threshold=0.9, top_k=TOP_K): - print("\nLoading NLP model (SentenceTransformer)...") + print("\nLoading NLP model...") model = SentenceTransformer('all-MiniLM-L6-v2') kalshi_titles = [m['title'] for m in kalshi_markets] @@ -120,11 +119,11 @@ def find_similar_markets(kalshi_markets, polymarket_markets, threshold=0.9, top_ print("Not enough market data to compare.") return [] - print("Encoding market titles into embeddings...") + print("Encoding titles into 
embeddings...") kalshi_embeddings = model.encode(kalshi_titles, convert_to_numpy=True, normalize_embeddings=True) poly_embeddings = model.encode(poly_titles, convert_to_numpy=True, normalize_embeddings=True) - print(f"Building FAISS index for {len(poly_embeddings)} Polymarket markets...") + print(f"Building vector index for {len(poly_embeddings)} Polymarket markets...") dim = poly_embeddings.shape[1] index = faiss.IndexFlatIP(dim) # Inner product for cosine similarity index.add(poly_embeddings) @@ -150,7 +149,7 @@ def find_similar_markets(kalshi_markets, polymarket_markets, threshold=0.9, top_ return potential_matches def interactive_save(matches: List[Dict[str, Any]]): - print("\n--- Interactive Review Mode ---") + print("\n--- Review Mode ---") print("Press 'y' to save a match, anything else to skip.\n") file_exists = os.path.exists(OUTPUT_FILE) From 36ae1f18f1fefb5765a0f1b0990b8f6721c4e6a5 Mon Sep 17 00:00:00 2001 From: SeaUrc Date: Fri, 31 Oct 2025 16:23:55 -0400 Subject: [PATCH 06/10] fix: code quality --- .../6-kalshi-bet-predictor/analyst.py | 146 ++++++++++++ .../6-kalshi-bet-predictor/compare.py | 68 +++--- .../find_equiv_markets.py | 26 +-- .../6-kalshi-bet-predictor/main.py | 219 ++++++------------ 4 files changed, 259 insertions(+), 200 deletions(-) create mode 100644 8-application-demos/6-kalshi-bet-predictor/analyst.py diff --git a/8-application-demos/6-kalshi-bet-predictor/analyst.py b/8-application-demos/6-kalshi-bet-predictor/analyst.py new file mode 100644 index 00000000..171610f3 --- /dev/null +++ b/8-application-demos/6-kalshi-bet-predictor/analyst.py @@ -0,0 +1,146 @@ + +from typing import Tuple +from dotenv import load_dotenv +import os +import json +from exa_py import Exa +from openai import OpenAI + +class BetAnalyst: + def __init__(self, model_name: str = "gpt-5-nano"): + + load_dotenv() + + exa_api_key = os.environ.get("EXA_API_KEY") + openai_api_key = os.environ.get("OPENAI_API_KEY") + + if not exa_api_key: + raise 
EnvironmentError("Missing EXA_API_KEY in environment variables") + if not openai_api_key: + raise EnvironmentError("Missing OPENAI_API_KEY in environment variables") + + self.exa = Exa(exa_api_key) + self.client = OpenAI(api_key=openai_api_key) + self.model_name = model_name + + print(f"Using model: {model_name}") + + def _generate_response(self, prompt: str, response_format: dict | None = None) -> str: + + request_args = { + "model": self.model_name, + "input": prompt, + } + + if response_format is not None: + request_args["response_format"] = response_format + + try: + response = self.client.responses.create(**request_args) + + output = response.output_text.strip() + print(f"Generated this response: {output}") + return output + except Exception as e: + raise RuntimeError(f"Error during API Call: {e}") + + def convert_market_to_resolution(self, rules:str) -> str: + prompt = ( + "A market resolution statement usually describes the conditions under which a market would resolve to \"Yes\" or \"No\". " + "Your task is to extract and rewrite the core factual question that determines whether the market will resolve to \"Yes.\"\n" + "Input format: A sentence describing how a market will resolve.\n" + "Example: \"Will the market resolve to Yes if Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025?\"\n" + "Output format: A clear, grammatically correct factual question that reflects the underlying condition of the resolution, removing any meta-language about the market or its resolution.\n" + "The condition will start with \"if [event], then the market will resolve to \"Yes\" and you must convert it to \"Will [event]?\"\n" + "Examples:\n" + "Input: \"If Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025, then the market resolves to Yes\"\n" + "Output: \"Will Curtis Sliwa drop out of the NYC Mayoral race before Nov 4, 2025?\"\n" + "Input: \"If Donald Trump wins the 2024 U.S. 
presidential election, then the market resolves to Yes\"\n" + "Output: \"Will Donald Trump win the 2024 U.S. presidential election?\"\n" + "Only output the rewritten factual question with no explanations or commentary.\n" + f"STATEMENT: {rules}" + ) + + raw_response = self._generate_response(prompt) + + return raw_response + + def get_relevant_questions(self, question: str) -> list[str]: + + prompt = ( + "Based on the following question, generate a list of 5 relevant questions " + "that one could search online to gather more information. " + "These questions should yield information that would be helpful to answering " + "the following question in an objective manner.\n\n" + "Your response SHOULD ONLY BE the following lines, in this exact format:\n" + "1. \n" + "2. \n" + "3. \n" + "4. \n" + "5. \n" + "Do not add ANY preamble, conclusion, or extra text.\n\n" + f"Question: \"{question}\"" + ) + + raw_response = self._generate_response(prompt) + + relevant_questions = [] + for line in raw_response.split('\n'): + line = line.strip() + if line and line[0].isdigit(): + clean_question = line.split('.', 1)[-1].strip() + relevant_questions.append(clean_question) + + print(f"Generated relevant questions: {relevant_questions}") + + return relevant_questions + + + def get_web_info(self, questions): + results = [self.exa.answer(q, text=True) for q in questions] + answers = [r.answer for r in results] + return answers + + def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]: + prompt = ( + "Analyze the provided information below to answer the given binary question. " + "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n" + "--- Information ---\n" + f"{information}\n\n" + "--- Question ---\n" + f"{question}\n\n" + "IMPORTANT INSTRUCTIONS:\n" + "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n" + " Yes: %, No: %, Explanation: \n" + "2. Percentages must sum to 100%.\n" + "3. 
Do NOT include any preamble, summary, or additional text.\n" + "4. Provide a brief but clear explanation supporting your probabilities.\n\n" + ) + + sentiment_schema = { + "type": "object", + "properties": { + "yes_percentage": { + "type": "number", + "description": "Confidence in positive sentiment (0-100)" + }, + "no_percentage": { + "type": "number", + "description": "Confidence in negative sentiment (0-100)" + }, + "explanation": { + "type": "string", + "description": "Brief explanation for the sentiment analysis" + } + }, + "required": ["yes_percentage", "no_percentage", "explanation"] + } + + response = self._generate_response(prompt, sentiment_schema) + + try: + parsed_data = json.loads(response) + return parsed_data.get('yes_percentage'), parsed_data.get('no_percentage'), parsed_data.get('explanation') + except json.JSONDecodeError: + raise RuntimeError(f"Failed to parse output as JSON: {response}") + \ No newline at end of file diff --git a/8-application-demos/6-kalshi-bet-predictor/compare.py b/8-application-demos/6-kalshi-bet-predictor/compare.py index 2facddf5..d0fa5e92 100644 --- a/8-application-demos/6-kalshi-bet-predictor/compare.py +++ b/8-application-demos/6-kalshi-bet-predictor/compare.py @@ -15,11 +15,11 @@ def load_markets(csv_path: str) -> List[Tuple[str, str]]: return markets async def get_market_data(session: aiohttp.ClientSession, kalshi_ticker: str, - polymarket_slug: str, endpoint_url: str) -> Dict: + poly_slug: str, endpoint_url: str) -> Dict: payload = json.dumps({ 'kalshi_ticker': kalshi_ticker, - 'poly_slug': polymarket_slug + 'poly_slug': poly_slug }) headers = { @@ -35,20 +35,20 @@ async def get_market_data(session: aiohttp.ClientSession, kalshi_ticker: str, data = data['result'] kalshi_data = data['kalshi'] - polymarket_data = data['polymarket'] + poly_data = data['polymarket'] return { 'kalshi_ticker': kalshi_ticker, - 'polymarket_slug': polymarket_slug, - 'kalshi_edge': kalshi_data['edge'], - 'polymarket_edge': 
polymarket_data['edge'], - 'kalshi_buy_yes': kalshi_data['buy_yes'], - 'kalshi_buy_no': kalshi_data['buy_no'], - 'polymarket_buy_yes': polymarket_data['buy_yes'], - 'polymarket_buy_no': polymarket_data['buy_no'], + 'poly_slug': poly_slug, + 'kalshi_edge_value': kalshi_data['edge'], + 'poly_edge_value': poly_data['edge'], + 'kalshi_is_buy_yes': kalshi_data['buy_yes'], + 'kalshi_is_buy_no': kalshi_data['buy_no'], + 'poly_is_buy_yes': poly_data['buy_yes'], + 'poly_is_buy_no': poly_data['buy_no'], } except Exception as e: - print(f"Error fetching data for {kalshi_ticker}/{polymarket_slug}: {e}") + print(f"Error fetching data for {kalshi_ticker}/{poly_slug}: {e}") return None async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]: @@ -57,8 +57,8 @@ async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]: print(f"Fetching data for {len(markets)} markets all at once...") async with aiohttp.ClientSession() as session: - tasks = [get_market_data(session, kalshi_ticker, polymarket_slug, endpoint_url) - for kalshi_ticker, polymarket_slug in markets] + tasks = [get_market_data(session, kalshi_ticker, poly_slug, endpoint_url) + for kalshi_ticker, poly_slug in markets] results = await asyncio.gather(*tasks) @@ -75,17 +75,17 @@ def compute_statistics(results: List[Dict]) -> None: total_markets = len(results) - kalshi_edges = [r['kalshi_edge'] for r in results] - total_kalshi_edge = sum(kalshi_edges) + kalshi_edges_values = [r['kalshi_edge_value'] for r in results] + kalshi_edge_sum = sum(kalshi_edges_values) - polymarket_edges = [r['polymarket_edge'] for r in results] - total_polymarket_edge = sum(polymarket_edges) + poly_edges_values = [r['poly_edge_value'] for r in results] + poly_edge_sum = sum(poly_edges_values) - kalshi_better_count = sum(1 for r in results if r['kalshi_edge'] > r['polymarket_edge']) - polymarket_better_count = sum(1 for r in results if r['polymarket_edge'] > r['kalshi_edge']) - equal_count = total_markets - 
kalshi_better_count - polymarket_better_count + kalshi_better_count = sum(1 for r in results if r['kalshi_edge_value'] > r['poly_edge_value']) + poly_better_count = sum(1 for r in results if r['poly_edge_value'] > r['kalshi_edge_value']) + equal_count = total_markets - kalshi_better_count - poly_better_count - edge_differences = [abs(r['kalshi_edge'] - r['polymarket_edge']) for r in results] + edge_differences = [abs(r['kalshi_edge_value'] - r['poly_edge_value']) for r in results] avg_edge_difference = sum(edge_differences) / total_markets max_edge_difference = max(edge_differences) @@ -94,27 +94,27 @@ def compute_statistics(results: List[Dict]) -> None: print("COMPARISON") print("-"*80) print(f"Markets with greater Kalshi edge: {kalshi_better_count} ({kalshi_better_count/total_markets*100:.1f}%)") - print(f"Markets with greater Polymarket edge: {polymarket_better_count} ({polymarket_better_count/total_markets*100:.1f}%)") + print(f"Markets with greater Polymarket edge: {poly_better_count} ({poly_better_count/total_markets*100:.1f}%)") print(f"Markets with equal edge: {equal_count} ({equal_count/total_markets*100:.1f}%)") - print(f"\nAverage edge difference: {avg_edge_difference:.4f}") - print(f"Max edge difference: {max_edge_difference:.4f}") + print(f"\nAverage edge difference: {avg_edge_difference:.4f} cents") + print(f"Max edge difference: {max_edge_difference:.4f} cents") print("\n" + "="*80) - if total_kalshi_edge > total_polymarket_edge: - advantage = total_kalshi_edge - total_polymarket_edge - print(f"OVERALL: Kalshi has greater total edge (+{advantage:.4f})") - print(f"OVERALL: Kalshi has an average edge of (+{advantage/total_markets:.4f}) per market") - elif total_polymarket_edge > total_kalshi_edge: - advantage = total_polymarket_edge - total_kalshi_edge - print(f"OVERALL: Polymarket has greater total edge (+{advantage:.4f})") - print(f"OVERALL: Polymarket has an average edge of (+{advantage/total_markets:.4f}) per market") + if kalshi_edge_sum > 
poly_edge_sum: + advantage = kalshi_edge_sum - poly_edge_sum + print(f"OVERALL: Kalshi has greater total edge (+{advantage:.4f}) cents") + print(f"OVERALL: Kalshi has an average edge of (+{advantage/total_markets:.4f}) cents per market") + elif poly_edge_sum > kalshi_edge_sum: + advantage = poly_edge_sum - kalshi_edge_sum + print(f"OVERALL: Polymarket has greater total edge (+{advantage:.4f}) cents") + print(f"OVERALL: Polymarket has an average edge of (+{advantage/total_markets:.4f}) cents per market") else: print(f"OVERALL: Both platforms have equal total edge") print("="*80) def main(): - CSV_PATH = '' - ENDPOINT_URL = '' + CSV_PATH = "" + ENDPOINT_URL = '' print("Starting async market analysis...") results = asyncio.run(analyze_markets_async(CSV_PATH, ENDPOINT_URL)) diff --git a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py index a534dfc3..e2806174 100644 --- a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py +++ b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py @@ -13,8 +13,6 @@ POLYMARKET_API_URL = "https://clob.polymarket.com/markets" OUTPUT_FILE = "markets.csv" -# ---------------------- API Fetch Functions ---------------------- - def get_kalshi_markets() -> List[Dict[str, Any]]: print("Fetching Kalshi markets...") markets_list = [] @@ -106,14 +104,12 @@ def get_polymarket_markets() -> List[Dict[str, Any]]: return [] -# ---------------------- Matching ---------------------- - -def find_similar_markets(kalshi_markets, polymarket_markets, threshold=0.9, top_k=TOP_K): +def find_similar_markets(kalshi_markets, poly_markets, threshold=0.9, top_k=TOP_K): print("\nLoading NLP model...") model = SentenceTransformer('all-MiniLM-L6-v2') kalshi_titles = [m['title'] for m in kalshi_markets] - poly_titles = [m['title'] for m in polymarket_markets] + poly_titles = [m['title'] for m in poly_markets] if not kalshi_titles or not poly_titles: print("Not 
enough market data to compare.") @@ -136,11 +132,11 @@ def find_similar_markets(kalshi_markets, polymarket_markets, threshold=0.9, top_ for j in range(top_k): score = float(scores[i][j]) if score >= threshold: - poly_market = polymarket_markets[indices[i][j]] + poly_market = poly_markets[indices[i][j]] potential_matches.append({ 'score': score, 'kalshi_market': kalshi_market, - 'polymarket_market': poly_market + 'poly_market': poly_market }) if i % 100 == 0: print(f"Processed {i}/{len(kalshi_markets)} Kalshi markets...") @@ -156,20 +152,20 @@ def interactive_save(matches: List[Dict[str, Any]]): with open(OUTPUT_FILE, "a", newline='', encoding="utf-8") as csvfile: writer = csv.writer(csvfile) if not file_exists: - writer.writerow(["kalshi_ticker", "polymarket_slug"]) + writer.writerow(["kalshi_ticker", "poly_slug"]) for i, match in enumerate(matches): kalshi_ticker = match['kalshi_market']['ticker'] - poly_slug = match['polymarket_market']['url'].split("event/")[1] + poly_slug = match['poly_market']['url'].split("event/")[1] kalshi_title = get_kalshi_market(kalshi_ticker) - poly_title = match['polymarket_market']['title'] + poly_title = match['poly_market']['title'] score = match['score'] print(f"\nMatch #{i+1} (Score: {score:.4f})") print(f"[KALSHI] {kalshi_title}") print(f"[POLYMARKET] {poly_title}") print(f" > Kalshi URL: {match['kalshi_market']['url']}") - print(f" > Polymarket URL:{match['polymarket_market']['url']}") + print(f" > Polymarket URL:{match['poly_market']['url']}") choice = input("Save this match? (y/n): ").strip().lower() if choice == 'y': @@ -182,13 +178,13 @@ def interactive_save(matches: List[Dict[str, Any]]): def main(): kalshi_markets = get_kalshi_markets() - polymarket_markets = get_polymarket_markets() + poly_markets = get_polymarket_markets() - if not kalshi_markets or not polymarket_markets: + if not kalshi_markets or not poly_markets: print("\nCould not fetch markets from one or both platforms. 
Exiting.") return - matches = find_similar_markets(kalshi_markets, polymarket_markets, SIMILARITY_THRESHOLD) + matches = find_similar_markets(kalshi_markets, poly_markets, SIMILARITY_THRESHOLD) print(f"\n--- Found {len(matches)} Potential Matches ---") if not matches: diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py index cfa962e9..7fb89bcd 100644 --- a/8-application-demos/6-kalshi-bet-predictor/main.py +++ b/8-application-demos/6-kalshi-bet-predictor/main.py @@ -1,18 +1,16 @@ import json -from typing import Tuple import requests import re -from dotenv import load_dotenv -import os -from exa_py import Exa -from openai import OpenAI +from dataclasses import dataclass +from analyst import BetAnalyst +@dataclass +class MarketData: + question: str + yes_price: str + no_price: str -def getMarket(is_kalshi, ticker): - if is_kalshi: - url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}" # market ticker - else: - url = f"https://gamma-api.polymarket.com/markets/slug/{ticker}" # slug +def _fetch_api_data(url: str): try: res = requests.get(url) res.raise_for_status() @@ -20,182 +18,101 @@ def getMarket(is_kalshi, ticker): return obj except requests.exceptions.RequestException as e: raise RuntimeError(f"Error fetching Kalshi market data: {e}") - - -class BetPredictor: - def __init__(self, model_name: str = "gpt-5-nano"): - - load_dotenv() - exa_api_key = os.environ.get("EXA_API_KEY") - openai_api_key = os.environ.get("OPENAI_API_KEY") - - if not exa_api_key: - raise EnvironmentError("Missing EXA_API_KEY in environment variables") - if not openai_api_key: - raise EnvironmentError("Missing OPENAI_API_KEY in environment variables") - - self.exa = Exa(exa_api_key) - self.client = OpenAI(api_key=openai_api_key) - self.model_name = model_name - - print(f"Using model: {model_name}") - - def _generate_response(self, prompt: str) -> str: +def get_kalshi_market(ticker: str) -> MarketData: + url = 
f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}" + raw_data = _fetch_api_data(url) - response = self.client.responses.create( - model=self.model_name, - input=prompt + try: + market = raw_data['market'] + return MarketData( + question = market['title'], + yes_price=float(market['yes_ask']), + no_price=float(market['no_ask']) ) + except (KeyError, TypeError, ValueError) as e: + raise RuntimeError(f"Error parsing Kalshi data structure: {e}") from e - output = response.output_text.strip() - print(f"Generated this response: {output}") - - return output +def get_polymarket_market(slug: str) -> MarketData: + url = f"https://gamma-api.polymarket.com/markets/slug/{slug}" # slug + raw_data = _fetch_api_data(url) - def convert_rules_to_question(self, rules:str) -> str: - prompt = ( - "A market resolution statement usually describes the conditions under which a market would resolve to \"Yes\" or \"No\". " - "Your task is to extract and rewrite the core factual question that determines whether the market will resolve to \"Yes.\"\n" - "Input format: A sentence describing how a market will resolve.\n" - "Example: \"Will the market resolve to Yes if Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025?\"\n" - "Output format: A clear, grammatically correct factual question that reflects the underlying condition of the resolution, removing any meta-language about the market or its resolution.\n" - "The condition will start with \"if [event], then the market will resolve to \"Yes\" and you must convert it to \"Will [event]?\"\n" - "Examples:\n" - "Input: \"If Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025, then the market resolves to Yes\"\n" - "Output: \"Will Curtis Sliwa drop out of the NYC Mayoral race before Nov 4, 2025?\"\n" - "Input: \"If Donald Trump wins the 2024 U.S. presidential election, then the market resolves to Yes\"\n" - "Output: \"Will Donald Trump win the 2024 U.S. 
presidential election?\"\n" - "Only output the rewritten factual question with no explanations or commentary.\n" - f"STATEMENT: {rules}" - ) - - raw_response = self._generate_response(prompt) - - return raw_response - - def get_relevant_questions(self, question: str) -> list[str]: + try: + poly_values = json.loads(raw_data['outcomePrices']) + yes_price, no_price = [float(v) for v in poly_values] - prompt = ( - "Based on the following question, generate a list of 5 relevant questions " - "that one could search online to gather more information. " - "These questions should yield information that would be helpful to answering " - "the following question in an objective manner.\n\n" - "Your response SHOULD ONLY BE the following lines, in this exact format:\n" - "1. \n" - "2. \n" - "3. \n" - "4. \n" - "5. \n" - "Do not add ANY preamble, conclusion, or extra text.\n\n" - f"Question: \"{question}\"" + return MarketData( + question = raw_data['question'], + yes_price=yes_price, + no_price=no_price ) - - raw_response = self._generate_response(prompt) - - relevant_questions = [] - for line in raw_response.split('\n'): - line = line.strip() - if line and line[0].isdigit(): - clean_question = line.split('.', 1)[-1].strip() - relevant_questions.append(clean_question) + except (KeyError, TypeError, ValueError) as e: + raise RuntimeError(f"Error parsing Kalshi data structure: {e}") from e - print(f"Generated relevant questions: {relevant_questions}") - return relevant_questions - - - def get_information(self, questions): - results = [self.exa.answer(q, text=True) for q in questions] - answers = [r.answer for r in results] - return answers +def getMarket(is_kalshi, ticker): + if is_kalshi: + url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}" # market ticker + else: + url = f"https://gamma-api.polymarket.com/markets/slug/{ticker}" # slug + try: + res = requests.get(url) + res.raise_for_status() + obj = res.json() + return obj + except 
requests.exceptions.RequestException as e: + raise RuntimeError(f"Error fetching Kalshi market data: {e}") - def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]: - prompt = ( - "Analyze the provided information below to answer the given binary question. " - "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n" - "--- Information ---\n" - f"{information}\n\n" - "--- Question ---\n" - f"{question}\n\n" - "IMPORTANT INSTRUCTIONS:\n" - "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n" - " Yes: %, No: %, Explanation: \n" - "2. Percentages must sum to 100%.\n" - "3. Do NOT include any preamble, summary, or additional text.\n" - "4. Provide a brief but clear explanation supporting your probabilities.\n\n" - ) - response = self._generate_response(prompt) +def evaluate(analyst, question): + # Generate questions using OpenAI API + relevant_questions = analyst.get_relevant_questions(question) + # Use Exa semantic search to retrieve answers to questions + answers = analyst.get_information(relevant_questions) - match = re.search(r"Yes: (.*?), No: (.*?), Explanation: (.*)", response, re.DOTALL) + information = "" + for i, v in enumerate(relevant_questions): + information += f"INFORMATION {i+1}: \n" + information += f"QUESTION {i+1}: {v}\n" + information += f"ANSWER {i+1}: {answers[i]} \n\n" - if match: - yes, no, explanation = match.groups() - return yes.strip(), no.strip(), explanation.strip() - else: - raise ValueError(f"Failed to parse LLM response: {response}") + information.rstrip("\n") - def evaluate(self, question): - relevant_questions = self.get_relevant_questions(question) - answers = self.get_information(relevant_questions) - - information = "" - for i, v in enumerate(relevant_questions): - information += f"INFORMATION {i+1}: \n" - information += f"QUESTION {i+1}: {v}\n" - information += f"ANSWER {i+1}: {answers[i]} \n\n" - - information.rstrip("\n") - - yes, no, 
explanation = self.get_binary_answer_with_percentage(information, question) - return yes, no, explanation - - -predictor = BetPredictor() + # Passes relevant Q&As to OpenAI API and generates Y/N percentage with explanation + yes, no, explanation = analyst.get_binary_answer_with_percentage(information, question) + return yes, no, explanation def predict(kalshi_ticker, poly_slug): - kalshi_market = getMarket(True, kalshi_ticker) - poly_market = getMarket(False, poly_slug) - question = poly_market['question'] # we use polymarket because they have direct question - - kalshi_real_yes = float(kalshi_market['market']['yes_ask']) - kalshi_real_no = float(kalshi_market['market']['no_ask']) - - poly_values = json.loads(poly_market['outcomePrices']) - poly_real_yes, poly_real_no = [float(v) for v in poly_values] + kalshi_market = get_kalshi_market(kalshi_ticker) + poly_market = get_polymarket_market(poly_slug) + question = poly_market.question # we use polymarket because they have direct question print(f"Question: {question}") - pred_yes, pred_no, explanation = predictor.evaluate(question) + analyst = BetAnalyst() + pred_yes, pred_no, explanation = evaluate(analyst, question) match_yes = re.search(r"(\d+)%", pred_yes) match_no = re.search(r"(\d+)%", pred_no) pred_yes = float(match_yes.group(1)) pred_no = float(match_no.group(1)) - - kalshi_real_yes = float(kalshi_market['market']['yes_ask']) - kalshi_real_no = float(kalshi_market['market']['no_ask']) - poly_values = json.loads(poly_market['outcomePrices']) - poly_real_yes, poly_real_no = [float(v)*100 for v in poly_values] - - kalshi_buy_yes = kalshi_real_yes < pred_yes - kalshi_buy_no = kalshi_real_no < pred_no + kalshi_buy_yes = kalshi_market.yes_price < pred_yes + kalshi_buy_no = kalshi_market.no_price < pred_no - poly_buy_yes = poly_real_yes < pred_yes - poly_buy_no = poly_real_no < pred_no + poly_buy_yes = poly_market.yes_price < pred_yes + poly_buy_no = poly_market.no_price < pred_no return { "kalshi": { 
"buy_yes":kalshi_buy_yes, "buy_no": kalshi_buy_no, - "edge": max(pred_yes-kalshi_real_yes, pred_no-kalshi_real_no), + "edge": max(pred_yes-kalshi_market.yes_price, pred_no-kalshi_market.no_price), }, "polymarket": { "buy_yes":poly_buy_yes, "buy_no": poly_buy_no, - "edge": max(pred_yes-poly_real_yes, pred_no-poly_real_no), + "edge": max(pred_yes-poly_market.yes_price, pred_no-poly_market.no_price), }, "yes": pred_yes, "no": pred_no, From bbe69f2102cb1689efd723960ed72ef6ae3241d6 Mon Sep 17 00:00:00 2001 From: SeaUrc Date: Fri, 31 Oct 2025 19:35:38 -0400 Subject: [PATCH 07/10] minor fixes --- .../6-kalshi-bet-predictor/analyst.py | 71 +++++++++---------- .../6-kalshi-bet-predictor/compare.py | 4 +- .../6-kalshi-bet-predictor/main.py | 4 +- 3 files changed, 38 insertions(+), 41 deletions(-) diff --git a/8-application-demos/6-kalshi-bet-predictor/analyst.py b/8-application-demos/6-kalshi-bet-predictor/analyst.py index 171610f3..f50bd3a9 100644 --- a/8-application-demos/6-kalshi-bet-predictor/analyst.py +++ b/8-application-demos/6-kalshi-bet-predictor/analyst.py @@ -5,6 +5,7 @@ import json from exa_py import Exa from openai import OpenAI +from pydantic import BaseModel class BetAnalyst: def __init__(self, model_name: str = "gpt-5-nano"): @@ -25,24 +26,34 @@ def __init__(self, model_name: str = "gpt-5-nano"): print(f"Using model: {model_name}") - def _generate_response(self, prompt: str, response_format: dict | None = None) -> str: - - request_args = { - "model": self.model_name, - "input": prompt, - } - - if response_format is not None: - request_args["response_format"] = response_format - + def _generate_response(self, prompt: str, text_format = None): try: - response = self.client.responses.create(**request_args) + response = self.client.responses.create( + model=self.model_name, + input=prompt, + ) + + output_text = response.output_text.strip() + print(f"Generated raw response: {output_text}") + + if text_format is not None: + parsed = self.client.responses.parse( + 
model=self.model_name, + input=[ + { + "role": "user", + "content": output_text + }, + ], + text_format=text_format, + ) + print(f"Parsed structured response: {parsed.output_parsed}") + return parsed.output_parsed + + return output_text - output = response.output_text.strip() - print(f"Generated this response: {output}") - return output except Exception as e: - raise RuntimeError(f"Error during API Call: {e}") + raise RuntimeError(f"Error during API call: {e}") from e def convert_market_to_resolution(self, rules:str) -> str: prompt = ( @@ -117,30 +128,16 @@ def get_binary_answer_with_percentage(self, information: str, question: str) -> "4. Provide a brief but clear explanation supporting your probabilities.\n\n" ) - sentiment_schema = { - "type": "object", - "properties": { - "yes_percentage": { - "type": "number", - "description": "Confidence in positive sentiment (0-100)" - }, - "no_percentage": { - "type": "number", - "description": "Confidence in negative sentiment (0-100)" - }, - "explanation": { - "type": "string", - "description": "Brief explanation for the sentiment analysis" - } - }, - "required": ["yes_percentage", "no_percentage", "explanation"] - } - - response = self._generate_response(prompt, sentiment_schema) + class Response(BaseModel): + yes_percentage: str + no_percentage: str + explanation: str + + response = self._generate_response(prompt, Response) + print(f"HELLO {response}") try: - parsed_data = json.loads(response) - return parsed_data.get('yes_percentage'), parsed_data.get('no_percentage'), parsed_data.get('explanation') + return response.yes_percentage, response.no_percentage, response.explanation except json.JSONDecodeError: raise RuntimeError(f"Failed to parse output as JSON: {response}") \ No newline at end of file diff --git a/8-application-demos/6-kalshi-bet-predictor/compare.py b/8-application-demos/6-kalshi-bet-predictor/compare.py index d0fa5e92..c8a184ce 100644 --- a/8-application-demos/6-kalshi-bet-predictor/compare.py +++ 
b/8-application-demos/6-kalshi-bet-predictor/compare.py @@ -109,12 +109,12 @@ def compute_statistics(results: List[Dict]) -> None: print(f"OVERALL: Polymarket has greater total edge (+{advantage:.4f}) cents") print(f"OVERALL: Polymarket has an average edge of (+{advantage/total_markets:.4f}) cents per market") else: - print(f"OVERALL: Both platforms have equal total edge") + print("OVERALL: Both platforms have equal total edge") print("="*80) def main(): CSV_PATH = "" - ENDPOINT_URL = '' + ENDPOINT_URL = "" print("Starting async market analysis...") results = asyncio.run(analyze_markets_async(CSV_PATH, ENDPOINT_URL)) diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py index 7fb89bcd..e32a3b29 100644 --- a/8-application-demos/6-kalshi-bet-predictor/main.py +++ b/8-application-demos/6-kalshi-bet-predictor/main.py @@ -68,7 +68,7 @@ def evaluate(analyst, question): # Generate questions using OpenAI API relevant_questions = analyst.get_relevant_questions(question) # Use Exa semantic search to retrieve answers to questions - answers = analyst.get_information(relevant_questions) + answers = analyst.get_web_info(relevant_questions) information = "" for i, v in enumerate(relevant_questions): @@ -117,4 +117,4 @@ def predict(kalshi_ticker, poly_slug): "yes": pred_yes, "no": pred_no, "explanation": explanation - } \ No newline at end of file + } From a2085e4d432efb0691db0ed4869b9d5d43d1a996 Mon Sep 17 00:00:00 2001 From: SeaUrc Date: Fri, 31 Oct 2025 20:15:26 -0400 Subject: [PATCH 08/10] add readme --- .../6-kalshi-bet-predictor/README.md | 55 +++++++++++++++++++ .../6-kalshi-bet-predictor/cerebrium.toml | 2 +- .../cerebrium_requirements.txt | 22 ++++++++ .../6-kalshi-bet-predictor/requirements.txt | 32 +++++++++++ 4 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 8-application-demos/6-kalshi-bet-predictor/README.md create mode 100644 
8-application-demos/6-kalshi-bet-predictor/cerebrium_requirements.txt diff --git a/8-application-demos/6-kalshi-bet-predictor/README.md b/8-application-demos/6-kalshi-bet-predictor/README.md new file mode 100644 index 00000000..80bd5e78 --- /dev/null +++ b/8-application-demos/6-kalshi-bet-predictor/README.md @@ -0,0 +1,55 @@ +# Kalshi Bet Predictor +This repository contains a set of Python scripts designed to find equivalent binary markets across Kalshi and Polymarket, use an LLM via OpenAI and web search via Exa to generate an independent prediction, and then calculate the trading "edge" on both platforms. + +Core Components +--------------- + +The project is structured around three main scripts: + +**`find_equiv_markets.py`**: A utility script to automatically search Kalshi and Polymarket APIs, use a Sentence Transformer and FAISS vector index to find markets with similar titles (i.e., equivalent questions), and save potential matches to a CSV file for manual review. + +**`analyst.py`** and **`main.py`**: These scripts form the core prediction engine. + +* `analyst.py` handles API interactions with OpenAI (for prediction and question generation) and Exa (for web information retrieval). + +* `main.py` fetches the current prices from both Kalshi and Polymarket, runs the prediction via `BetAnalyst`, and calculates the trading edge against the model's prediction. This logic is intended to be hosted on Cerebrium. + +**`compare.py`**: This script reads the market pairs from the CSV, asynchronously calls the hosted prediction endpoint for each pair, and compiles statistics on the trading edge and which platform offers a better opportunity more frequently. + + + +Prerequisites +------------- + +You will need API keys for the following services: + +* **OpenAI**: For the large language model (`BetAnalyst` class). + +* **Exa**: For semantic search/information retrieval (`BetAnalyst` class).
+ +* **Cerebrium** (or similar hosting platform): To deploy the `main.py` and `analyst.py` logic as a prediction endpoint. + + +Create a `.env` file in your project root to store your keys: + +``` OPENAI_API_KEY="your_openai_key" EXA_API_KEY="your_exa_key" ``` + +Setup and Installation +---------------------- + +### Dependencies + +Install the required Python packages: + +```bash +pip install -r requirements.txt +``` + + +Workflow +-------- + +1. Host the prediction service by deploying `main.py` and `analyst.py` on Cerebrium to expose a `predict` endpoint that runs the `BetAnalyst` logic. +2. Run `find_equiv_markets.py` to identify equivalent Kalshi and Polymarket markets and export the candidate pairs to a CSV file. +3. Execute `compare.py`, which loads the CSV pairs, calls the hosted prediction endpoint for each pair, and aggregates the edge statistics to highlight the most favorable markets. + diff --git a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml index 0e73bf8d..704861f3 100644 --- a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml +++ b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml @@ -7,7 +7,7 @@ include = ['./*', 'main.py', 'cerebrium.toml'] exclude = ['.*'] [cerebrium.dependencies.paths] -pip = "requirements.txt" +pip = "cerebrium_requirements.txt" [cerebrium.hardware] cpu = 4 diff --git a/8-application-demos/6-kalshi-bet-predictor/cerebrium_requirements.txt b/8-application-demos/6-kalshi-bet-predictor/cerebrium_requirements.txt new file mode 100644 index 00000000..dfebee10 --- /dev/null +++ b/8-application-demos/6-kalshi-bet-predictor/cerebrium_requirements.txt @@ -0,0 +1,22 @@ +annotated-types==0.7.0 +anyio==4.11.0 +certifi==2025.10.5 +charset-normalizer==3.4.4 +distro==1.9.0 +dotenv==0.9.9 +exa-py==1.16.1 +h11==0.16.0 +httpcore==1.0.9 +httpx==0.28.1 +idna==3.11 +jiter==0.11.1 +openai==2.6.0 +pydantic==2.12.3 +pydantic_core==2.41.4 +python-dotenv==1.1.1 
+requests==2.32.5 +sniffio==1.3.1 +tqdm==4.67.1 +typing-inspection==0.4.2 +typing_extensions==4.15.0 +urllib3==2.5.0 diff --git a/8-application-demos/6-kalshi-bet-predictor/requirements.txt b/8-application-demos/6-kalshi-bet-predictor/requirements.txt index dfebee10..6f68e701 100644 --- a/8-application-demos/6-kalshi-bet-predictor/requirements.txt +++ b/8-application-demos/6-kalshi-bet-predictor/requirements.txt @@ -1,22 +1,54 @@ +aiohappyeyeballs==2.6.1 +aiohttp==3.13.1 +aiosignal==1.4.0 annotated-types==0.7.0 anyio==4.11.0 +attrs==25.4.0 certifi==2025.10.5 charset-normalizer==3.4.4 distro==1.9.0 dotenv==0.9.9 exa-py==1.16.1 +faiss-cpu==1.12.0 +filelock==3.20.0 +frozenlist==1.8.0 +fsspec==2025.9.0 h11==0.16.0 +hf-xet==1.1.10 httpcore==1.0.9 httpx==0.28.1 +huggingface-hub==0.35.3 idna==3.11 +Jinja2==3.1.6 jiter==0.11.1 +joblib==1.5.2 +MarkupSafe==3.0.3 +mpmath==1.3.0 +multidict==6.7.0 +networkx==3.5 +numpy==2.3.4 openai==2.6.0 +packaging==25.0 +pillow==12.0.0 +propcache==0.4.1 pydantic==2.12.3 pydantic_core==2.41.4 python-dotenv==1.1.1 +PyYAML==6.0.3 +regex==2025.10.23 requests==2.32.5 +safetensors==0.6.2 +scikit-learn==1.7.2 +scipy==1.16.2 +sentence-transformers==5.1.2 sniffio==1.3.1 +sympy==1.14.0 +threadpoolctl==3.6.0 +tokenizers==0.22.1 +torch==2.9.0 tqdm==4.67.1 +transformers==4.57.1 typing-inspection==0.4.2 typing_extensions==4.15.0 urllib3==2.5.0 +yarl==1.22.0 From e0e7ba89a65f87db1e30ae83ae4b5b034badcdb2 Mon Sep 17 00:00:00 2001 From: SeaUrc Date: Sat, 1 Nov 2025 14:26:26 -0400 Subject: [PATCH 09/10] remove unnecessary function --- .../6-kalshi-bet-predictor/analyst.py | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/8-application-demos/6-kalshi-bet-predictor/analyst.py b/8-application-demos/6-kalshi-bet-predictor/analyst.py index f50bd3a9..2cfaa146 100644 --- a/8-application-demos/6-kalshi-bet-predictor/analyst.py +++ b/8-application-demos/6-kalshi-bet-predictor/analyst.py @@ -1,4 +1,3 @@ - from typing import Tuple from dotenv import
load_dotenv import os @@ -54,30 +53,8 @@ def _generate_response(self, prompt: str, text_format = None): except Exception as e: raise RuntimeError(f"Error during API call: {e}") from e - - def convert_market_to_resolution(self, rules:str) -> str: - prompt = ( - "A market resolution statement usually describes the conditions under which a market would resolve to \"Yes\" or \"No\". " - "Your task is to extract and rewrite the core factual question that determines whether the market will resolve to \"Yes.\"\n" - "Input format: A sentence describing how a market will resolve.\n" - "Example: \"Will the market resolve to Yes if Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025?\"\n" - "Output format: A clear, grammatically correct factual question that reflects the underlying condition of the resolution, removing any meta-language about the market or its resolution.\n" - "The condition will start with \"if [event], then the market will resolve to \"Yes\" and you must convert it to \"Will [event]?\"\n" - "Examples:\n" - "Input: \"If Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025, then the market resolves to Yes\"\n" - "Output: \"Will Curtis Sliwa drop out of the NYC Mayoral race before Nov 4, 2025?\"\n" - "Input: \"If Donald Trump wins the 2024 U.S. presidential election, then the market resolves to Yes\"\n" - "Output: \"Will Donald Trump win the 2024 U.S. presidential election?\"\n" - "Only output the rewritten factual question with no explanations or commentary.\n" - f"STATEMENT: {rules}" - ) - - raw_response = self._generate_response(prompt) - - return raw_response def get_relevant_questions(self, question: str) -> list[str]: - prompt = ( "Based on the following question, generate a list of 5 relevant questions " "that one could search online to gather more information. 
" From 1d9ad7287ff396d1ece27778373016bcafa8c135 Mon Sep 17 00:00:00 2001 From: SeaUrc Date: Wed, 5 Nov 2025 20:57:52 -0500 Subject: [PATCH 10/10] comments + cleanup --- .../6-kalshi-bet-predictor/analyst.py | 9 ++++++++- .../6-kalshi-bet-predictor/compare.py | 8 ++++++-- .../find_equiv_markets.py | 9 ++++++++- .../6-kalshi-bet-predictor/main.py | 20 ++++++------------- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/8-application-demos/6-kalshi-bet-predictor/analyst.py b/8-application-demos/6-kalshi-bet-predictor/analyst.py index 2cfaa146..16131c15 100644 --- a/8-application-demos/6-kalshi-bet-predictor/analyst.py +++ b/8-application-demos/6-kalshi-bet-predictor/analyst.py @@ -8,7 +8,7 @@ class BetAnalyst: def __init__(self, model_name: str = "gpt-5-nano"): - + """Initializes the API clients and loads necessary API keys from environment variables""" load_dotenv() exa_api_key = os.environ.get("EXA_API_KEY") @@ -26,6 +26,7 @@ def __init__(self, model_name: str = "gpt-5-nano"): print(f"Using model: {model_name}") def _generate_response(self, prompt: str, text_format = None): + """Sends a prompt to the OpenAI API and optionally parses the output into a structured format""" try: response = self.client.responses.create( model=self.model_name, @@ -36,6 +37,7 @@ def _generate_response(self, prompt: str, text_format = None): print(f"Generated raw response: {output_text}") if text_format is not None: + # If a Pydantic model (text_format) is provided, re-parse the raw output into that structure. 
parsed = self.client.responses.parse( model=self.model_name, input=[ @@ -55,6 +57,7 @@ def _generate_response(self, prompt: str, text_format = None): raise RuntimeError(f"Error during API call: {e}") from e def get_relevant_questions(self, question: str) -> list[str]: + """Generates a list of related search queries based on an initial user question""" prompt = ( "Based on the following question, generate a list of 5 relevant questions " "that one could search online to gather more information. " @@ -76,6 +79,7 @@ def get_relevant_questions(self, question: str) -> list[str]: for line in raw_response.split('\n'): line = line.strip() if line and line[0].isdigit(): + # Parse lines like "1. What is..." into "What is..." clean_question = line.split('.', 1)[-1].strip() relevant_questions.append(clean_question) @@ -85,11 +89,13 @@ def get_relevant_questions(self, question: str) -> list[str]: def get_web_info(self, questions): + """Uses the Exa API to find answers for a list of questions.""" results = [self.exa.answer(q, text=True) for q in questions] answers = [r.answer for r in results] return answers def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]: + """Analyzes provided information to return a Yes/No probability and explanation for a given question""" prompt = ( "Analyze the provided information below to answer the given binary question. " "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n" @@ -105,6 +111,7 @@ def get_binary_answer_with_percentage(self, information: str, question: str) -> "4. 
Provide a brief but clear explanation supporting your probabilities.\n\n" ) + # Define the expected Pydantic structure for the _generate_response 'text_format' parameter class Response(BaseModel): yes_percentage: str no_percentage: str diff --git a/8-application-demos/6-kalshi-bet-predictor/compare.py b/8-application-demos/6-kalshi-bet-predictor/compare.py index c8a184ce..fdb1bde0 100644 --- a/8-application-demos/6-kalshi-bet-predictor/compare.py +++ b/8-application-demos/6-kalshi-bet-predictor/compare.py @@ -5,6 +5,7 @@ import aiohttp def load_markets(csv_path: str) -> List[Tuple[str, str]]: + # Loads market pairs (Kalshi ticker, Polymarket slug) from a CSV file. markets = [] with open(csv_path, 'r') as f: reader = csv.reader(f) @@ -16,7 +17,7 @@ def load_markets(csv_path: str) -> List[Tuple[str, str]]: async def get_market_data(session: aiohttp.ClientSession, kalshi_ticker: str, poly_slug: str, endpoint_url: str) -> Dict: - + # Asynchronously fetches and processes edge data for a single market pair from a specified API endpoint. payload = json.dumps({ 'kalshi_ticker': kalshi_ticker, 'poly_slug': poly_slug @@ -52,6 +53,7 @@ async def get_market_data(session: aiohttp.ClientSession, kalshi_ticker: str, return None async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]: + # Orchestrates the asynchronous fetching of data for all markets listed in the CSV. markets = load_markets(csv_path) print(f"Fetching data for {len(markets)} markets all at once...") @@ -61,10 +63,12 @@ async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]: for kalshi_ticker, poly_slug in markets] results = await asyncio.gather(*tasks) - + + # Filter out None results from failed requests return [r for r in results if r is not None] def compute_statistics(results: List[Dict]) -> None: + # Calculates and prints summary statistics comparing Kalshi and Polymarket edge data. 
print("\n" + "="*80) print("STATISTICS") print("="*80) diff --git a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py index e2806174..97093b71 100644 --- a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py +++ b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py @@ -14,12 +14,13 @@ OUTPUT_FILE = "markets.csv" def get_kalshi_markets() -> List[Dict[str, Any]]: + # Fetches all active, binary markets from the Kalshi API via pagination print("Fetching Kalshi markets...") markets_list = [] cursor = "" try: while True: - params = {'limit': 1000} + params = {'limit': 1000} # Request 1000 markets per page. if cursor: params['cursor'] = cursor @@ -57,11 +58,13 @@ def get_kalshi_markets() -> List[Dict[str, Any]]: return [] def get_kalshi_market(ticker): + # Retrieves the full title for a single Kalshi market by its ticker title = requests.get(f"{KALSHI_API_URL}/{ticker}") title = title.json() return title['market']['title'] def get_polymarket_markets() -> List[Dict[str, Any]]: + # Fetches all active, non-closed markets from the Polymarket API via pagination print("Fetching Polymarket markets...") markets_list = [] next_cursor = None @@ -94,6 +97,7 @@ def get_polymarket_markets() -> List[Dict[str, Any]]: print(f"Found {len(markets_list)} active and open markets") if len(markets_list) > MAX_MARKET_LIMIT or not next_cursor or next_cursor == 'LTE=': + # 'LTE=' is Polymarket's cursor value indicating the end of pagination. break print(f"Found {len(markets_list)} open markets on Polymarket.") @@ -105,7 +109,9 @@ def get_polymarket_markets() -> List[Dict[str, Any]]: def find_similar_markets(kalshi_markets, poly_markets, threshold=0.9, top_k=TOP_K): + # Uses a sentence transformer and FAISS to find markets with similar titles above a given threshold. 
print("\nLoading NLP model...") + # 'all-MiniLM-L6-v2' is a small and fast model model = SentenceTransformer('all-MiniLM-L6-v2') kalshi_titles = [m['title'] for m in kalshi_markets] @@ -145,6 +151,7 @@ def find_similar_markets(kalshi_markets, poly_markets, threshold=0.9, top_k=TOP_ return potential_matches def interactive_save(matches: List[Dict[str, Any]]): + # Presents potential matches to the user for manual review and saves confirmed pairs to a CSV file print("\n--- Review Mode ---") print("Press 'y' to save a match, anything else to skip.\n") diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py index e32a3b29..8106e740 100644 --- a/8-application-demos/6-kalshi-bet-predictor/main.py +++ b/8-application-demos/6-kalshi-bet-predictor/main.py @@ -20,6 +20,7 @@ def _fetch_api_data(url: str): raise RuntimeError(f"Error fetching Kalshi market data: {e}") def get_kalshi_market(ticker: str) -> MarketData: + # Fetches and standardizes market data for a specific Kalshi market ticker url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}" raw_data = _fetch_api_data(url) @@ -34,6 +35,7 @@ def get_kalshi_market(ticker: str) -> MarketData: raise RuntimeError(f"Error parsing Kalshi data structure: {e}") from e def get_polymarket_market(slug: str) -> MarketData: + # Fetches and standardizes market data for a specific Polymarket market slug url = f"https://gamma-api.polymarket.com/markets/slug/{slug}" # slug raw_data = _fetch_api_data(url) @@ -48,23 +50,11 @@ def get_polymarket_market(slug: str) -> MarketData: ) except (KeyError, TypeError, ValueError) as e: raise RuntimeError(f"Error parsing Kalshi data structure: {e}") from e - - -def getMarket(is_kalshi, ticker): - if is_kalshi: - url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}" # market ticker - else: - url = f"https://gamma-api.polymarket.com/markets/slug/{ticker}" # slug - try: - res = requests.get(url) - 
res.raise_for_status() - obj = res.json() - return obj - except requests.exceptions.RequestException as e: - raise RuntimeError(f"Error fetching Kalshi market data: {e}") def evaluate(analyst, question): + # Uses the BetAnalyst class to research and form an opinion on a market question. + # Generate questions using OpenAI API relevant_questions = analyst.get_relevant_questions(question) # Use Exa semantic search to retrieve answers to questions @@ -83,6 +73,7 @@ def evaluate(analyst, question): return yes, no, explanation def predict(kalshi_ticker, poly_slug): + # Orchestrates the entire process of finding arbitrage opportunities kalshi_market = get_kalshi_market(kalshi_ticker) poly_market = get_polymarket_market(poly_slug) question = poly_market.question # we use polymarket because they have direct question @@ -92,6 +83,7 @@ def predict(kalshi_ticker, poly_slug): analyst = BetAnalyst() pred_yes, pred_no, explanation = evaluate(analyst, question) + # The model returns probabilities as strings (e.g., "80%"), so we parse the number. match_yes = re.search(r"(\d+)%", pred_yes) match_no = re.search(r"(\d+)%", pred_no) pred_yes = float(match_yes.group(1))