From d135abbefe7fab52b5fa16f9326d637172dea3e2 Mon Sep 17 00:00:00 2001
From: SeaUrc <nicksong03@gmail.com>
Date: Mon, 20 Oct 2025 12:39:23 -0400
Subject: [PATCH 01/10] Add Kalshi bet predictor example

---
 .../6-kalshi-bet-predictor/.gitignore         |   4 +
 .../6-kalshi-bet-predictor/cerebrium.toml     |  23 +++
 .../6-kalshi-bet-predictor/main.py            | 170 ++++++++++++++++++
 .../6-kalshi-bet-predictor/requirements.txt   |  44 +++++
 4 files changed, 241 insertions(+)
 create mode 100644 8-application-demos/6-kalshi-bet-predictor/.gitignore
 create mode 100644 8-application-demos/6-kalshi-bet-predictor/cerebrium.toml
 create mode 100644 8-application-demos/6-kalshi-bet-predictor/main.py
 create mode 100644 8-application-demos/6-kalshi-bet-predictor/requirements.txt

diff --git a/8-application-demos/6-kalshi-bet-predictor/.gitignore b/8-application-demos/6-kalshi-bet-predictor/.gitignore
new file mode 100644
index 00000000..01d392ed
--- /dev/null
+++ b/8-application-demos/6-kalshi-bet-predictor/.gitignore
@@ -0,0 +1,4 @@
+.DS_Store
+.env
+.venv/
+.vscode/
\ No newline at end of file
diff --git a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml
new file mode 100644
index 00000000..67cd4b9f
--- /dev/null
+++ b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml
@@ -0,0 +1,23 @@
+[cerebrium.deployment]
+name = "kalshi-bet-predictor"
+python_version = "3.11"
+docker_base_image_url = "debian:bookworm-slim"
+disable_auth = true
+include = ['./*', 'main.py', 'cerebrium.toml']
+exclude = ['.*']
+
+[cerebrium.dependencies.paths]
+pip = "requirements.txt"
+
+[cerebrium.hardware]
+cpu = 4
+memory = 36
+compute = "ADA_L40"
+
+[cerebrium.scaling]
+min_replicas = 1
+max_replicas = 2
+cooldown = 30
+replica_concurrency = 1
+scaling_metric = "concurrency_utilization"
+
diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py
new file mode 100644
index 00000000..5904cf9a
--- /dev/null
+++ b/8-application-demos/6-kalshi-bet-predictor/main.py
@@ -0,0 +1,170 @@
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from typing import Tuple
+import requests
+import re
+from dotenv import load_dotenv
+import os
+from exa_py import Exa
+
+def getKalshiQuestion(market_ticker)->Tuple[str,str]:
+    url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{market_ticker}"
+    try:
+        res = requests.get(url)
+        res.raise_for_status()
+        obj = res.json()
+        return obj['market']['rules_primary']
+    except requests.exceptions.RequestException as e:
+        raise RuntimeError(f"Error fetching Kalshi market data: {e}")
+
+def getKalshiOdds(market_ticker)->Tuple[str, str]:
+    url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{market_ticker}"
+    try:
+        res = requests.get(url)
+        res.raise_for_status()
+        obj = res.json()
+        return obj['market']['yes_ask'], obj['market']['no_ask']
+    except requests.exceptions.RequestException as e:
+        raise RuntimeError(f"Error fetching Kalshi market data: {e}")
+
+
+class BetPredictor:
+    def __init__(self, model_name: str = "Qwen/Qwen3-4B-Instruct-2507"):
+
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype = torch.bfloat16,
+            device_map="auto"
+        )
+
+        load_dotenv()
+
+        self.exa = Exa(os.environ.get("EXA_API_KEY"))
+
+        print(f"Loaded model {model_name}!")
+
+    def _generate_response(self, prompt: str, max_new_tokens: int) -> str:
+        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+        input_ids_len = inputs['input_ids'].shape[-1]
+
+        output_sequences = self.model.generate(
+            **inputs,
+            max_new_tokens=max_new_tokens,
+            pad_token_id=self.tokenizer.eos_token_id,
+            do_sample=False,
+        )
+
+        newly_generated_ids = output_sequences[0, input_ids_len:]
+        
+        response = self.tokenizer.decode(newly_generated_ids, skip_special_tokens=True).strip()
+        
+        print(f"Generated this response! {response}")
+        return response
+    
+    def convert_rules_to_question(self, rules:str) -> str:
+        prompt = (
+            "You will receive a sentence that is a statement of the following type:"
+            "If <conditional>, then the market resolves to Yes"
+            "Convert the conditional to a yes/no question"
+            "Your response SHOULD ONLY BE a SINGLE line consisting of the yes/no question:\n"
+            "Do not add ANY preamble, conclusion, or extra text.\n\n"
+            f"STATEMENT: {rules}\n"
+        )
+
+        raw_response = self._generate_response(prompt, max_new_tokens=400)
+
+        return raw_response
+
+    def get_relevant_questions(self, question: str) -> list[str]:
+
+        prompt = (
+            "Based on the following question, generate a list of 5 relevant questions "
+            "that one could search online to gather more information. "
+            "These questions should yield information that would be helpful to answering "
+            "the following question in an objective manner.\n\n"
+            "Your response SHOULD ONLY BE the following lines, in this exact format:\n"
+            "1. <question 1>\n"
+            "2. <question 2>\n"
+            "3. <question 3>\n"
+            "4. <question 4>\n"
+            "5. <question 5>\n"
+            "Do not add ANY preamble, conclusion, or extra text.\n\n"
+            f"Question: \"{question}\"\n"
+        )
+
+        raw_response = self._generate_response(prompt, max_new_tokens=400)
+
+        relevant_questions = []
+        for line in raw_response.split('\n'):
+            line = line.strip()
+            if line and line[0].isdigit():
+                clean_question = line.split('.', 1)[-1].strip()
+                relevant_questions.append(clean_question)
+        
+        return relevant_questions
+
+    
+    def get_information(self, questions):
+        results = [self.exa.answer(q, text=True) for q in questions]
+        answers = [r.answer for r in results]
+        return answers
+
+    def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]:
+        prompt = (
+            "Analyze the provided information below to answer the given binary question. "
+            "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n"
+            "--- Information ---\n"
+            f"{information}\n\n"
+            "--- Question ---\n"
+            f"{question}\n\n"
+            "IMPORTANT INSTRUCTIONS:\n"
+            "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n"
+            "   Yes: <YES PERCENTAGE>%, No: <NO PERCENTAGE>%, Explanation: <EXPLANATION>\n"
+            "2. Percentages must sum to 100%.\n"
+            "3. Do NOT include any preamble, summary, or additional text.\n"
+            "4. Provide a brief but clear explanation supporting your probabilities.\n\n"
+            "AGAIN, Your response MUST ONLY be a single line in THIS EXACT FORMAT: Yes: <YES PERCENTAGE>%, No: <NO PERCENTAGE>%, Explanation: <EXPLANATION>"
+        )
+
+        response = self._generate_response(prompt, max_new_tokens=800)
+
+        match = re.search(r"Yes: (.*?), No: (.*?), Explanation: (.*)", response, re.DOTALL)
+
+        if match:
+            yes, no, explanation = match.groups()
+            return yes.strip(), no.strip(), explanation.strip()
+        else:
+            raise ValueError(f"Failed to parse LLM response: {response}")
+    
+    def predict(self, question):
+        relevant_questions = self.get_relevant_questions(question)
+        answers = self.get_information(relevant_questions)
+
+        information = ""
+        for i, v in enumerate(relevant_questions):
+            information += f"INFORMATION {i+1}: \n"
+            information += f"QUESTION {i+1}: {v}\n"
+            information += f"ANSWER {i+1}: {answers[i]} \n\n"
+        
+        yes, no, explanation = self.get_binary_answer_with_percentage(information, question)
+        return yes, no, explanation
+
+
+predictor = BetPredictor()
+
+def predict(ticker: str):
+    rules = getKalshiQuestion(ticker)
+    question = predictor.convert_rules_to_question(rules)
+    
+    predYes, predNo, explanation = predictor.predict(question)
+
+    realYes, realNo = getKalshiOdds(ticker)
+    
+    if realYes < predYes: # undervalued
+        buyYes = True
+    if realNo < predNo: # undervalued
+        buyNo = True
+
+    return {"buy_yes":buyYes, "buy_no": buyNo, "yes": predYes, "no": predNo, "explanation": explanation}
+
diff --git a/8-application-demos/6-kalshi-bet-predictor/requirements.txt b/8-application-demos/6-kalshi-bet-predictor/requirements.txt
new file mode 100644
index 00000000..328692b6
--- /dev/null
+++ b/8-application-demos/6-kalshi-bet-predictor/requirements.txt
@@ -0,0 +1,44 @@
+accelerate==1.10.1
+annotated-types==0.7.0
+anyio==4.11.0
+certifi==2025.10.5
+charset-normalizer==3.4.4
+distro==1.9.0
+exa-py==1.16.1
+filelock==3.20.0
+fsspec==2025.9.0
+greenlet==3.2.4
+h11==0.16.0
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.3
+idna==3.11
+Jinja2==3.1.6
+jiter==0.11.1
+MarkupSafe==3.0.3
+mpmath==1.3.0
+networkx==3.5
+numpy==2.3.4
+openai==2.5.0
+packaging==25.0
+playwright==1.55.0
+psutil==7.1.0
+pydantic==2.12.3
+pydantic_core==2.41.4
+pyee==13.0.0
+PyPDF2==3.0.1
+python-dotenv==1.1.1
+PyYAML==6.0.3
+regex==2025.9.18
+requests==2.32.5
+safetensors==0.6.2
+sniffio==1.3.1
+sympy==1.14.0
+tokenizers==0.22.1
+torch==2.9.0
+tqdm==4.67.1
+transformers==4.57.1
+typing-inspection==0.4.2
+typing_extensions==4.15.0
+urllib3==2.5.0

From 07eb90a2a021c1c2940303663c8456f9250ab16b Mon Sep 17 00:00:00 2001
From: SeaUrc <nicksong03@gmail.com>
Date: Tue, 21 Oct 2025 22:28:47 -0400
Subject: [PATCH 02/10] fix: requirements, Kalshi API call, and real-odds
 comparison

---
 .../6-kalshi-bet-predictor/main.py            | 40 ++++++++-----------
 .../6-kalshi-bet-predictor/requirements.txt   | 13 +++---
 2 files changed, 22 insertions(+), 31 deletions(-)

diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py
index 5904cf9a..d49ef642 100644
--- a/8-application-demos/6-kalshi-bet-predictor/main.py
+++ b/8-application-demos/6-kalshi-bet-predictor/main.py
@@ -7,27 +7,17 @@
 import os
 from exa_py import Exa
 
-def getKalshiQuestion(market_ticker)->Tuple[str,str]:
-    url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{market_ticker}"
-    try:
-        res = requests.get(url)
-        res.raise_for_status()
-        obj = res.json()
-        return obj['market']['rules_primary']
-    except requests.exceptions.RequestException as e:
-        raise RuntimeError(f"Error fetching Kalshi market data: {e}")
 
-def getKalshiOdds(market_ticker)->Tuple[str, str]:
+def getKalshiMarket(market_ticker)->Tuple[str,str]:
     url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{market_ticker}"
     try:
         res = requests.get(url)
         res.raise_for_status()
         obj = res.json()
-        return obj['market']['yes_ask'], obj['market']['no_ask']
+        return obj
     except requests.exceptions.RequestException as e:
         raise RuntimeError(f"Error fetching Kalshi market data: {e}")
 
-
 class BetPredictor:
     def __init__(self, model_name: str = "Qwen/Qwen3-4B-Instruct-2507"):
 
@@ -102,8 +92,9 @@ def get_relevant_questions(self, question: str) -> list[str]:
                 clean_question = line.split('.', 1)[-1].strip()
                 relevant_questions.append(clean_question)
         
-        return relevant_questions
+        print(f"Generated relevant questions: {relevant_questions}")
 
+        return relevant_questions
     
     def get_information(self, questions):
         results = [self.exa.answer(q, text=True) for q in questions]
@@ -124,7 +115,7 @@ def get_binary_answer_with_percentage(self, information: str, question: str) ->
             "2. Percentages must sum to 100%.\n"
             "3. Do NOT include any preamble, summary, or additional text.\n"
             "4. Provide a brief but clear explanation supporting your probabilities.\n\n"
-            "AGAIN, Your response MUST ONLY be a single line in THIS EXACT FORMAT: Yes: <YES PERCENTAGE>%, No: <NO PERCENTAGE>%, Explanation: <EXPLANATION>"
+            "Again, your response MUST ONLY be a single line in THIS EXACT FORMAT: Yes: <YES PERCENTAGE>%, No: <NO PERCENTAGE>%, Explanation: <EXPLANATION>"
         )
 
         response = self._generate_response(prompt, max_new_tokens=800)
@@ -154,17 +145,20 @@ def predict(self, question):
 predictor = BetPredictor()
 
 def predict(ticker: str):
-    rules = getKalshiQuestion(ticker)
+    market = getKalshiMarket(ticker)
+    rules = market['market']['rules_primary']
+
     question = predictor.convert_rules_to_question(rules)
     
-    predYes, predNo, explanation = predictor.predict(question)
+    pred_yes, pred_no, explanation = predictor.predict(question)
 
-    realYes, realNo = getKalshiOdds(ticker)
-    
-    if realYes < predYes: # undervalued
-        buyYes = True
-    if realNo < predNo: # undervalued
-        buyNo = True
+    pred_yes = int(pred_yes[:2])
+    pred_no = int(pred_no[:2])
+
+    real_yes = int(market['market']['yes_ask'])
+    real_no = int(market['market']['no_ask'])
 
-    return {"buy_yes":buyYes, "buy_no": buyNo, "yes": predYes, "no": predNo, "explanation": explanation}
+    buy_yes = real_yes < pred_yes
+    buy_no = real_no < pred_no
 
+    return {"buy_yes":buy_yes, "buy_no": buy_no, "yes": pred_yes, "no": pred_no, "explanation": explanation}
diff --git a/8-application-demos/6-kalshi-bet-predictor/requirements.txt b/8-application-demos/6-kalshi-bet-predictor/requirements.txt
index 328692b6..b3d3fbd3 100644
--- a/8-application-demos/6-kalshi-bet-predictor/requirements.txt
+++ b/8-application-demos/6-kalshi-bet-predictor/requirements.txt
@@ -1,13 +1,13 @@
-accelerate==1.10.1
+accelerate==1.11.0
 annotated-types==0.7.0
 anyio==4.11.0
 certifi==2025.10.5
 charset-normalizer==3.4.4
 distro==1.9.0
+dotenv==0.9.9
 exa-py==1.16.1
 filelock==3.20.0
 fsspec==2025.9.0
-greenlet==3.2.4
 h11==0.16.0
 hf-xet==1.1.10
 httpcore==1.0.9
@@ -20,17 +20,14 @@ MarkupSafe==3.0.3
 mpmath==1.3.0
 networkx==3.5
 numpy==2.3.4
-openai==2.5.0
+openai==2.6.0
 packaging==25.0
-playwright==1.55.0
-psutil==7.1.0
+psutil==7.1.1
 pydantic==2.12.3
 pydantic_core==2.41.4
-pyee==13.0.0
-PyPDF2==3.0.1
 python-dotenv==1.1.1
 PyYAML==6.0.3
-regex==2025.9.18
+regex==2025.10.23
 requests==2.32.5
 safetensors==0.6.2
 sniffio==1.3.1

From 669982a1164c923ccf10494110438995628b4b73 Mon Sep 17 00:00:00 2001
From: SeaUrc <nicksong03@gmail.com>
Date: Wed, 22 Oct 2025 23:00:12 -0400
Subject: [PATCH 03/10] add Polymarket, switch to OpenAI API call, create
 script to make parallel calls to Cerebrium endpoint

---
 .../6-kalshi-bet-predictor/cerebrium.toml     |   4 +-
 .../6-kalshi-bet-predictor/main.py            | 270 ++++++++----------
 .../6-kalshi-bet-predictor/predictor.py       | 188 ++++++++++++
 .../6-kalshi-bet-predictor/requirements.txt   |  19 --
 4 files changed, 303 insertions(+), 178 deletions(-)
 create mode 100644 8-application-demos/6-kalshi-bet-predictor/predictor.py

diff --git a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml
index 67cd4b9f..7133c08c 100644
--- a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml
+++ b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml
@@ -3,7 +3,7 @@ name = "kalshi-bet-predictor"
 python_version = "3.11"
 docker_base_image_url = "debian:bookworm-slim"
 disable_auth = true
-include = ['./*', 'main.py', 'cerebrium.toml']
+include = ['./*', 'predictor.py', 'cerebrium.toml']
 exclude = ['.*']
 
 [cerebrium.dependencies.paths]
@@ -15,7 +15,7 @@ memory = 36
 compute = "ADA_L40"
 
 [cerebrium.scaling]
-min_replicas = 1
+min_replicas = 0
 max_replicas = 2
 cooldown = 30
 replica_concurrency = 1
diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py
index d49ef642..8a81115c 100644
--- a/8-application-demos/6-kalshi-bet-predictor/main.py
+++ b/8-application-demos/6-kalshi-bet-predictor/main.py
@@ -1,164 +1,120 @@
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from typing import Tuple
+import csv
 import requests
-import re
-from dotenv import load_dotenv
-import os
-from exa_py import Exa
-
-
-def getKalshiMarket(market_ticker)->Tuple[str,str]:
-    url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{market_ticker}"
+from typing import Dict, List, Tuple
+import asyncio
+import aiohttp
+
+def load_markets(csv_path: str) -> List[Tuple[str, str]]:
+    markets = []
+    with open(csv_path, 'r') as f:
+        reader = csv.reader(f)
+        next(reader)  # Skip header if present
+        for row in reader:
+            if len(row) >= 2:
+                markets.append((row[0], row[1]))
+    return markets
+
+async def get_market_data(session: aiohttp.ClientSession, kalshi_id: str, 
+                         polymarket_slug: str, endpoint_url: str) -> Dict:
+    
+    payload = {
+        'kalshi_id': kalshi_id,
+        'polymarket_slug': polymarket_slug
+    }
+    
     try:
-        res = requests.get(url)
-        res.raise_for_status()
-        obj = res.json()
-        return obj
-    except requests.exceptions.RequestException as e:
-        raise RuntimeError(f"Error fetching Kalshi market data: {e}")
-
-class BetPredictor:
-    def __init__(self, model_name: str = "Qwen/Qwen3-4B-Instruct-2507"):
-
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype = torch.bfloat16,
-            device_map="auto"
-        )
-
-        load_dotenv()
-
-        self.exa = Exa(os.environ.get("EXA_API_KEY"))
-
-        print(f"Loaded model {model_name}!")
-
-    def _generate_response(self, prompt: str, max_new_tokens: int) -> str:
-        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
-        input_ids_len = inputs['input_ids'].shape[-1]
-
-        output_sequences = self.model.generate(
-            **inputs,
-            max_new_tokens=max_new_tokens,
-            pad_token_id=self.tokenizer.eos_token_id,
-            do_sample=False,
-        )
-
-        newly_generated_ids = output_sequences[0, input_ids_len:]
-        
-        response = self.tokenizer.decode(newly_generated_ids, skip_special_tokens=True).strip()
-        
-        print(f"Generated this response! {response}")
-        return response
-    
-    def convert_rules_to_question(self, rules:str) -> str:
-        prompt = (
-            "You will receive a sentence that is a statement of the following type:"
-            "If <conditional>, then the market resolves to Yes"
-            "Convert the conditional to a yes/no question"
-            "Your response SHOULD ONLY BE a SINGLE line consisting of the yes/no question:\n"
-            "Do not add ANY preamble, conclusion, or extra text.\n\n"
-            f"STATEMENT: {rules}\n"
-        )
-
-        raw_response = self._generate_response(prompt, max_new_tokens=400)
-
-        return raw_response
-
-    def get_relevant_questions(self, question: str) -> list[str]:
-
-        prompt = (
-            "Based on the following question, generate a list of 5 relevant questions "
-            "that one could search online to gather more information. "
-            "These questions should yield information that would be helpful to answering "
-            "the following question in an objective manner.\n\n"
-            "Your response SHOULD ONLY BE the following lines, in this exact format:\n"
-            "1. <question 1>\n"
-            "2. <question 2>\n"
-            "3. <question 3>\n"
-            "4. <question 4>\n"
-            "5. <question 5>\n"
-            "Do not add ANY preamble, conclusion, or extra text.\n\n"
-            f"Question: \"{question}\"\n"
-        )
-
-        raw_response = self._generate_response(prompt, max_new_tokens=400)
-
-        relevant_questions = []
-        for line in raw_response.split('\n'):
-            line = line.strip()
-            if line and line[0].isdigit():
-                clean_question = line.split('.', 1)[-1].strip()
-                relevant_questions.append(clean_question)
-        
-        print(f"Generated relevant questions: {relevant_questions}")
-
-        return relevant_questions
+        async with session.post(endpoint_url, json=payload, timeout=aiohttp.ClientTimeout(total=10)) as response:
+            response.raise_for_status()
+            data = await response.json()
+            
+            kalshi_data = data['Kalshi']
+            polymarket_data = data['Polymarket']
+            
+            return {
+                'kalshi_id': kalshi_id,
+                'polymarket_slug': polymarket_slug,
+                'kalshi_edge': kalshi_data['edge'],
+                'polymarket_edge': polymarket_data['edge'],
+                'kalshi_buy_yes': kalshi_data['buy_yes'],
+                'kalshi_buy_no': kalshi_data['buy_no'],
+                'polymarket_buy_yes': polymarket_data['buy_yes'],
+                'polymarket_buy_no': polymarket_data['buy_no'],
+            }
+    except Exception as e:
+        print(f"Error fetching data for {kalshi_id}/{polymarket_slug}: {e}")
+        return None
+
+async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]:
+    markets = load_markets(csv_path)
     
-    def get_information(self, questions):
-        results = [self.exa.answer(q, text=True) for q in questions]
-        answers = [r.answer for r in results]
-        return answers
-
-    def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]:
-        prompt = (
-            "Analyze the provided information below to answer the given binary question. "
-            "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n"
-            "--- Information ---\n"
-            f"{information}\n\n"
-            "--- Question ---\n"
-            f"{question}\n\n"
-            "IMPORTANT INSTRUCTIONS:\n"
-            "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n"
-            "   Yes: <YES PERCENTAGE>%, No: <NO PERCENTAGE>%, Explanation: <EXPLANATION>\n"
-            "2. Percentages must sum to 100%.\n"
-            "3. Do NOT include any preamble, summary, or additional text.\n"
-            "4. Provide a brief but clear explanation supporting your probabilities.\n\n"
-            "Again, your response MUST ONLY be a single line in THIS EXACT FORMAT: Yes: <YES PERCENTAGE>%, No: <NO PERCENTAGE>%, Explanation: <EXPLANATION>"
-        )
-
-        response = self._generate_response(prompt, max_new_tokens=800)
-
-        match = re.search(r"Yes: (.*?), No: (.*?), Explanation: (.*)", response, re.DOTALL)
-
-        if match:
-            yes, no, explanation = match.groups()
-            return yes.strip(), no.strip(), explanation.strip()
-        else:
-            raise ValueError(f"Failed to parse LLM response: {response}")
-    
-    def predict(self, question):
-        relevant_questions = self.get_relevant_questions(question)
-        answers = self.get_information(relevant_questions)
-
-        information = ""
-        for i, v in enumerate(relevant_questions):
-            information += f"INFORMATION {i+1}: \n"
-            information += f"QUESTION {i+1}: {v}\n"
-            information += f"ANSWER {i+1}: {answers[i]} \n\n"
+    print(f"Fetching data for {len(markets)} markets all at once...")
+    
+    async with aiohttp.ClientSession() as session:
+        tasks = [get_market_data(session, kalshi_id, polymarket_slug, endpoint_url) 
+                for kalshi_id, polymarket_slug in markets]
         
-        yes, no, explanation = self.get_binary_answer_with_percentage(information, question)
-        return yes, no, explanation
-
-
-predictor = BetPredictor()
-
-def predict(ticker: str):
-    market = getKalshiMarket(ticker)
-    rules = market['market']['rules_primary']
-
-    question = predictor.convert_rules_to_question(rules)
+        results = await asyncio.gather(*tasks)
     
-    pred_yes, pred_no, explanation = predictor.predict(question)
-
-    pred_yes = int(pred_yes[:2])
-    pred_no = int(pred_no[:2])
-
-    real_yes = int(market['market']['yes_ask'])
-    real_no = int(market['market']['no_ask'])
+    return [r for r in results if r is not None]
 
-    buy_yes = real_yes < pred_yes
-    buy_no = real_no < pred_no
+def compute_statistics(results: List[Dict]) -> None:
+    print("\n" + "="*80)
+    print("STATISTICS")
+    print("="*80)
+    
+    if not results:
+        print("No results to analyze")
+        return
+    
+    total_markets = len(results)
+    
+    kalshi_edges = [r['kalshi_edge'] for r in results]
+    total_kalshi_edge = sum(kalshi_edges)
+    
+    polymarket_edges = [r['polymarket_edge'] for r in results]
+    total_polymarket_edge = sum(polymarket_edges)
+    
+    kalshi_better_count = sum(1 for r in results if r['kalshi_edge'] > r['polymarket_edge'])
+    polymarket_better_count = sum(1 for r in results if r['polymarket_edge'] > r['kalshi_edge'])
+    equal_count = total_markets - kalshi_better_count - polymarket_better_count
+    
+    edge_differences = [abs(r['kalshi_edge'] - r['polymarket_edge']) for r in results]
+    avg_edge_difference = sum(edge_differences) / total_markets
+    max_edge_difference = max(edge_differences)
+    
+    # Results
+    print(f"\nTotal markets analyzed: {total_markets}")
+    print("\n" + "-"*80)
+    print("COMPARISON")
+    print("-"*80)
+    print(f"Markets with greater Kalshi edge:      {kalshi_better_count} ({kalshi_better_count/total_markets*100:.1f}%)")
+    print(f"Markets with greater Polymarket edge:  {polymarket_better_count} ({polymarket_better_count/total_markets*100:.1f}%)")
+    print(f"Markets with equal edge:               {equal_count} ({equal_count/total_markets*100:.1f}%)")
+    print(f"\nAverage edge difference: {avg_edge_difference:.4f}")
+    print(f"Max edge difference:     {max_edge_difference:.4f}")
+    
+    # Overall winner
+    print("\n" + "="*80)
+    if total_kalshi_edge > total_polymarket_edge:
+        advantage = total_kalshi_edge - total_polymarket_edge
+        print(f"OVERALL: Kalshi has greater total edge (+{advantage:.4f})")
+    elif total_polymarket_edge > total_kalshi_edge:
+        advantage = total_polymarket_edge - total_kalshi_edge
+        print(f"OVERALL: Polymarket has greater total edge (+{advantage:.4f})")
+    else:
+        print(f"OVERALL: Both platforms have equal total edge")
+    print("="*80)
+
+def main():
+    CSV_PATH = 'markets.csv' 
+    ENDPOINT_URL = 'https://{cerebrium}/predict' # Your hosted endpoint
+    
+    print("Starting async market analysis...")
+    results = asyncio.run(analyze_markets_async(CSV_PATH, ENDPOINT_URL))
+    
+    print(f"\nSuccessfully fetched {len(results)} markets")
+    
+    compute_statistics(results)
 
-    return {"buy_yes":buy_yes, "buy_no": buy_no, "yes": pred_yes, "no": pred_no, "explanation": explanation}
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/8-application-demos/6-kalshi-bet-predictor/predictor.py b/8-application-demos/6-kalshi-bet-predictor/predictor.py
new file mode 100644
index 00000000..f1d9a4c0
--- /dev/null
+++ b/8-application-demos/6-kalshi-bet-predictor/predictor.py
@@ -0,0 +1,188 @@
+from typing import Tuple
+import requests
+import re
+from dotenv import load_dotenv
+import os
+from exa_py import Exa
+from openai import OpenAI
+
+
+def getMarket(is_kalshi, ticker):
+    """Fetch one market's JSON from Kalshi (ticker) or Polymarket (slug); RuntimeError on failure."""
+    if is_kalshi:
+        platform, url = "Kalshi", f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}"
+    else:
+        platform, url = "Polymarket", f"https://gamma-api.polymarket.com/markets/slug/{ticker}"
+    try:
+        res = requests.get(url, timeout=30)  # bounded wait so a stalled API cannot hang the caller
+        res.raise_for_status()
+        return res.json()
+    except requests.exceptions.RequestException as e:
+        raise RuntimeError(f"Error fetching {platform} market data: {e}") from e
+
+
+class BetPredictor:
+    """Estimates yes/no probabilities for a binary question using Exa answers and an OpenAI model."""
+    def __init__(self, model_name: str = "gpt-5-nano"):
+        """Load the API keys (environment or .env) and create the Exa and OpenAI clients."""
+        load_dotenv()
+        exa_api_key = os.environ.get("EXA_API_KEY")
+        openai_api_key = os.environ.get("OPENAI_API_KEY")
+
+        if not exa_api_key:
+            raise EnvironmentError("Missing EXA_API_KEY in environment variables")
+        if not openai_api_key:
+            raise EnvironmentError("Missing OPENAI_API_KEY in environment variables")
+
+        self.exa = Exa(exa_api_key)
+        self.client = OpenAI(api_key=openai_api_key)
+        self.model_name = model_name
+
+        print(f"Using model: {model_name}")
+
+    def _generate_response(self, prompt: str) -> str:
+        """Send the prompt to the configured model and return its stripped text output."""
+        response = self.client.responses.create(
+            model=self.model_name,
+            input=prompt
+        )
+
+        output = response.output_text.strip()
+        print(f"Generated this response: {output}")
+
+        return output
+
+    def convert_rules_to_question(self, rules:str) -> str:
+        """Ask the model to rewrite a market resolution rule as a plain "Will ...?" question."""
+        prompt = (
+            "A market resolution statement usually describes the conditions under which a market would resolve to \"Yes\" or \"No\". "
+            "Your task is to extract and rewrite the core factual question that determines whether the market will resolve to \"Yes.\"\n"
+            "Input format: A sentence describing how a market will resolve.\n"
+            "Example: \"Will the market resolve to Yes if Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025?\"\n"
+            "Output format: A clear, grammatically correct factual question that reflects the underlying condition of the resolution, removing any meta-language about the market or its resolution.\n"
+            "The condition will start with \"if [event], then the market will resolve to \"Yes\" and you must convert it to \"Will [event]?\"\n"
+            "Examples:\n"
+            "Input: \"If Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025, then the market resolves to Yes\"\n"
+            "Output: \"Will Curtis Sliwa drop out of the NYC Mayoral race before Nov 4, 2025?\"\n"
+            "Input: \"If Donald Trump wins the 2024 U.S. presidential election, then the market resolves to Yes\"\n"
+            "Output: \"Will Donald Trump win the 2024 U.S. presidential election?\"\n"
+            "Only output the rewritten factual question with no explanations or commentary.\n"
+            f"STATEMENT: {rules}"
+        )
+
+        raw_response = self._generate_response(prompt)
+        return raw_response
+
+    def get_relevant_questions(self, question: str) -> list[str]:
+        """Ask the model for numbered follow-up search questions and return them without numbering."""
+        prompt = (
+            "Based on the following question, generate a list of 5 relevant questions "
+            "that one could search online to gather more information. "
+            "These questions should yield information that would be helpful to answering "
+            "the following question in an objective manner.\n\n"
+            "Your response SHOULD ONLY BE the following lines, in this exact format:\n"
+            "1. <question 1>\n"
+            "2. <question 2>\n"
+            "3. <question 3>\n"
+            "4. <question 4>\n"
+            "5. <question 5>\n"
+            "Do not add ANY preamble, conclusion, or extra text.\n\n"
+            f"Question: \"{question}\""
+        )
+
+        raw_response = self._generate_response(prompt)
+
+        relevant_questions = []
+        for line in raw_response.split('\n'):
+            line = line.strip()
+            if line and line[0].isdigit():
+                clean_question = line.split('.', 1)[-1].strip()
+                relevant_questions.append(clean_question)
+
+        print(f"Generated relevant questions: {relevant_questions}")
+
+        return relevant_questions
+
+
+    def get_information(self, questions):
+        """Query Exa's answer endpoint once per question and return the answers in order."""
+        results = [self.exa.answer(q, text=True) for q in questions]
+        return [r.answer for r in results]
+
+    def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]:
+        """Return the (yes, no, explanation) strings parsed from the model's reply; ValueError if malformed."""
+        prompt = (
+            "Analyze the provided information below to answer the given binary question. "
+            "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n"
+            "--- Information ---\n"
+            f"{information}\n\n"
+            "--- Question ---\n"
+            f"{question}\n\n"
+            "IMPORTANT INSTRUCTIONS:\n"
+            "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n"
+            "   Yes: <YES PERCENTAGE>%, No: <NO PERCENTAGE>%, Explanation: <EXPLANATION>\n"
+            "2. Percentages must sum to 100%.\n"
+            "3. Do NOT include any preamble, summary, or additional text.\n"
+            "4. Provide a brief but clear explanation supporting your probabilities.\n\n"
+        )
+
+        response = self._generate_response(prompt)
+        match = re.search(r"Yes: (.*?), No: (.*?), Explanation: (.*)", response, re.DOTALL)
+
+        if match:
+            yes, no, explanation = match.groups()
+            return yes.strip(), no.strip(), explanation.strip()
+        else:
+            raise ValueError(f"Failed to parse LLM response: {response}")
+
+    def predict(self, question):
+        """Gather background answers for the question and return the model's (yes, no, explanation)."""
+        relevant_questions = self.get_relevant_questions(question)
+        answers = self.get_information(relevant_questions)
+
+        information = ""
+        for i, (asked, answer) in enumerate(zip(relevant_questions, answers), start=1):
+            information += f"INFORMATION {i}: \n"
+            information += f"QUESTION {i}: {asked}\n"
+            information += f"ANSWER {i}: {answer} \n\n"
+        # str.rstrip returns a new string; the result has to be assigned to take effect.
+        information = information.rstrip("\n")
+        yes, no, explanation = self.get_binary_answer_with_percentage(information, question)
+        return yes, no, explanation
+
+# Shared instance used by predictMarket; built at import time, so both API keys must be available.
+predictor = BetPredictor()
+
+def predictMarket(isKalshi: bool, ticker: str):
+    """Predict one market and compare the prediction with its current ask prices.
+
+    Returns buy_yes/buy_no flags, the best edge (predicted % minus ask), the predicted
+    percentages and the explanation; raises ValueError on unparseable percentages.
+    """
+    market = getMarket(isKalshi, ticker)
+    if isKalshi:
+        rules = market['market']['rules_primary']
+        print(f"Rules: {rules}")
+        question = predictor.convert_rules_to_question(rules)
+    else:
+        question = market['question']
+    print(f"Question: {question}")
+    pred_yes, pred_no, explanation = predictor.predict(question)
+    # Accept decimals such as "72.5%"; a digits-only pattern would capture just the "5".
+    matches = [re.search(r"(\d+(?:\.\d+)?)\s*%", text) for text in (pred_yes, pred_no)]
+    if None in matches:
+        raise ValueError(f"Failed to parse percentages: {pred_yes!r}, {pred_no!r}")
+    pred_yes, pred_no = (round(float(m.group(1))) for m in matches)
+    # NOTE(review): asks are read from market['market'] for both platforms, but the
+    # Polymarket question above is read from the top level - confirm that schema.
+    real_yes = int(market['market']['yes_ask'])
+    real_no = int(market['market']['no_ask'])
+    buy_yes = real_yes < pred_yes
+    buy_no = real_no < pred_no
+    return {"buy_yes":buy_yes, "buy_no": buy_no, "edge": max(pred_yes-real_yes, pred_no-real_no), "yes": pred_yes, "no": pred_no, "explanation": explanation}
+
+def predict(kalshi_ticker, poly_slug):
+    """Run predictMarket on a Kalshi ticker and a Polymarket slug and return both results."""
+    kalshi_prediction = predictMarket(True, kalshi_ticker)
+    polymarket_prediction = predictMarket(False, poly_slug)
+    return {"Kalshi": kalshi_prediction, "Polymarket": polymarket_prediction}
\ No newline at end of file
diff --git a/8-application-demos/6-kalshi-bet-predictor/requirements.txt b/8-application-demos/6-kalshi-bet-predictor/requirements.txt
index b3d3fbd3..dfebee10 100644
--- a/8-application-demos/6-kalshi-bet-predictor/requirements.txt
+++ b/8-application-demos/6-kalshi-bet-predictor/requirements.txt
@@ -1,4 +1,3 @@
-accelerate==1.11.0
 annotated-types==0.7.0
 anyio==4.11.0
 certifi==2025.10.5
@@ -6,36 +5,18 @@ charset-normalizer==3.4.4
 distro==1.9.0
 dotenv==0.9.9
 exa-py==1.16.1
-filelock==3.20.0
-fsspec==2025.9.0
 h11==0.16.0
-hf-xet==1.1.10
 httpcore==1.0.9
 httpx==0.28.1
-huggingface-hub==0.35.3
 idna==3.11
-Jinja2==3.1.6
 jiter==0.11.1
-MarkupSafe==3.0.3
-mpmath==1.3.0
-networkx==3.5
-numpy==2.3.4
 openai==2.6.0
-packaging==25.0
-psutil==7.1.1
 pydantic==2.12.3
 pydantic_core==2.41.4
 python-dotenv==1.1.1
-PyYAML==6.0.3
-regex==2025.10.23
 requests==2.32.5
-safetensors==0.6.2
 sniffio==1.3.1
-sympy==1.14.0
-tokenizers==0.22.1
-torch==2.9.0
 tqdm==4.67.1
-transformers==4.57.1
 typing-inspection==0.4.2
 typing_extensions==4.15.0
 urllib3==2.5.0

From e308d8c8dbc44031c8138f9501874929e63802a9 Mon Sep 17 00:00:00 2001
From: SeaUrc <nicksong03@gmail.com>
Date: Fri, 24 Oct 2025 15:28:04 -0400
Subject: [PATCH 04/10] find equiv markets

---
 .../6-kalshi-bet-predictor/cerebrium.toml     |   8 +-
 .../6-kalshi-bet-predictor/compare.py         | 126 ++++++++
 .../find_equiv_markets.py                     | 203 ++++++++++++
 .../6-kalshi-bet-predictor/main.py            | 303 +++++++++++-------
 .../6-kalshi-bet-predictor/predictor.py       | 188 -----------
 5 files changed, 526 insertions(+), 302 deletions(-)
 create mode 100644 8-application-demos/6-kalshi-bet-predictor/compare.py
 create mode 100644 8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py
 delete mode 100644 8-application-demos/6-kalshi-bet-predictor/predictor.py

diff --git a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml
index 7133c08c..0e73bf8d 100644
--- a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml
+++ b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml
@@ -3,7 +3,7 @@ name = "kalshi-bet-predictor"
 python_version = "3.11"
 docker_base_image_url = "debian:bookworm-slim"
 disable_auth = true
-include = ['./*', 'predictor.py', 'cerebrium.toml']
+include = ['./*', 'main.py', 'cerebrium.toml']
 exclude = ['.*']
 
 [cerebrium.dependencies.paths]
@@ -11,12 +11,12 @@ pip = "requirements.txt"
 
 [cerebrium.hardware]
 cpu = 4
-memory = 36
-compute = "ADA_L40"
+memory = 16
+compute = "CPU"
 
 [cerebrium.scaling]
 min_replicas = 0
-max_replicas = 2
+max_replicas = 100
 cooldown = 30
 replica_concurrency = 1
 scaling_metric = "concurrency_utilization"
diff --git a/8-application-demos/6-kalshi-bet-predictor/compare.py b/8-application-demos/6-kalshi-bet-predictor/compare.py
new file mode 100644
index 00000000..813dfc91
--- /dev/null
+++ b/8-application-demos/6-kalshi-bet-predictor/compare.py
@@ -0,0 +1,126 @@
+import csv
+import json
+from typing import Dict, List, Tuple
+import asyncio
+import aiohttp
+
+def load_markets(csv_path: str) -> List[Tuple[str, str]]:
+    """Load (kalshi_ticker, polymarket_slug) pairs from the first two columns of a CSV."""
+    # find_equiv_markets.py writes this header row; it must not be sent as a market.
+    header = ("kalshi_ticker", "polymarket_slug")
+    with open(csv_path, 'r', newline='', encoding='utf-8') as f:
+        pairs = [(row[0], row[1]) for row in csv.reader(f) if len(row) >= 2]
+    # Rows with fewer than two columns were dropped above; now drop the header.
+    return [pair for pair in pairs if pair != header]
+
+async def get_market_data(session: aiohttp.ClientSession, kalshi_ticker: str,
+                         polymarket_slug: str, endpoint_url: str) -> Dict:
+    """POST one ticker/slug pair to the prediction endpoint and flatten the reply.
+
+    Returns a dict with both identifiers plus each platform's edge and buy flags,
+    or None (after printing the error) if anything in the request or parsing fails.
+    """
+    request_headers = {
+        'Authorization': '<YOUR AUTHORIZATION>',
+        'Content-Type': 'application/json'
+    }
+    body = json.dumps({'kalshi_ticker': kalshi_ticker, 'poly_slug': polymarket_slug})
+
+    try:
+        async with session.post(endpoint_url, headers=request_headers, data=body) as response:
+            response.raise_for_status()
+            reply = await response.json()
+            print(reply)
+            result = reply['result']
+            kalshi = result['kalshi']
+            polymarket = result['polymarket']
+
+            return {
+                'kalshi_ticker': kalshi_ticker,
+                'polymarket_slug': polymarket_slug,
+                'kalshi_edge': kalshi['edge'],
+                'polymarket_edge': polymarket['edge'],
+                'kalshi_buy_yes': kalshi['buy_yes'],
+                'kalshi_buy_no': kalshi['buy_no'],
+                'polymarket_buy_yes': polymarket['buy_yes'],
+                'polymarket_buy_no': polymarket['buy_no'],
+            }
+    except Exception as e:
+        # Best effort: report the failed pair and signal it with None.
+        print(f"Error fetching data for {kalshi_ticker}/{polymarket_slug}: {e}")
+        return None
+
+async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]:
+    """Query the endpoint for every market pair in the CSV concurrently.
+
+    Pairs whose request failed (get_market_data returned None) are left out.
+    """
+    pairs = load_markets(csv_path)
+    print(f"Fetching data for {len(pairs)} markets all at once...")
+
+    async with aiohttp.ClientSession() as session:
+        responses = await asyncio.gather(*(
+            get_market_data(session, ticker, slug, endpoint_url) for ticker, slug in pairs))
+    return [entry for entry in responses if entry is not None]
+
+def compute_statistics(results: List[Dict]) -> None:
+    """Print a Kalshi-versus-Polymarket edge comparison for the fetched markets.
+
+    Every result must carry 'kalshi_edge' and 'polymarket_edge'. An empty list
+    prints only the banner and a "No results to analyze" notice.
+    """
+    heavy_rule, light_rule = "="*80, "-"*80
+    print("\n" + heavy_rule)
+    print("STATISTICS")
+    print(heavy_rule)
+    if not results:
+        print("No results to analyze")
+        return
+
+    total = len(results)
+    k_edges = [entry['kalshi_edge'] for entry in results]
+    p_edges = [entry['polymarket_edge'] for entry in results]
+    k_total, p_total = sum(k_edges), sum(p_edges)
+
+    k_wins = sum(1 for k, p in zip(k_edges, p_edges) if k > p)
+    p_wins = sum(1 for k, p in zip(k_edges, p_edges) if p > k)
+    ties = total - k_wins - p_wins
+    gaps = [abs(k - p) for k, p in zip(k_edges, p_edges)]
+    mean_gap, widest_gap = sum(gaps) / total, max(gaps)
+
+    print(f"\nTotal markets analyzed: {total}")
+    print("\n" + light_rule)
+    print("COMPARISON")
+    print(light_rule)
+    print(f"Markets with greater Kalshi edge:      {k_wins} ({k_wins/total*100:.1f}%)")
+    print(f"Markets with greater Polymarket edge:  {p_wins} ({p_wins/total*100:.1f}%)")
+    print(f"Markets with equal edge:               {ties} ({ties/total*100:.1f}%)")
+    print(f"\nAverage edge difference: {mean_gap:.4f}")
+    print(f"Max edge difference:     {widest_gap:.4f}")
+    print("\n" + heavy_rule)
+    if k_total > p_total:
+        leader, advantage = "Kalshi", k_total - p_total
+    elif p_total > k_total:
+        leader, advantage = "Polymarket", p_total - k_total
+    else:
+        leader = None
+    if leader is None:
+        print(f"OVERALL: Both platforms have equal total edge")
+    else:
+        print(f"OVERALL: {leader} has greater total edge (+{advantage:.4f})")
+        print(f"OVERALL: {leader} has an average edge of (+{advantage/total:.4f}) per market")
+    print(heavy_rule)
+
+def main():
+    """Run the comparison: fetch predictions for each CSV pair, then print statistics."""
+    # Both values are placeholders that must be filled in before running.
+    csv_path = '<path to your .csv file>'
+    endpoint_url = '<your hosted endpoint>'
+
+    print("Starting async market analysis...")
+    fetched = asyncio.run(analyze_markets_async(csv_path, endpoint_url))
+    print(f"\nSuccessfully fetched {len(fetched)} markets")
+    compute_statistics(fetched)
+
+if __name__ == "__main__":    
+    main()
\ No newline at end of file
diff --git a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py
new file mode 100644
index 00000000..e5ed8c9a
--- /dev/null
+++ b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py
@@ -0,0 +1,203 @@
+import csv
+import os
+import requests
+import faiss
+from sentence_transformers import SentenceTransformer
+from typing import List, Dict, Any
+
+# --- CONFIGURATION ---
+SIMILARITY_THRESHOLD = 0.70
+MAX_MARKET_LIMIT = 40000
+TOP_K = 5  # number of top Polymarket markets to check for each Kalshi market
+KALSHI_API_URL = "https://api.elections.kalshi.com/trade-api/v2/markets"
+POLYMARKET_API_URL = "https://clob.polymarket.com/markets"
+OUTPUT_FILE = "equivalent_markets.csv"
+
+# ---------------------- API FETCH FUNCTIONS ----------------------
+
+def get_kalshi_markets() -> List[Dict[str, Any]]:
+    """Collect active binary markets from the paginated Kalshi markets API.
+
+    Stops at the last page or past MAX_MARKET_LIMIT; returns [] if a request fails.
+    """
+    print("Fetching Kalshi markets...")
+    markets_list = []
+    cursor = ""
+    try:
+        while True:
+            params = {'limit': 1000}
+            if cursor:
+                params['cursor'] = cursor
+            # A timeout keeps a stalled connection from hanging the script forever.
+            response = requests.get(KALSHI_API_URL, params=params, timeout=30)
+            response.raise_for_status()
+            data = response.json()
+            if 'markets' not in data:
+                print("Error: 'markets' key not in Kalshi response.")
+                break
+
+            for market in data['markets']:
+                if market['status'] == 'active' and market['market_type'] == 'binary':
+                    markets_list.append({
+                        'platform': 'Kalshi',
+                        'title': market['title'],
+                        'ticker': market['ticker'],
+                        'url': f"https://kalshi.com/markets/{market['ticker']}",
+                        'event_url': f"https://kalshi.com/markets/{market['event_ticker']}",
+                        'close_date': market['close_time']
+                    })
+
+            # A missing cursor is treated as the last page instead of raising KeyError.
+            cursor = data.get('cursor')
+            print(f"{cursor} | {len(markets_list)}")
+            if len(markets_list) > MAX_MARKET_LIMIT or not cursor:
+                break
+
+        print(f"Found {len(markets_list)} open binary markets on Kalshi.")
+        return markets_list
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching Kalshi markets: {e}")
+        return []
+    
+def get_kalshi_market(ticker):
+    """Return the title of the Kalshi market identified by *ticker*."""
+    response = requests.get(f"{KALSHI_API_URL}/{ticker}", timeout=30)  # bounded wait
+    return response.json()['market']['title']
+
+def get_polymarket_markets() -> List[Dict[str, Any]]:
+    """Collect open markets from the paginated Polymarket CLOB API.
+
+    Markets lacking a question or slug are skipped: they have no title to match
+    and their slug would be saved as "None". Returns [] if a request fails.
+    """
+    print("Fetching Polymarket markets (CLOB API)...")
+    markets_list = []
+    next_cursor = None
+    try:
+        while True:
+            params = {'next_cursor': next_cursor} if next_cursor else {}
+            response = requests.get(POLYMARKET_API_URL, params=params, timeout=30)
+            response.raise_for_status()
+            data = response.json()
+
+            market_list_page = data.get('data')
+            if not market_list_page:
+                break
+
+            for market in market_list_page:
+                title, slug = market.get('question'), market.get('market_slug')
+                if market.get('active') and not market.get('closed') and title and slug:
+                    markets_list.append({
+                        'platform': 'Polymarket',
+                        'title': title,
+                        'id': market.get('condition_id'),
+                        'url': f"https://polymarket.com/event/{slug}",
+                        'close_date': market.get('end_date_iso')
+                    })
+
+            next_cursor = data.get('next_cursor')
+            print(f"cursor {next_cursor} | {len(markets_list)}")
+            if len(markets_list) > MAX_MARKET_LIMIT or not next_cursor or next_cursor == 'LTE=':
+                break
+
+        print(f"Found {len(markets_list)} open markets on Polymarket.")
+        return markets_list
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching Polymarket markets: {e}")
+        return []
+
+
+# ---------------------- FAISS-BASED MATCHING ----------------------
+
+def find_similar_markets(kalshi_markets, polymarket_markets, threshold=0.9, top_k=TOP_K):
+    """Pair each Kalshi market with its most similar Polymarket titles.
+
+    Titles are embedded with normalize_embeddings=True, so the inner-product FAISS
+    index ranks by cosine similarity. Returns a list of dicts with 'score',
+    'kalshi_market' and 'polymarket_market' for every top_k hit scoring >= threshold.
+    """
+    kalshi_titles = [m['title'] for m in kalshi_markets]
+    poly_titles = [m['title'] for m in polymarket_markets]
+    # Check before loading the model so empty input returns immediately.
+    if not kalshi_titles or not poly_titles:
+        print("Not enough market data to compare.")
+        return []
+
+    print("\nLoading NLP model (SentenceTransformer)...")
+    model = SentenceTransformer('all-MiniLM-L6-v2')
+    print("Encoding market titles into embeddings...")
+    kalshi_embeddings = model.encode(kalshi_titles, convert_to_numpy=True, normalize_embeddings=True)
+    poly_embeddings = model.encode(poly_titles, convert_to_numpy=True, normalize_embeddings=True)
+
+    print(f"Building FAISS index for {len(poly_embeddings)} Polymarket markets...")
+    index = faiss.IndexFlatIP(poly_embeddings.shape[1])  # Inner product for cosine similarity
+    index.add(poly_embeddings)
+    print(f"Querying top {top_k} nearest Polymarket markets for each Kalshi market...")
+    scores, indices = index.search(kalshi_embeddings, top_k)
+
+    potential_matches = []
+    for i, (kalshi_market, row_scores, row_indices) in enumerate(zip(kalshi_markets, scores, indices)):
+        for raw_score, poly_index in zip(row_scores, row_indices):
+            score = float(raw_score)
+            if score >= threshold:
+                potential_matches.append({
+                    'score': score,
+                    'kalshi_market': kalshi_market,
+                    'polymarket_market': polymarket_markets[poly_index]
+                })
+        if i % 100 == 0:
+            print(f"Processed {i}/{len(kalshi_markets)} Kalshi markets...")
+    return potential_matches
+    
+def interactive_save(matches: List[Dict[str, Any]]):
+    """Show each candidate match and append the ones the user accepts to OUTPUT_FILE.
+
+    A header row is written first when OUTPUT_FILE does not exist yet.
+    """
+    print("\n--- Interactive Review Mode ---")
+    print("Press 'y' to save a match, anything else to skip.\n")
+    needs_header = not os.path.exists(OUTPUT_FILE)
+    with open(OUTPUT_FILE, "a", newline='', encoding="utf-8") as csvfile:
+        writer = csv.writer(csvfile)
+        if needs_header:
+            writer.writerow(["kalshi_ticker", "polymarket_slug"])
+
+        for number, match in enumerate(matches, start=1):
+            kalshi, poly = match['kalshi_market'], match['polymarket_market']
+            ticker = kalshi['ticker']
+            slug = poly['url'].split("event/")[1]
+            kalshi_title = get_kalshi_market(ticker)
+            print(f"\nMatch #{number} (Score: {match['score']:.4f})")
+            print(f"[KALSHI]     {kalshi_title}")
+            print(f"[POLYMARKET] {poly['title']}")
+            print(f"  > Kalshi URL:    {kalshi['url']}")
+            print(f"  > Polymarket URL:{poly['url']}")
+
+            if input("Save this match? (y/n): ").strip().lower() == 'y':
+                writer.writerow([ticker, slug])
+                print("Saved.")
+            else:
+                print("Skipped.")
+
+    print(f"\nDone. Saved matches to '{OUTPUT_FILE}'.")
+
+def main():
+    """Fetch both platforms' markets, find similar titles and review them interactively."""
+    kalshi_markets = get_kalshi_markets()
+    polymarket_markets = get_polymarket_markets()
+    if not (kalshi_markets and polymarket_markets):
+        print("\nCould not fetch markets from one or both platforms. Exiting.")
+        return
+
+    candidates = find_similar_markets(kalshi_markets, polymarket_markets, SIMILARITY_THRESHOLD)
+    print(f"\n--- Found {len(candidates)} Potential Matches ---")
+    if not candidates:
+        print("No strong matches found.")
+        return
+
+    # Review the strongest candidates first.
+    ranked = sorted(candidates, key=lambda m: m['score'], reverse=True)
+    interactive_save(ranked)
+
+if __name__ == "__main__":
+    main()
diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py
index 8a81115c..cfa962e9 100644
--- a/8-application-demos/6-kalshi-bet-predictor/main.py
+++ b/8-application-demos/6-kalshi-bet-predictor/main.py
@@ -1,120 +1,203 @@
-import csv
+import json
+from typing import Tuple
 import requests
-from typing import Dict, List, Tuple
-import asyncio
-import aiohttp
-
-def load_markets(csv_path: str) -> List[Tuple[str, str]]:
-    markets = []
-    with open(csv_path, 'r') as f:
-        reader = csv.reader(f)
-        next(reader)  # Skip header if present
-        for row in reader:
-            if len(row) >= 2:
-                markets.append((row[0], row[1]))
-    return markets
-
-async def get_market_data(session: aiohttp.ClientSession, kalshi_id: str, 
-                         polymarket_slug: str, endpoint_url: str) -> Dict:
-    
-    payload = {
-        'kalshi_id': kalshi_id,
-        'polymarket_slug': polymarket_slug
-    }
-    
+import re
+from dotenv import load_dotenv
+import os
+from exa_py import Exa
+from openai import OpenAI
+
+
+def getMarket(is_kalshi, ticker):
+    if is_kalshi: 
+        url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}" # market ticker
+    else:
+        url = f"https://gamma-api.polymarket.com/markets/slug/{ticker}" # slug
     try:
-        async with session.post(endpoint_url, json=payload, timeout=aiohttp.ClientTimeout(total=10)) as response:
-            response.raise_for_status()
-            data = await response.json()
-            
-            kalshi_data = data['Kalshi']
-            polymarket_data = data['Polymarket']
-            
-            return {
-                'kalshi_id': kalshi_id,
-                'polymarket_slug': polymarket_slug,
-                'kalshi_edge': kalshi_data['edge'],
-                'polymarket_edge': polymarket_data['edge'],
-                'kalshi_buy_yes': kalshi_data['buy_yes'],
-                'kalshi_buy_no': kalshi_data['buy_no'],
-                'polymarket_buy_yes': polymarket_data['buy_yes'],
-                'polymarket_buy_no': polymarket_data['buy_no'],
-            }
-    except Exception as e:
-        print(f"Error fetching data for {kalshi_id}/{polymarket_slug}: {e}")
-        return None
-
-async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]:
-    markets = load_markets(csv_path)
-    
-    print(f"Fetching data for {len(markets)} markets all at once...")
-    
-    async with aiohttp.ClientSession() as session:
-        tasks = [get_market_data(session, kalshi_id, polymarket_slug, endpoint_url) 
-                for kalshi_id, polymarket_slug in markets]
+        res = requests.get(url)
+        res.raise_for_status()
+        obj = res.json()
+        return obj
+    except requests.exceptions.RequestException as e:
+        raise RuntimeError(f"Error fetching Kalshi market data: {e}")
+
+
+class BetPredictor:
+    def __init__(self, model_name: str = "gpt-5-nano"):
         
-        results = await asyncio.gather(*tasks)
-    
-    return [r for r in results if r is not None]
+        load_dotenv()
+        
+        exa_api_key = os.environ.get("EXA_API_KEY")
+        openai_api_key = os.environ.get("OPENAI_API_KEY")
+
+        if not exa_api_key:
+            raise EnvironmentError("Missing EXA_API_KEY in environment variables")
+        if not openai_api_key: 
+            raise EnvironmentError("Missing OPENAI_API_KEY in environment variables")
+
+        self.exa = Exa(exa_api_key)
+        self.client = OpenAI(api_key=openai_api_key)
+        self.model_name = model_name
+
+        print(f"Using model: {model_name}")
+
+    def _generate_response(self, prompt: str) -> str:
+
+        response = self.client.responses.create(
+            model=self.model_name,
+            input=prompt
+        )
+
+        output = response.output_text.strip()
+        print(f"Generated this response: {output}")
+
+        return output
+    
+    def convert_rules_to_question(self, rules:str) -> str:
+        prompt = (
+            "A market resolution statement usually describes the conditions under which a market would resolve to \"Yes\" or \"No\". "
+            "Your task is to extract and rewrite the core factual question that determines whether the market will resolve to \"Yes.\"\n"
+            "Input format: A sentence describing how a market will resolve.\n"
+            "Example: \"Will the market resolve to Yes if Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025?\"\n"
+            "Output format: A clear, grammatically correct factual question that reflects the underlying condition of the resolution, removing any meta-language about the market or its resolution.\n"
+            "The condition will start with \"if [event], then the market will resolve to \"Yes\" and you must convert it to \"Will [event]?\"\n"
+            "Examples:\n"
+            "Input: \"If Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025, then the market resolves to Yes\"\n"
+            "Output: \"Will Curtis Sliwa drop out of the NYC Mayoral race before Nov 4, 2025?\"\n"
+            "Input: \"If Donald Trump wins the 2024 U.S. presidential election, then the market resolves to Yes\"\n"
+            "Output: \"Will Donald Trump win the 2024 U.S. presidential election?\"\n"
+            "Only output the rewritten factual question with no explanations or commentary.\n"
+            f"STATEMENT: {rules}"
+        )
+
+        raw_response = self._generate_response(prompt)
+
+        return raw_response
+
+    def get_relevant_questions(self, question: str) -> list[str]:
+
+        prompt = (
+            "Based on the following question, generate a list of 5 relevant questions "
+            "that one could search online to gather more information. "
+            "These questions should yield information that would be helpful to answering "
+            "the following question in an objective manner.\n\n"
+            "Your response SHOULD ONLY BE the following lines, in this exact format:\n"
+            "1. <question 1>\n"
+            "2. <question 2>\n"
+            "3. <question 3>\n"
+            "4. <question 4>\n"
+            "5. <question 5>\n"
+            "Do not add ANY preamble, conclusion, or extra text.\n\n"
+            f"Question: \"{question}\""
+        )
+
+        raw_response = self._generate_response(prompt)
+
+        relevant_questions = []
+        for line in raw_response.split('\n'):
+            line = line.strip()
+            if line and line[0].isdigit():
+                clean_question = line.split('.', 1)[-1].strip()
+                relevant_questions.append(clean_question)
+        
+        print(f"Generated relevant questions: {relevant_questions}")
+
+        return relevant_questions
 
-def compute_statistics(results: List[Dict]) -> None:
-    print("\n" + "="*80)
-    print("STATISTICS")
-    print("="*80)
-    
-    if not results:
-        print("No results to analyze")
-        return
-    
-    total_markets = len(results)
-    
-    kalshi_edges = [r['kalshi_edge'] for r in results]
-    total_kalshi_edge = sum(kalshi_edges)
-    
-    polymarket_edges = [r['polymarket_edge'] for r in results]
-    total_polymarket_edge = sum(polymarket_edges)
-    
-    kalshi_better_count = sum(1 for r in results if r['kalshi_edge'] > r['polymarket_edge'])
-    polymarket_better_count = sum(1 for r in results if r['polymarket_edge'] > r['kalshi_edge'])
-    equal_count = total_markets - kalshi_better_count - polymarket_better_count
-    
-    edge_differences = [abs(r['kalshi_edge'] - r['polymarket_edge']) for r in results]
-    avg_edge_difference = sum(edge_differences) / total_markets
-    max_edge_difference = max(edge_differences)
-    
-    # Results
-    print(f"\nTotal markets analyzed: {total_markets}")
-    print("\n" + "-"*80)
-    print("COMPARISON")
-    print("-"*80)
-    print(f"Markets with greater Kalshi edge:      {kalshi_better_count} ({kalshi_better_count/total_markets*100:.1f}%)")
-    print(f"Markets with greater Polymarket edge:  {polymarket_better_count} ({polymarket_better_count/total_markets*100:.1f}%)")
-    print(f"Markets with equal edge:               {equal_count} ({equal_count/total_markets*100:.1f}%)")
-    print(f"\nAverage edge difference: {avg_edge_difference:.4f}")
-    print(f"Max edge difference:     {max_edge_difference:.4f}")
     
-    # Overall winner
-    print("\n" + "="*80)
-    if total_kalshi_edge > total_polymarket_edge:
-        advantage = total_kalshi_edge - total_polymarket_edge
-        print(f"OVERALL: Kalshi has greater total edge (+{advantage:.4f})")
-    elif total_polymarket_edge > total_kalshi_edge:
-        advantage = total_polymarket_edge - total_kalshi_edge
-        print(f"OVERALL: Polymarket has greater total edge (+{advantage:.4f})")
-    else:
-        print(f"OVERALL: Both platforms have equal total edge")
-    print("="*80)
+    def get_information(self, questions):
+        results = [self.exa.answer(q, text=True) for q in questions]
+        answers = [r.answer for r in results]
+        return answers
+
+    def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]:
+        prompt = (
+            "Analyze the provided information below to answer the given binary question. "
+            "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n"
+            "--- Information ---\n"
+            f"{information}\n\n"
+            "--- Question ---\n"
+            f"{question}\n\n"
+            "IMPORTANT INSTRUCTIONS:\n"
+            "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n"
+            "   Yes: <YES PERCENTAGE>%, No: <NO PERCENTAGE>%, Explanation: <EXPLANATION>\n"
+            "2. Percentages must sum to 100%.\n"
+            "3. Do NOT include any preamble, summary, or additional text.\n"
+            "4. Provide a brief but clear explanation supporting your probabilities.\n\n"
+        )
+
+        response = self._generate_response(prompt)
+
+        match = re.search(r"Yes: (.*?), No: (.*?), Explanation: (.*)", response, re.DOTALL)
 
-def main():
-    CSV_PATH = 'markets.csv' 
-    ENDPOINT_URL = 'https://{cerebrium}/predict' # Your hosted endpoint
+        if match:
+            yes, no, explanation = match.groups()
+            return yes.strip(), no.strip(), explanation.strip()
+        else:
+            raise ValueError(f"Failed to parse LLM response: {response}")
+    
+    def evaluate(self, question):
+        relevant_questions = self.get_relevant_questions(question)
+        answers = self.get_information(relevant_questions)
+
+        information = ""
+        for i, v in enumerate(relevant_questions):
+            information += f"INFORMATION {i+1}: \n"
+            information += f"QUESTION {i+1}: {v}\n"
+            information += f"ANSWER {i+1}: {answers[i]} \n\n"
+
+        information.rstrip("\n")
+        
+        yes, no, explanation = self.get_binary_answer_with_percentage(information, question)
+        return yes, no, explanation
+
+
+predictor = BetPredictor()
+
+def predict(kalshi_ticker, poly_slug):
+    kalshi_market = getMarket(True, kalshi_ticker)
+    poly_market = getMarket(False, poly_slug)
+    question = poly_market['question'] # we use polymarket because they have direct question
+
+    kalshi_real_yes = float(kalshi_market['market']['yes_ask'])
+    kalshi_real_no = float(kalshi_market['market']['no_ask'])
     
-    print("Starting async market analysis...")
-    results = asyncio.run(analyze_markets_async(CSV_PATH, ENDPOINT_URL))
+    poly_values = json.loads(poly_market['outcomePrices'])
+    poly_real_yes, poly_real_no = [float(v) for v in poly_values]
+
+    print(f"Question: {question}") 
     
-    print(f"\nSuccessfully fetched {len(results)} markets")
+    pred_yes, pred_no, explanation = predictor.evaluate(question)
+
+    match_yes = re.search(r"(\d+)%", pred_yes)
+    match_no = re.search(r"(\d+)%", pred_no)
+    pred_yes = float(match_yes.group(1))
+    pred_no = float(match_no.group(1))
+
+    kalshi_real_yes = float(kalshi_market['market']['yes_ask'])
+    kalshi_real_no = float(kalshi_market['market']['no_ask'])
     
-    compute_statistics(results)
+    poly_values = json.loads(poly_market['outcomePrices'])
+    poly_real_yes, poly_real_no = [float(v)*100 for v in poly_values]
+
+    kalshi_buy_yes = kalshi_real_yes < pred_yes
+    kalshi_buy_no = kalshi_real_no < pred_no
+
+    poly_buy_yes = poly_real_yes < pred_yes
+    poly_buy_no = poly_real_no < pred_no
 
-if __name__ == "__main__":
-    main()
\ No newline at end of file
+    return {
+        "kalshi": {
+            "buy_yes":kalshi_buy_yes,
+            "buy_no": kalshi_buy_no,
+            "edge": max(pred_yes-kalshi_real_yes, pred_no-kalshi_real_no),   
+        },
+        "polymarket": {
+            "buy_yes":poly_buy_yes,
+            "buy_no": poly_buy_no,
+            "edge": max(pred_yes-poly_real_yes, pred_no-poly_real_no),
+        },
+        "yes": pred_yes,
+        "no": pred_no,
+        "explanation": explanation
+    }
\ No newline at end of file
diff --git a/8-application-demos/6-kalshi-bet-predictor/predictor.py b/8-application-demos/6-kalshi-bet-predictor/predictor.py
deleted file mode 100644
index f1d9a4c0..00000000
--- a/8-application-demos/6-kalshi-bet-predictor/predictor.py
+++ /dev/null
@@ -1,188 +0,0 @@
-from typing import Tuple
-import requests
-import re
-from dotenv import load_dotenv
-import os
-from exa_py import Exa
-from openai import OpenAI
-
-
-def getMarket(is_kalshi, ticker):
-    if is_kalshi: 
-        url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}" # market ticker
-    else:
-        url = f"https://gamma-api.polymarket.com/markets/slug/{ticker}" # slug
-    try:
-        res = requests.get(url)
-        res.raise_for_status()
-        obj = res.json()
-        return obj
-    except requests.exceptions.RequestException as e:
-        raise RuntimeError(f"Error fetching Kalshi market data: {e}")
-
-
-class BetPredictor:
-    def __init__(self, model_name: str = "gpt-5-nano"):
-
-
-        load_dotenv()
-        exa_api_key = os.environ.get("EXA_API_KEY")
-        openai_api_key = os.environ.get("OPENAI_API_KEY")
-
-        if not exa_api_key:
-            raise EnvironmentError("Missing EXA_API_KEY in environment variables")
-        if not openai_api_key: 
-            raise EnvironmentError("Missing OPENAI_API_KEY in environment variables")
-
-        self.exa = Exa(exa_api_key)
-        self.client = OpenAI(api_key=openai_api_key)
-        self.model_name = model_name
-
-        print(f"Using model: {model_name}")
-
-    def _generate_response(self, prompt: str) -> str:
-
-        response = self.client.responses.create(
-            model=self.model_name,
-            input=prompt
-        )
-
-        output = response.output_text.strip()
-        print(f"Generated this response: {output}")
-
-        return output
-    
-    def convert_rules_to_question(self, rules:str) -> str:
-        prompt = (
-            "A market resolution statement usually describes the conditions under which a market would resolve to \"Yes\" or \"No\". "
-            "Your task is to extract and rewrite the core factual question that determines whether the market will resolve to \"Yes.\"\n"
-            "Input format: A sentence describing how a market will resolve.\n"
-            "Example: \"Will the market resolve to Yes if Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025?\"\n"
-            "Output format: A clear, grammatically correct factual question that reflects the underlying condition of the resolution, removing any meta-language about the market or its resolution.\n"
-            "The condition will start with \"if [event], then the market will resolve to \"Yes\" and you must convert it to \"Will [event]?\"\n"
-            "Examples:\n"
-            "Input: \"If Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025, then the market resolves to Yes\"\n"
-            "Output: \"Will Curtis Sliwa drop out of the NYC Mayoral race before Nov 4, 2025?\"\n"
-            "Input: \"If Donald Trump wins the 2024 U.S. presidential election, then the market resolves to Yes\"\n"
-            "Output: \"Will Donald Trump win the 2024 U.S. presidential election?\"\n"
-            "Only output the rewritten factual question with no explanations or commentary.\n"
-            f"STATEMENT: {rules}"
-        )
-
-        raw_response = self._generate_response(prompt)
-
-        return raw_response
-
-    def get_relevant_questions(self, question: str) -> list[str]:
-
-        prompt = (
-            "Based on the following question, generate a list of 5 relevant questions "
-            "that one could search online to gather more information. "
-            "These questions should yield information that would be helpful to answering "
-            "the following question in an objective manner.\n\n"
-            "Your response SHOULD ONLY BE the following lines, in this exact format:\n"
-            "1. <question 1>\n"
-            "2. <question 2>\n"
-            "3. <question 3>\n"
-            "4. <question 4>\n"
-            "5. <question 5>\n"
-            "Do not add ANY preamble, conclusion, or extra text.\n\n"
-            f"Question: \"{question}\""
-        )
-
-        raw_response = self._generate_response(prompt)
-
-        relevant_questions = []
-        for line in raw_response.split('\n'):
-            line = line.strip()
-            if line and line[0].isdigit():
-                clean_question = line.split('.', 1)[-1].strip()
-                relevant_questions.append(clean_question)
-        
-        print(f"Generated relevant questions: {relevant_questions}")
-
-        return relevant_questions
-
-    
-    def get_information(self, questions):
-        results = [self.exa.answer(q, text=True) for q in questions]
-        answers = [r.answer for r in results]
-        return answers
-
-    def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]:
-        prompt = (
-            "Analyze the provided information below to answer the given binary question. "
-            "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n"
-            "--- Information ---\n"
-            f"{information}\n\n"
-            "--- Question ---\n"
-            f"{question}\n\n"
-            "IMPORTANT INSTRUCTIONS:\n"
-            "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n"
-            "   Yes: <YES PERCENTAGE>%, No: <NO PERCENTAGE>%, Explanation: <EXPLANATION>\n"
-            "2. Percentages must sum to 100%.\n"
-            "3. Do NOT include any preamble, summary, or additional text.\n"
-            "4. Provide a brief but clear explanation supporting your probabilities.\n\n"
-        )
-
-        response = self._generate_response(prompt)
-
-        match = re.search(r"Yes: (.*?), No: (.*?), Explanation: (.*)", response, re.DOTALL)
-
-        if match:
-            yes, no, explanation = match.groups()
-            return yes.strip(), no.strip(), explanation.strip()
-        else:
-            raise ValueError(f"Failed to parse LLM response: {response}")
-    
-    def predict(self, question):
-        relevant_questions = self.get_relevant_questions(question)
-        answers = self.get_information(relevant_questions)
-
-        information = ""
-        for i, v in enumerate(relevant_questions):
-            information += f"INFORMATION {i+1}: \n"
-            information += f"QUESTION {i+1}: {v}\n"
-            information += f"ANSWER {i+1}: {answers[i]} \n\n"
-
-        information.rstrip("\n")
-        
-        yes, no, explanation = self.get_binary_answer_with_percentage(information, question)
-        return yes, no, explanation
-
-
-predictor = BetPredictor()
-
-def predictMarket(isKalshi: bool, ticker: str):
-
-    if isKalshi:
-        market = getMarket(isKalshi, ticker)
-        rules = market['market']['rules_primary']
-        print(f"Rules: {rules}")
-        question = predictor.convert_rules_to_question(rules)
-    else:
-        market = getMarket(isKalshi, ticker)
-        question = market['question']
-
-    print(f"Question: {question}") 
-    
-    pred_yes, pred_no, explanation = predictor.predict(question)
-
-    match_yes = re.search(r"(\d+)%", pred_yes)
-    match_no = re.search(r"(\d+)%", pred_no)
-    pred_yes = int(match_yes.group(1))
-    pred_no = int(match_no.group(1))
-
-    real_yes = int(market['market']['yes_ask'])
-    real_no = int(market['market']['no_ask'])
-
-    buy_yes = real_yes < pred_yes
-    buy_no = real_no < pred_no
-
-    return {"buy_yes":buy_yes, "buy_no": buy_no, "edge": max(pred_yes-real_yes, pred_no-real_no), "yes": pred_yes, "no": pred_no, "explanation": explanation}
-
-def predict(kalshi_ticker, poly_slug):
-    kalshi_res = predictMarket(True, kalshi_ticker)
-    poly_res = predictMarket(False, poly_slug)
-
-    return {"Kalshi": kalshi_res, "Polymarket": poly_res}
\ No newline at end of file

From be71caf2462db4d7af0d3594fb8fd3098a7b81aa Mon Sep 17 00:00:00 2001
From: SeaUrc <nicksong03@gmail.com>
Date: Fri, 24 Oct 2025 15:34:34 -0400
Subject: [PATCH 05/10] minor tweaks

---
 .../6-kalshi-bet-predictor/compare.py         |  5 ++--
 .../find_equiv_markets.py                     | 27 +++++++++----------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/8-application-demos/6-kalshi-bet-predictor/compare.py b/8-application-demos/6-kalshi-bet-predictor/compare.py
index 813dfc91..2facddf5 100644
--- a/8-application-demos/6-kalshi-bet-predictor/compare.py
+++ b/8-application-demos/6-kalshi-bet-predictor/compare.py
@@ -8,6 +8,7 @@ def load_markets(csv_path: str) -> List[Tuple[str, str]]:
     markets = []
     with open(csv_path, 'r') as f:
         reader = csv.reader(f)
+        next(reader) # skip header
         for row in reader:
             if len(row) >= 2:
                 markets.append((row[0], row[1]))
@@ -112,8 +113,8 @@ def compute_statistics(results: List[Dict]) -> None:
     print("="*80)
 
 def main():
-    CSV_PATH = '<path to your .csv file>' 
-    ENDPOINT_URL = '<your hosted endpoint>'
+    CSV_PATH = '<PATH TO YOUR .csv FILE>' 
+    ENDPOINT_URL = '<YOUR HOSTED ENDPOINT>'
     
     print("Starting async market analysis...")
     results = asyncio.run(analyze_markets_async(CSV_PATH, ENDPOINT_URL))
diff --git a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py
index e5ed8c9a..a534dfc3 100644
--- a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py
+++ b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py
@@ -5,21 +5,20 @@
 from sentence_transformers import SentenceTransformer
 from typing import List, Dict, Any
 
-# --- CONFIGURATION ---
-SIMILARITY_THRESHOLD = 0.70
-MAX_MARKET_LIMIT = 40000
+# --- Config ---
+SIMILARITY_THRESHOLD = 0.70 # threshold for cosine similarity
+MAX_MARKET_LIMIT = 40000 # max number of active & open markets to gather
 TOP_K = 5  # number of top Polymarket markets to check for each Kalshi market
 KALSHI_API_URL = "https://api.elections.kalshi.com/trade-api/v2/markets"
 POLYMARKET_API_URL = "https://clob.polymarket.com/markets"
-OUTPUT_FILE = "equivalent_markets.csv"
+OUTPUT_FILE = "markets.csv"
 
-# ---------------------- API FETCH FUNCTIONS ----------------------
+# ---------------------- API Fetch Functions ----------------------
 
 def get_kalshi_markets() -> List[Dict[str, Any]]:
     print("Fetching Kalshi markets...")
     markets_list = []
     cursor = ""
-
     try:
         while True:
             params = {'limit': 1000}
@@ -47,7 +46,7 @@ def get_kalshi_markets() -> List[Dict[str, Any]]:
                     })
 
             cursor = data['cursor']
-            print(f"{cursor} | {len(markets_list)}")
+            print(f"Found {len(markets_list)} active and open markets")
 
             if len(markets_list) > MAX_MARKET_LIMIT or not cursor:
                 break
@@ -65,7 +64,7 @@ def get_kalshi_market(ticker):
     return title['market']['title']
 
 def get_polymarket_markets() -> List[Dict[str, Any]]:
-    print("Fetching Polymarket markets (CLOB API)...")
+    print("Fetching Polymarket markets...")
     markets_list = []
     next_cursor = None
 
@@ -94,7 +93,7 @@ def get_polymarket_markets() -> List[Dict[str, Any]]:
                     })
 
             next_cursor = data.get('next_cursor')
-            print(f"cursor {next_cursor} | {len(markets_list)}")
+            print(f"Found {len(markets_list)} active and open markets")
 
             if len(markets_list) > MAX_MARKET_LIMIT or not next_cursor or next_cursor == 'LTE=':
                 break
@@ -107,10 +106,10 @@ def get_polymarket_markets() -> List[Dict[str, Any]]:
         return []
 
 
-# ---------------------- FAISS-BASED MATCHING ----------------------
+# ---------------------- Matching ----------------------
 
 def find_similar_markets(kalshi_markets, polymarket_markets, threshold=0.9, top_k=TOP_K):
-    print("\nLoading NLP model (SentenceTransformer)...")
+    print("\nLoading NLP model...")
     model = SentenceTransformer('all-MiniLM-L6-v2')
 
     kalshi_titles = [m['title'] for m in kalshi_markets]
@@ -120,11 +119,11 @@ def find_similar_markets(kalshi_markets, polymarket_markets, threshold=0.9, top_
         print("Not enough market data to compare.")
         return []
 
-    print("Encoding market titles into embeddings...")
+    print("Encoding titles into embeddings...")
     kalshi_embeddings = model.encode(kalshi_titles, convert_to_numpy=True, normalize_embeddings=True)
     poly_embeddings = model.encode(poly_titles, convert_to_numpy=True, normalize_embeddings=True)
 
-    print(f"Building FAISS index for {len(poly_embeddings)} Polymarket markets...")
+    print(f"Building vector index for {len(poly_embeddings)} Polymarket markets...")
     dim = poly_embeddings.shape[1]
     index = faiss.IndexFlatIP(dim)  # Inner product for cosine similarity
     index.add(poly_embeddings)
@@ -150,7 +149,7 @@ def find_similar_markets(kalshi_markets, polymarket_markets, threshold=0.9, top_
     return potential_matches
     
 def interactive_save(matches: List[Dict[str, Any]]):
-    print("\n--- Interactive Review Mode ---")
+    print("\n--- Review Mode ---")
     print("Press 'y' to save a match, anything else to skip.\n")
     
     file_exists = os.path.exists(OUTPUT_FILE)

From 36ae1f18f1fefb5765a0f1b0990b8f6721c4e6a5 Mon Sep 17 00:00:00 2001
From: SeaUrc <nicksong03@gmail.com>
Date: Fri, 31 Oct 2025 16:23:55 -0400
Subject: [PATCH 06/10] fix: code quality

---
 .../6-kalshi-bet-predictor/analyst.py         | 146 ++++++++++++
 .../6-kalshi-bet-predictor/compare.py         |  68 +++---
 .../find_equiv_markets.py                     |  26 +--
 .../6-kalshi-bet-predictor/main.py            | 219 ++++++------------
 4 files changed, 259 insertions(+), 200 deletions(-)
 create mode 100644 8-application-demos/6-kalshi-bet-predictor/analyst.py

diff --git a/8-application-demos/6-kalshi-bet-predictor/analyst.py b/8-application-demos/6-kalshi-bet-predictor/analyst.py
new file mode 100644
index 00000000..171610f3
--- /dev/null
+++ b/8-application-demos/6-kalshi-bet-predictor/analyst.py
@@ -0,0 +1,146 @@
+
+from typing import Tuple
+from dotenv import load_dotenv
+import os
+import json
+from exa_py import Exa
+from openai import OpenAI
+
+class BetAnalyst:
+    def __init__(self, model_name: str = "gpt-5-nano"):
+        
+        load_dotenv()
+        
+        exa_api_key = os.environ.get("EXA_API_KEY")
+        openai_api_key = os.environ.get("OPENAI_API_KEY")
+
+        if not exa_api_key:
+            raise EnvironmentError("Missing EXA_API_KEY in environment variables")
+        if not openai_api_key: 
+            raise EnvironmentError("Missing OPENAI_API_KEY in environment variables")
+
+        self.exa = Exa(exa_api_key)
+        self.client = OpenAI(api_key=openai_api_key)
+        self.model_name = model_name
+
+        print(f"Using model: {model_name}")
+
+    def _generate_response(self, prompt: str, response_format: dict | None = None) -> str:
+
+        request_args = {
+            "model": self.model_name,
+            "input": prompt,
+        }
+
+        if response_format is not None:
+            request_args["response_format"] = response_format
+
+        try:
+            response = self.client.responses.create(**request_args)
+
+            output = response.output_text.strip()
+            print(f"Generated this response: {output}")
+            return output
+        except Exception as e:
+            raise RuntimeError(f"Error during API Call: {e}")
+    
+    def convert_market_to_resolution(self, rules:str) -> str:
+        prompt = (
+            "A market resolution statement usually describes the conditions under which a market would resolve to \"Yes\" or \"No\". "
+            "Your task is to extract and rewrite the core factual question that determines whether the market will resolve to \"Yes.\"\n"
+            "Input format: A sentence describing how a market will resolve.\n"
+            "Example: \"Will the market resolve to Yes if Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025?\"\n"
+            "Output format: A clear, grammatically correct factual question that reflects the underlying condition of the resolution, removing any meta-language about the market or its resolution.\n"
+            "The condition will start with \"if [event], then the market will resolve to \"Yes\" and you must convert it to \"Will [event]?\"\n"
+            "Examples:\n"
+            "Input: \"If Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025, then the market resolves to Yes\"\n"
+            "Output: \"Will Curtis Sliwa drop out of the NYC Mayoral race before Nov 4, 2025?\"\n"
+            "Input: \"If Donald Trump wins the 2024 U.S. presidential election, then the market resolves to Yes\"\n"
+            "Output: \"Will Donald Trump win the 2024 U.S. presidential election?\"\n"
+            "Only output the rewritten factual question with no explanations or commentary.\n"
+            f"STATEMENT: {rules}"
+        )
+
+        raw_response = self._generate_response(prompt)
+
+        return raw_response
+
+    def get_relevant_questions(self, question: str) -> list[str]:
+
+        prompt = (
+            "Based on the following question, generate a list of 5 relevant questions "
+            "that one could search online to gather more information. "
+            "These questions should yield information that would be helpful to answering "
+            "the following question in an objective manner.\n\n"
+            "Your response SHOULD ONLY BE the following lines, in this exact format:\n"
+            "1. <question 1>\n"
+            "2. <question 2>\n"
+            "3. <question 3>\n"
+            "4. <question 4>\n"
+            "5. <question 5>\n"
+            "Do not add ANY preamble, conclusion, or extra text.\n\n"
+            f"Question: \"{question}\""
+        )
+
+        raw_response = self._generate_response(prompt)
+
+        relevant_questions = []
+        for line in raw_response.split('\n'):
+            line = line.strip()
+            if line and line[0].isdigit():
+                clean_question = line.split('.', 1)[-1].strip()
+                relevant_questions.append(clean_question)
+        
+        print(f"Generated relevant questions: {relevant_questions}")
+
+        return relevant_questions
+
+    
+    def get_web_info(self, questions):
+        results = [self.exa.answer(q, text=True) for q in questions]
+        answers = [r.answer for r in results]
+        return answers
+
+    def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]:
+        prompt = (
+            "Analyze the provided information below to answer the given binary question. "
+            "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n"
+            "--- Information ---\n"
+            f"{information}\n\n"
+            "--- Question ---\n"
+            f"{question}\n\n"
+            "IMPORTANT INSTRUCTIONS:\n"
+            "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n"
+            "   Yes: <YES PERCENTAGE>%, No: <NO PERCENTAGE>%, Explanation: <EXPLANATION>\n"
+            "2. Percentages must sum to 100%.\n"
+            "3. Do NOT include any preamble, summary, or additional text.\n"
+            "4. Provide a brief but clear explanation supporting your probabilities.\n\n"
+        )
+        
+        sentiment_schema = {
+            "type": "object",
+            "properties": {
+                "yes_percentage": {
+                    "type": "number",
+                    "description": "Confidence in positive sentiment (0-100)"
+                },
+                "no_percentage": {
+                    "type": "number",
+                    "description": "Confidence in negative sentiment (0-100)"
+                },
+                "explanation": {
+                    "type": "string",
+                    "description": "Brief explanation for the sentiment analysis"
+                }
+            },
+            "required": ["yes_percentage", "no_percentage", "explanation"]
+        }
+
+        response = self._generate_response(prompt, sentiment_schema)
+
+        try:
+            parsed_data = json.loads(response)
+            return parsed_data.get('yes_percentage'), parsed_data.get('no_percentage'), parsed_data.get('explanation')
+        except json.JSONDecodeError:
+            raise RuntimeError(f"Failed to parse output as JSON: {response}")
+    
\ No newline at end of file
diff --git a/8-application-demos/6-kalshi-bet-predictor/compare.py b/8-application-demos/6-kalshi-bet-predictor/compare.py
index 2facddf5..d0fa5e92 100644
--- a/8-application-demos/6-kalshi-bet-predictor/compare.py
+++ b/8-application-demos/6-kalshi-bet-predictor/compare.py
@@ -15,11 +15,11 @@ def load_markets(csv_path: str) -> List[Tuple[str, str]]:
     return markets
 
 async def get_market_data(session: aiohttp.ClientSession, kalshi_ticker: str, 
-                         polymarket_slug: str, endpoint_url: str) -> Dict:
+                         poly_slug: str, endpoint_url: str) -> Dict:
     
     payload = json.dumps({
         'kalshi_ticker': kalshi_ticker,
-        'poly_slug': polymarket_slug
+        'poly_slug': poly_slug
     })
     
     headers = {
@@ -35,20 +35,20 @@ async def get_market_data(session: aiohttp.ClientSession, kalshi_ticker: str,
             data = data['result']
             
             kalshi_data = data['kalshi']
-            polymarket_data = data['polymarket']
+            poly_data = data['polymarket']
             
             return {
                 'kalshi_ticker': kalshi_ticker,
-                'polymarket_slug': polymarket_slug,
-                'kalshi_edge': kalshi_data['edge'],
-                'polymarket_edge': polymarket_data['edge'],
-                'kalshi_buy_yes': kalshi_data['buy_yes'],
-                'kalshi_buy_no': kalshi_data['buy_no'],
-                'polymarket_buy_yes': polymarket_data['buy_yes'],
-                'polymarket_buy_no': polymarket_data['buy_no'],
+                'poly_slug': poly_slug,
+                'kalshi_edge_value': kalshi_data['edge'],
+                'poly_edge_value': poly_data['edge'],
+                'kalshi_is_buy_yes': kalshi_data['buy_yes'],
+                'kalshi_is_buy_no': kalshi_data['buy_no'],
+                'poly_is_buy_yes': poly_data['buy_yes'],
+                'poly_is_buy_no': poly_data['buy_no'],
             }
     except Exception as e:
-        print(f"Error fetching data for {kalshi_ticker}/{polymarket_slug}: {e}")
+        print(f"Error fetching data for {kalshi_ticker}/{poly_slug}: {e}")
         return None
 
 async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]:
@@ -57,8 +57,8 @@ async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]:
     print(f"Fetching data for {len(markets)} markets all at once...")
     
     async with aiohttp.ClientSession() as session:
-        tasks = [get_market_data(session, kalshi_ticker, polymarket_slug, endpoint_url) 
-                for kalshi_ticker, polymarket_slug in markets]
+        tasks = [get_market_data(session, kalshi_ticker, poly_slug, endpoint_url) 
+                for kalshi_ticker, poly_slug in markets]
         
         results = await asyncio.gather(*tasks)
     
@@ -75,17 +75,17 @@ def compute_statistics(results: List[Dict]) -> None:
     
     total_markets = len(results)
     
-    kalshi_edges = [r['kalshi_edge'] for r in results]
-    total_kalshi_edge = sum(kalshi_edges)
+    kalshi_edges_values = [r['kalshi_edge_value'] for r in results]
+    kalshi_edge_sum = sum(kalshi_edges_values)
     
-    polymarket_edges = [r['polymarket_edge'] for r in results]
-    total_polymarket_edge = sum(polymarket_edges)
+    poly_edges_values = [r['poly_edge_value'] for r in results]
+    poly_edge_sum = sum(poly_edges_values)
     
-    kalshi_better_count = sum(1 for r in results if r['kalshi_edge'] > r['polymarket_edge'])
-    polymarket_better_count = sum(1 for r in results if r['polymarket_edge'] > r['kalshi_edge'])
-    equal_count = total_markets - kalshi_better_count - polymarket_better_count
+    kalshi_better_count = sum(1 for r in results if r['kalshi_edge_value'] > r['poly_edge_value'])
+    poly_better_count = sum(1 for r in results if r['poly_edge_value'] > r['kalshi_edge_value'])
+    equal_count = total_markets - kalshi_better_count - poly_better_count
     
-    edge_differences = [abs(r['kalshi_edge'] - r['polymarket_edge']) for r in results]
+    edge_differences = [abs(r['kalshi_edge_value'] - r['poly_edge_value']) for r in results]
     avg_edge_difference = sum(edge_differences) / total_markets
     max_edge_difference = max(edge_differences)
     
@@ -94,27 +94,27 @@ def compute_statistics(results: List[Dict]) -> None:
     print("COMPARISON")
     print("-"*80)
     print(f"Markets with greater Kalshi edge:      {kalshi_better_count} ({kalshi_better_count/total_markets*100:.1f}%)")
-    print(f"Markets with greater Polymarket edge:  {polymarket_better_count} ({polymarket_better_count/total_markets*100:.1f}%)")
+    print(f"Markets with greater Polymarket edge:  {poly_better_count} ({poly_better_count/total_markets*100:.1f}%)")
     print(f"Markets with equal edge:               {equal_count} ({equal_count/total_markets*100:.1f}%)")
-    print(f"\nAverage edge difference: {avg_edge_difference:.4f}")
-    print(f"Max edge difference:     {max_edge_difference:.4f}")
+    print(f"\nAverage edge difference: {avg_edge_difference:.4f} cents")
+    print(f"Max edge difference:     {max_edge_difference:.4f} cents")
     
     print("\n" + "="*80)
-    if total_kalshi_edge > total_polymarket_edge:
-        advantage = total_kalshi_edge - total_polymarket_edge
-        print(f"OVERALL: Kalshi has greater total edge (+{advantage:.4f})")
-        print(f"OVERALL: Kalshi has an average edge of (+{advantage/total_markets:.4f}) per market")
-    elif total_polymarket_edge > total_kalshi_edge:
-        advantage = total_polymarket_edge - total_kalshi_edge
-        print(f"OVERALL: Polymarket has greater total edge (+{advantage:.4f})")
-        print(f"OVERALL: Polymarket has an average edge of (+{advantage/total_markets:.4f}) per market")
+    if kalshi_edge_sum > poly_edge_sum:
+        advantage = kalshi_edge_sum - poly_edge_sum
+        print(f"OVERALL: Kalshi has greater total edge (+{advantage:.4f}) cents")
+        print(f"OVERALL: Kalshi has an average edge of (+{advantage/total_markets:.4f}) cents per market")
+    elif poly_edge_sum > kalshi_edge_sum:
+        advantage = poly_edge_sum - kalshi_edge_sum
+        print(f"OVERALL: Polymarket has greater total edge (+{advantage:.4f}) cents")
+        print(f"OVERALL: Polymarket has an average edge of (+{advantage/total_markets:.4f}) cents per market")
     else:
         print(f"OVERALL: Both platforms have equal total edge")
     print("="*80)
 
 def main():
-    CSV_PATH = '<PATH TO YOUR .csv FILE>' 
-    ENDPOINT_URL = '<YOUR HOSTED ENDPOINT>'
+    CSV_PATH = "<PATH_TO_YOUR_CSV_FILE>"
+    ENDPOINT_URL = '<YOUR_CEREBRIUM_PREDICT_URL>'
     
     print("Starting async market analysis...")
     results = asyncio.run(analyze_markets_async(CSV_PATH, ENDPOINT_URL))
diff --git a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py
index a534dfc3..e2806174 100644
--- a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py
+++ b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py
@@ -13,8 +13,6 @@
 POLYMARKET_API_URL = "https://clob.polymarket.com/markets"
 OUTPUT_FILE = "markets.csv"
 
-# ---------------------- API Fetch Functions ----------------------
-
 def get_kalshi_markets() -> List[Dict[str, Any]]:
     print("Fetching Kalshi markets...")
     markets_list = []
@@ -106,14 +104,12 @@ def get_polymarket_markets() -> List[Dict[str, Any]]:
         return []
 
 
-# ---------------------- Matching ----------------------
-
-def find_similar_markets(kalshi_markets, polymarket_markets, threshold=0.9, top_k=TOP_K):
+def find_similar_markets(kalshi_markets, poly_markets, threshold=0.9, top_k=TOP_K):
     print("\nLoading NLP model...")
     model = SentenceTransformer('all-MiniLM-L6-v2')
 
     kalshi_titles = [m['title'] for m in kalshi_markets]
-    poly_titles = [m['title'] for m in polymarket_markets]
+    poly_titles = [m['title'] for m in poly_markets]
 
     if not kalshi_titles or not poly_titles:
         print("Not enough market data to compare.")
@@ -136,11 +132,11 @@ def find_similar_markets(kalshi_markets, polymarket_markets, threshold=0.9, top_
         for j in range(top_k):
             score = float(scores[i][j])
             if score >= threshold:
-                poly_market = polymarket_markets[indices[i][j]]
+                poly_market = poly_markets[indices[i][j]]
                 potential_matches.append({
                     'score': score,
                     'kalshi_market': kalshi_market,
-                    'polymarket_market': poly_market
+                    'poly_market': poly_market
                 })
         if i % 100 == 0:
             print(f"Processed {i}/{len(kalshi_markets)} Kalshi markets...")
@@ -156,20 +152,20 @@ def interactive_save(matches: List[Dict[str, Any]]):
     with open(OUTPUT_FILE, "a", newline='', encoding="utf-8") as csvfile:
         writer = csv.writer(csvfile)
         if not file_exists:
-            writer.writerow(["kalshi_ticker", "polymarket_slug"])
+            writer.writerow(["kalshi_ticker", "poly_slug"])
 
         for i, match in enumerate(matches):
             kalshi_ticker = match['kalshi_market']['ticker']
-            poly_slug = match['polymarket_market']['url'].split("event/")[1]
+            poly_slug = match['poly_market']['url'].split("event/")[1]
             kalshi_title = get_kalshi_market(kalshi_ticker)
-            poly_title = match['polymarket_market']['title']
+            poly_title = match['poly_market']['title']
             score = match['score']
 
             print(f"\nMatch #{i+1} (Score: {score:.4f})")
             print(f"[KALSHI]     {kalshi_title}")
             print(f"[POLYMARKET] {poly_title}")
             print(f"  > Kalshi URL:    {match['kalshi_market']['url']}")
-            print(f"  > Polymarket URL:{match['polymarket_market']['url']}")
+            print(f"  > Polymarket URL:{match['poly_market']['url']}")
 
             choice = input("Save this match? (y/n): ").strip().lower()
             if choice == 'y':
@@ -182,13 +178,13 @@ def interactive_save(matches: List[Dict[str, Any]]):
 
 def main():
     kalshi_markets = get_kalshi_markets()
-    polymarket_markets = get_polymarket_markets()
+    poly_markets = get_polymarket_markets()
 
-    if not kalshi_markets or not polymarket_markets:
+    if not kalshi_markets or not poly_markets:
         print("\nCould not fetch markets from one or both platforms. Exiting.")
         return
     
-    matches = find_similar_markets(kalshi_markets, polymarket_markets, SIMILARITY_THRESHOLD)
+    matches = find_similar_markets(kalshi_markets, poly_markets, SIMILARITY_THRESHOLD)
     print(f"\n--- Found {len(matches)} Potential Matches ---")
     
     if not matches:
diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py
index cfa962e9..7fb89bcd 100644
--- a/8-application-demos/6-kalshi-bet-predictor/main.py
+++ b/8-application-demos/6-kalshi-bet-predictor/main.py
@@ -1,18 +1,16 @@
 import json
-from typing import Tuple
 import requests
 import re
-from dotenv import load_dotenv
-import os
-from exa_py import Exa
-from openai import OpenAI
+from dataclasses import dataclass
+from analyst import BetAnalyst
 
+@dataclass
+class MarketData:
+    question: str
+    yes_price: str
+    no_price: str
 
-def getMarket(is_kalshi, ticker):
-    if is_kalshi: 
-        url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}" # market ticker
-    else:
-        url = f"https://gamma-api.polymarket.com/markets/slug/{ticker}" # slug
+def _fetch_api_data(url: str):
     try:
         res = requests.get(url)
         res.raise_for_status()
@@ -20,182 +18,101 @@ def getMarket(is_kalshi, ticker):
         return obj
     except requests.exceptions.RequestException as e:
         raise RuntimeError(f"Error fetching Kalshi market data: {e}")
-
-
-class BetPredictor:
-    def __init__(self, model_name: str = "gpt-5-nano"):
-        
-        load_dotenv()
         
-        exa_api_key = os.environ.get("EXA_API_KEY")
-        openai_api_key = os.environ.get("OPENAI_API_KEY")
-
-        if not exa_api_key:
-            raise EnvironmentError("Missing EXA_API_KEY in environment variables")
-        if not openai_api_key: 
-            raise EnvironmentError("Missing OPENAI_API_KEY in environment variables")
-
-        self.exa = Exa(exa_api_key)
-        self.client = OpenAI(api_key=openai_api_key)
-        self.model_name = model_name
-
-        print(f"Using model: {model_name}")
-
-    def _generate_response(self, prompt: str) -> str:
+def get_kalshi_market(ticker: str) -> MarketData:
+    url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}"
+    raw_data = _fetch_api_data(url)
 
-        response = self.client.responses.create(
-            model=self.model_name,
-            input=prompt
+    try:
+        market = raw_data['market']
+        return MarketData(
+            question = market['title'],
+            yes_price=float(market['yes_ask']),
+            no_price=float(market['no_ask'])
         )
+    except (KeyError, TypeError, ValueError) as e:
+        raise RuntimeError(f"Error parsing Kalshi data structure: {e}") from e
 
-        output = response.output_text.strip()
-        print(f"Generated this response: {output}")
-
-        return output
+def get_polymarket_market(slug: str) -> MarketData:
+    url = f"https://gamma-api.polymarket.com/markets/slug/{slug}" # slug
+    raw_data = _fetch_api_data(url)
     
-    def convert_rules_to_question(self, rules:str) -> str:
-        prompt = (
-            "A market resolution statement usually describes the conditions under which a market would resolve to \"Yes\" or \"No\". "
-            "Your task is to extract and rewrite the core factual question that determines whether the market will resolve to \"Yes.\"\n"
-            "Input format: A sentence describing how a market will resolve.\n"
-            "Example: \"Will the market resolve to Yes if Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025?\"\n"
-            "Output format: A clear, grammatically correct factual question that reflects the underlying condition of the resolution, removing any meta-language about the market or its resolution.\n"
-            "The condition will start with \"if [event], then the market will resolve to \"Yes\" and you must convert it to \"Will [event]?\"\n"
-            "Examples:\n"
-            "Input: \"If Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025, then the market resolves to Yes\"\n"
-            "Output: \"Will Curtis Sliwa drop out of the NYC Mayoral race before Nov 4, 2025?\"\n"
-            "Input: \"If Donald Trump wins the 2024 U.S. presidential election, then the market resolves to Yes\"\n"
-            "Output: \"Will Donald Trump win the 2024 U.S. presidential election?\"\n"
-            "Only output the rewritten factual question with no explanations or commentary.\n"
-            f"STATEMENT: {rules}"
-        )
-
-        raw_response = self._generate_response(prompt)
-
-        return raw_response
-
-    def get_relevant_questions(self, question: str) -> list[str]:
+    try:
+        poly_values = json.loads(raw_data['outcomePrices'])
+        yes_price, no_price = [float(v) for v in poly_values]
 
-        prompt = (
-            "Based on the following question, generate a list of 5 relevant questions "
-            "that one could search online to gather more information. "
-            "These questions should yield information that would be helpful to answering "
-            "the following question in an objective manner.\n\n"
-            "Your response SHOULD ONLY BE the following lines, in this exact format:\n"
-            "1. <question 1>\n"
-            "2. <question 2>\n"
-            "3. <question 3>\n"
-            "4. <question 4>\n"
-            "5. <question 5>\n"
-            "Do not add ANY preamble, conclusion, or extra text.\n\n"
-            f"Question: \"{question}\""
+        return MarketData(
+            question = raw_data['question'],
+            yes_price=yes_price,
+            no_price=no_price
         )
-
-        raw_response = self._generate_response(prompt)
-
-        relevant_questions = []
-        for line in raw_response.split('\n'):
-            line = line.strip()
-            if line and line[0].isdigit():
-                clean_question = line.split('.', 1)[-1].strip()
-                relevant_questions.append(clean_question)
+    except (KeyError, TypeError, ValueError) as e:
+        raise RuntimeError(f"Error parsing Kalshi data structure: {e}") from e
         
-        print(f"Generated relevant questions: {relevant_questions}")
 
-        return relevant_questions
-
-    
-    def get_information(self, questions):
-        results = [self.exa.answer(q, text=True) for q in questions]
-        answers = [r.answer for r in results]
-        return answers
+def getMarket(is_kalshi, ticker):
+    if is_kalshi: 
+        url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}" # market ticker
+    else:
+        url = f"https://gamma-api.polymarket.com/markets/slug/{ticker}" # slug
+    try:
+        res = requests.get(url)
+        res.raise_for_status()
+        obj = res.json()
+        return obj
+    except requests.exceptions.RequestException as e:
+        raise RuntimeError(f"Error fetching Kalshi market data: {e}")
 
-    def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]:
-        prompt = (
-            "Analyze the provided information below to answer the given binary question. "
-            "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n"
-            "--- Information ---\n"
-            f"{information}\n\n"
-            "--- Question ---\n"
-            f"{question}\n\n"
-            "IMPORTANT INSTRUCTIONS:\n"
-            "1. Your response MUST ONLY be a single line in THIS EXACT FORMAT:\n"
-            "   Yes: <YES PERCENTAGE>%, No: <NO PERCENTAGE>%, Explanation: <EXPLANATION>\n"
-            "2. Percentages must sum to 100%.\n"
-            "3. Do NOT include any preamble, summary, or additional text.\n"
-            "4. Provide a brief but clear explanation supporting your probabilities.\n\n"
-        )
 
-        response = self._generate_response(prompt)
+def evaluate(analyst, question):
+    # Generate questions using OpenAI API
+    relevant_questions = analyst.get_relevant_questions(question)
+    # Use Exa semantic search to retrieve answers to questions
+    answers = analyst.get_information(relevant_questions)
 
-        match = re.search(r"Yes: (.*?), No: (.*?), Explanation: (.*)", response, re.DOTALL)
+    information = ""
+    for i, v in enumerate(relevant_questions):
+        information += f"INFORMATION {i+1}: \n"
+        information += f"QUESTION {i+1}: {v}\n"
+        information += f"ANSWER {i+1}: {answers[i]} \n\n"
 
-        if match:
-            yes, no, explanation = match.groups()
-            return yes.strip(), no.strip(), explanation.strip()
-        else:
-            raise ValueError(f"Failed to parse LLM response: {response}")
+    information.rstrip("\n")
     
-    def evaluate(self, question):
-        relevant_questions = self.get_relevant_questions(question)
-        answers = self.get_information(relevant_questions)
-
-        information = ""
-        for i, v in enumerate(relevant_questions):
-            information += f"INFORMATION {i+1}: \n"
-            information += f"QUESTION {i+1}: {v}\n"
-            information += f"ANSWER {i+1}: {answers[i]} \n\n"
-
-        information.rstrip("\n")
-        
-        yes, no, explanation = self.get_binary_answer_with_percentage(information, question)
-        return yes, no, explanation
-
-
-predictor = BetPredictor()
+    # Passes relevant Q&As to OpenAI API and generates Y/N percentage with explanation
+    yes, no, explanation = analyst.get_binary_answer_with_percentage(information, question)
+    return yes, no, explanation
 
 def predict(kalshi_ticker, poly_slug):
-    kalshi_market = getMarket(True, kalshi_ticker)
-    poly_market = getMarket(False, poly_slug)
-    question = poly_market['question'] # we use polymarket because they have direct question
-
-    kalshi_real_yes = float(kalshi_market['market']['yes_ask'])
-    kalshi_real_no = float(kalshi_market['market']['no_ask'])
-    
-    poly_values = json.loads(poly_market['outcomePrices'])
-    poly_real_yes, poly_real_no = [float(v) for v in poly_values]
+    kalshi_market = get_kalshi_market(kalshi_ticker)
+    poly_market = get_polymarket_market(poly_slug)
+    question = poly_market.question # we use polymarket because they have direct question
 
     print(f"Question: {question}") 
     
-    pred_yes, pred_no, explanation = predictor.evaluate(question)
+    analyst = BetAnalyst()    
+    pred_yes, pred_no, explanation = evaluate(analyst, question)
 
     match_yes = re.search(r"(\d+)%", pred_yes)
     match_no = re.search(r"(\d+)%", pred_no)
     pred_yes = float(match_yes.group(1))
     pred_no = float(match_no.group(1))
-
-    kalshi_real_yes = float(kalshi_market['market']['yes_ask'])
-    kalshi_real_no = float(kalshi_market['market']['no_ask'])
     
-    poly_values = json.loads(poly_market['outcomePrices'])
-    poly_real_yes, poly_real_no = [float(v)*100 for v in poly_values]
-
-    kalshi_buy_yes = kalshi_real_yes < pred_yes
-    kalshi_buy_no = kalshi_real_no < pred_no
+    kalshi_buy_yes = kalshi_market.yes_price < pred_yes
+    kalshi_buy_no = kalshi_market.no_price < pred_no
 
-    poly_buy_yes = poly_real_yes < pred_yes
-    poly_buy_no = poly_real_no < pred_no
+    poly_buy_yes = poly_market.yes_price < pred_yes
+    poly_buy_no = poly_market.no_price < pred_no
 
     return {
         "kalshi": {
             "buy_yes":kalshi_buy_yes,
             "buy_no": kalshi_buy_no,
-            "edge": max(pred_yes-kalshi_real_yes, pred_no-kalshi_real_no),   
+            "edge": max(pred_yes-kalshi_market.yes_price, pred_no-kalshi_market.no_price),   
         },
         "polymarket": {
             "buy_yes":poly_buy_yes,
             "buy_no": poly_buy_no,
-            "edge": max(pred_yes-poly_real_yes, pred_no-poly_real_no),
+            "edge": max(pred_yes-poly_market.yes_price, pred_no-poly_market.no_price),
         },
         "yes": pred_yes,
         "no": pred_no,

From bbe69f2102cb1689efd723960ed72ef6ae3241d6 Mon Sep 17 00:00:00 2001
From: SeaUrc <nicksong03@gmail.com>
Date: Fri, 31 Oct 2025 19:35:38 -0400
Subject: [PATCH 07/10] minor fixes

---
 .../6-kalshi-bet-predictor/analyst.py         | 71 +++++++++----------
 .../6-kalshi-bet-predictor/compare.py         |  4 +-
 .../6-kalshi-bet-predictor/main.py            |  4 +-
 3 files changed, 38 insertions(+), 41 deletions(-)

diff --git a/8-application-demos/6-kalshi-bet-predictor/analyst.py b/8-application-demos/6-kalshi-bet-predictor/analyst.py
index 171610f3..f50bd3a9 100644
--- a/8-application-demos/6-kalshi-bet-predictor/analyst.py
+++ b/8-application-demos/6-kalshi-bet-predictor/analyst.py
@@ -5,6 +5,7 @@
 import json
 from exa_py import Exa
 from openai import OpenAI
+from pydantic import BaseModel
 
 class BetAnalyst:
     def __init__(self, model_name: str = "gpt-5-nano"):
@@ -25,24 +26,34 @@ def __init__(self, model_name: str = "gpt-5-nano"):
 
         print(f"Using model: {model_name}")
 
-    def _generate_response(self, prompt: str, response_format: dict | None = None) -> str:
-
-        request_args = {
-            "model": self.model_name,
-            "input": prompt,
-        }
-
-        if response_format is not None:
-            request_args["response_format"] = response_format
-
+    def _generate_response(self, prompt: str, text_format = None):
         try:
-            response = self.client.responses.create(**request_args)
+            response = self.client.responses.create(
+                model=self.model_name,
+                input=prompt,
+            )
+
+            output_text = response.output_text.strip()
+            print(f"Generated raw response: {output_text}")
+
+            if text_format is not None:
+                parsed = self.client.responses.parse(
+                    model=self.model_name,
+                    input=[
+                        {
+                            "role": "user",
+                            "content": output_text
+                        },
+                    ],
+                    text_format=text_format,
+                )
+                print(f"Parsed structured response: {parsed.output_parsed}")
+                return parsed.output_parsed
+
+            return output_text
 
-            output = response.output_text.strip()
-            print(f"Generated this response: {output}")
-            return output
         except Exception as e:
-            raise RuntimeError(f"Error during API Call: {e}")
+            raise RuntimeError(f"Error during API call: {e}") from e
     
     def convert_market_to_resolution(self, rules:str) -> str:
         prompt = (
@@ -117,30 +128,16 @@ def get_binary_answer_with_percentage(self, information: str, question: str) ->
             "4. Provide a brief but clear explanation supporting your probabilities.\n\n"
         )
         
-        sentiment_schema = {
-            "type": "object",
-            "properties": {
-                "yes_percentage": {
-                    "type": "number",
-                    "description": "Confidence in positive sentiment (0-100)"
-                },
-                "no_percentage": {
-                    "type": "number",
-                    "description": "Confidence in negative sentiment (0-100)"
-                },
-                "explanation": {
-                    "type": "string",
-                    "description": "Brief explanation for the sentiment analysis"
-                }
-            },
-            "required": ["yes_percentage", "no_percentage", "explanation"]
-        }
-
-        response = self._generate_response(prompt, sentiment_schema)
+        class Response(BaseModel):
+            yes_percentage: str
+            no_percentage: str
+            explanation: str
+
+        response = self._generate_response(prompt, Response)
+        print(f"HELLO {response}")
 
         try:
-            parsed_data = json.loads(response)
-            return parsed_data.get('yes_percentage'), parsed_data.get('no_percentage'), parsed_data.get('explanation')
+            return response.yes_percentage, response.no_percentage, response.explanation
         except json.JSONDecodeError:
             raise RuntimeError(f"Failed to parse output as JSON: {response}")
     
\ No newline at end of file
diff --git a/8-application-demos/6-kalshi-bet-predictor/compare.py b/8-application-demos/6-kalshi-bet-predictor/compare.py
index d0fa5e92..c8a184ce 100644
--- a/8-application-demos/6-kalshi-bet-predictor/compare.py
+++ b/8-application-demos/6-kalshi-bet-predictor/compare.py
@@ -109,12 +109,12 @@ def compute_statistics(results: List[Dict]) -> None:
         print(f"OVERALL: Polymarket has greater total edge (+{advantage:.4f}) cents")
         print(f"OVERALL: Polymarket has an average edge of (+{advantage/total_markets:.4f}) cents per market")
     else:
-        print(f"OVERALL: Both platforms have equal total edge")
+        print("OVERALL: Both platforms have equal total edge")
     print("="*80)
 
 def main():
     CSV_PATH = "<PATH_TO_YOUR_CSV_FILE>"
-    ENDPOINT_URL = '<YOUR_CEREBRIUM_PREDICT_URL>'
+    ENDPOINT_URL = "<YOUR_CEREBRIUM_PREDICT_URL>"
     
     print("Starting async market analysis...")
     results = asyncio.run(analyze_markets_async(CSV_PATH, ENDPOINT_URL))
diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py
index 7fb89bcd..e32a3b29 100644
--- a/8-application-demos/6-kalshi-bet-predictor/main.py
+++ b/8-application-demos/6-kalshi-bet-predictor/main.py
@@ -68,7 +68,7 @@ def evaluate(analyst, question):
     # Generate questions using OpenAI API
     relevant_questions = analyst.get_relevant_questions(question)
     # Use Exa semantic search to retrieve answers to questions
-    answers = analyst.get_information(relevant_questions)
+    answers = analyst.get_web_info(relevant_questions)
 
     information = ""
     for i, v in enumerate(relevant_questions):
@@ -117,4 +117,4 @@ def predict(kalshi_ticker, poly_slug):
         "yes": pred_yes,
         "no": pred_no,
         "explanation": explanation
-    }
\ No newline at end of file
+    }

From a2085e4d432efb0691db0ed4869b9d5d43d1a996 Mon Sep 17 00:00:00 2001
From: SeaUrc <nicksong03@gmail.com>
Date: Fri, 31 Oct 2025 20:15:26 -0400
Subject: [PATCH 08/10] add readme

---
 .../6-kalshi-bet-predictor/README.md          | 55 +++++++++++++++++++
 .../6-kalshi-bet-predictor/cerebrium.toml     |  2 +-
 .../cerebrium_requirements.txt                | 22 ++++++++
 .../6-kalshi-bet-predictor/requirements.txt   | 32 +++++++++++
 4 files changed, 110 insertions(+), 1 deletion(-)
 create mode 100644 8-application-demos/6-kalshi-bet-predictor/README.md
 create mode 100644 8-application-demos/6-kalshi-bet-predictor/cerebrium_requirements.txt

diff --git a/8-application-demos/6-kalshi-bet-predictor/README.md b/8-application-demos/6-kalshi-bet-predictor/README.md
new file mode 100644
index 00000000..80bd5e78
--- /dev/null
+++ b/8-application-demos/6-kalshi-bet-predictor/README.md
@@ -0,0 +1,55 @@
+# Kalshi Bet Predictor
+This repository contains a set of Python scripts designed to find equivalent binary markets across Kalshi and Polymarket, use an LLM via OpenAI and web search via Exa to generate an independent prediction, and then calculate the trading "edge" on both platforms.
+
+Core Components
+---------------
+
+The project is structured around three main components:
+
+**`find_equiv_markets.py`**: A utility script to automatically search Kalshi and Polymarket APIs, use a Sentence Transformer and FAISS vector index to find markets with similar titles (i.e., equivalent questions), and save potential matches to a CSV file for manual review.
+    
+**`analyst.py`** and **`main.py`**: These scripts form the core prediction engine.
+    
+*   `analyst.py` handles API interactions with OpenAI (for prediction and question generation) and Exa (for web information retrieval).
+    
+*   `main.py` fetches the current prices from both Kalshi and Polymarket, runs the prediction via `BetAnalyst`, and calculates the trading edge against the model's prediction. This logic is intended to be hosted on Cerebrium.
+        
+**`compare.py`**: This script reads the market pairs from the CSV, asynchronously calls the hosted prediction endpoint for each pair, and compiles statistics on the trading edge and which platform offers a better opportunity more frequently.
+    
+
+
+Prerequisites
+-------------
+
+You will need API keys for the following services:
+
+*   **OpenAI**: For the large language model (`BetAnalyst` class).
+    
+*   **Exa**: For semantic search/information retrieval (`BetAnalyst` class).
+    
+*   **Cerebrium** (or similar hosting platform): To deploy the `main.py` and `analyst.py` logic as a prediction endpoint.
+    
+
+Create a `.env` file in your project root to store your keys:
+
+`OPENAI_API_KEY="your_openai_key"` and `EXA_API_KEY="your_exa_key"` (put each entry on its own line in the file).
+
+Setup and Installation
+----------------------
+
+### Dependencies
+
+Install the required Python packages:
+
+```bash
+pip install -r requirements.txt
+```
+
+
+Workflow
+--------
+
+1. Host the prediction service by deploying `main.py` and `analyst.py` on Cerebrium to expose a `predict` endpoint that runs the `BetAnalyst` logic.
+2. Run `find_equiv_markets.py` to identify equivalent Kalshi and Polymarket markets and export the candidate pairs to a CSV file.
+3. Execute `compare.py`, which loads the CSV pairs, calls the hosted prediction endpoint for each pair, and aggregates the edge statistics to highlight the most favorable markets.
+
diff --git a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml
index 0e73bf8d..704861f3 100644
--- a/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml
+++ b/8-application-demos/6-kalshi-bet-predictor/cerebrium.toml
@@ -7,7 +7,7 @@ include = ['./*', 'main.py', 'cerebrium.toml']
 exclude = ['.*']
 
 [cerebrium.dependencies.paths]
-pip = "requirements.txt"
+pip = "cerebrium_requirements.txt"
 
 [cerebrium.hardware]
 cpu = 4
diff --git a/8-application-demos/6-kalshi-bet-predictor/cerebrium_requirements.txt b/8-application-demos/6-kalshi-bet-predictor/cerebrium_requirements.txt
new file mode 100644
index 00000000..dfebee10
--- /dev/null
+++ b/8-application-demos/6-kalshi-bet-predictor/cerebrium_requirements.txt
@@ -0,0 +1,22 @@
+annotated-types==0.7.0
+anyio==4.11.0
+certifi==2025.10.5
+charset-normalizer==3.4.4
+distro==1.9.0
+dotenv==0.9.9
+exa-py==1.16.1
+h11==0.16.0
+httpcore==1.0.9
+httpx==0.28.1
+idna==3.11
+jiter==0.11.1
+openai==2.6.0
+pydantic==2.12.3
+pydantic_core==2.41.4
+python-dotenv==1.1.1
+requests==2.32.5
+sniffio==1.3.1
+tqdm==4.67.1
+typing-inspection==0.4.2
+typing_extensions==4.15.0
+urllib3==2.5.0
diff --git a/8-application-demos/6-kalshi-bet-predictor/requirements.txt b/8-application-demos/6-kalshi-bet-predictor/requirements.txt
index dfebee10..6f68e701 100644
--- a/8-application-demos/6-kalshi-bet-predictor/requirements.txt
+++ b/8-application-demos/6-kalshi-bet-predictor/requirements.txt
@@ -1,22 +1,54 @@
+aiohappyeyeballs==2.6.1
+aiohttp==3.13.1
+aiosignal==1.4.0
 annotated-types==0.7.0
 anyio==4.11.0
+attrs==25.4.0
 certifi==2025.10.5
 charset-normalizer==3.4.4
 distro==1.9.0
 dotenv==0.9.9
 exa-py==1.16.1
+faiss-cpu==1.12.0
+filelock==3.20.0
+frozenlist==1.8.0
+fsspec==2025.9.0
 h11==0.16.0
+hf-xet==1.1.10
 httpcore==1.0.9
 httpx==0.28.1
+huggingface-hub==0.35.3
 idna==3.11
+Jinja2==3.1.6
 jiter==0.11.1
+joblib==1.5.2
+MarkupSafe==3.0.3
+mpmath==1.3.0
+multidict==6.7.0
+networkx==3.5
+numpy==2.3.4
 openai==2.6.0
+packaging==25.0
+pillow==12.0.0
+propcache==0.4.1
 pydantic==2.12.3
 pydantic_core==2.41.4
 python-dotenv==1.1.1
+PyYAML==6.0.3
+regex==2025.10.23
 requests==2.32.5
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.16.2
+sentence-transformers==5.1.2
 sniffio==1.3.1
+sympy==1.14.0
+threadpoolctl==3.6.0
+tokenizers==0.22.1
+torch==2.9.0
 tqdm==4.67.1
+transformers==4.57.1
 typing-inspection==0.4.2
 typing_extensions==4.15.0
 urllib3==2.5.0
+yarl==1.22.0

From e0e7ba89a65f87db1e30ae83ae4b5b034badcdb2 Mon Sep 17 00:00:00 2001
From: SeaUrc <nicksong03@gmail.com>
Date: Sat, 1 Nov 2025 14:26:26 -0400
Subject: [PATCH 09/10] remove unnecessary function

---
 .../6-kalshi-bet-predictor/analyst.py         | 23 -------------------
 1 file changed, 23 deletions(-)

diff --git a/8-application-demos/6-kalshi-bet-predictor/analyst.py b/8-application-demos/6-kalshi-bet-predictor/analyst.py
index f50bd3a9..2cfaa146 100644
--- a/8-application-demos/6-kalshi-bet-predictor/analyst.py
+++ b/8-application-demos/6-kalshi-bet-predictor/analyst.py
@@ -1,4 +1,3 @@
-
 from typing import Tuple
 from dotenv import load_dotenv
 import os
@@ -54,30 +53,8 @@ def _generate_response(self, prompt: str, text_format = None):
 
         except Exception as e:
             raise RuntimeError(f"Error during API call: {e}") from e
-    
-    def convert_market_to_resolution(self, rules:str) -> str:
-        prompt = (
-            "A market resolution statement usually describes the conditions under which a market would resolve to \"Yes\" or \"No\". "
-            "Your task is to extract and rewrite the core factual question that determines whether the market will resolve to \"Yes.\"\n"
-            "Input format: A sentence describing how a market will resolve.\n"
-            "Example: \"Will the market resolve to Yes if Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025?\"\n"
-            "Output format: A clear, grammatically correct factual question that reflects the underlying condition of the resolution, removing any meta-language about the market or its resolution.\n"
-            "The condition will start with \"if [event], then the market will resolve to \"Yes\" and you must convert it to \"Will [event]?\"\n"
-            "Examples:\n"
-            "Input: \"If Curtis Sliwa drops out of the NYC Mayoral race before Nov 4, 2025, then the market resolves to Yes\"\n"
-            "Output: \"Will Curtis Sliwa drop out of the NYC Mayoral race before Nov 4, 2025?\"\n"
-            "Input: \"If Donald Trump wins the 2024 U.S. presidential election, then the market resolves to Yes\"\n"
-            "Output: \"Will Donald Trump win the 2024 U.S. presidential election?\"\n"
-            "Only output the rewritten factual question with no explanations or commentary.\n"
-            f"STATEMENT: {rules}"
-        )
-
-        raw_response = self._generate_response(prompt)
-
-        return raw_response
 
     def get_relevant_questions(self, question: str) -> list[str]:
-
         prompt = (
             "Based on the following question, generate a list of 5 relevant questions "
             "that one could search online to gather more information. "

From 1d9ad7287ff396d1ece27778373016bcafa8c135 Mon Sep 17 00:00:00 2001
From: SeaUrc <nicksong03@gmail.com>
Date: Wed, 5 Nov 2025 20:57:52 -0500
Subject: [PATCH 10/10] comments + cleanup

---
 .../6-kalshi-bet-predictor/analyst.py         |  9 ++++++++-
 .../6-kalshi-bet-predictor/compare.py         |  8 ++++++--
 .../find_equiv_markets.py                     |  9 ++++++++-
 .../6-kalshi-bet-predictor/main.py            | 20 ++++++-------------
 4 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/8-application-demos/6-kalshi-bet-predictor/analyst.py b/8-application-demos/6-kalshi-bet-predictor/analyst.py
index 2cfaa146..16131c15 100644
--- a/8-application-demos/6-kalshi-bet-predictor/analyst.py
+++ b/8-application-demos/6-kalshi-bet-predictor/analyst.py
@@ -8,7 +8,7 @@
 
 class BetAnalyst:
     def __init__(self, model_name: str = "gpt-5-nano"):
-        
+        """Initializes the API clients and loads necessary API keys from environment variables"""
         load_dotenv()
         
         exa_api_key = os.environ.get("EXA_API_KEY")
@@ -26,6 +26,7 @@ def __init__(self, model_name: str = "gpt-5-nano"):
         print(f"Using model: {model_name}")
 
     def _generate_response(self, prompt: str, text_format = None):
+        """Sends a prompt to the OpenAI API and optionally parses the output into a structured format"""
         try:
             response = self.client.responses.create(
                 model=self.model_name,
@@ -36,6 +37,7 @@ def _generate_response(self, prompt: str, text_format = None):
             print(f"Generated raw response: {output_text}")
 
             if text_format is not None:
+                # If a Pydantic model (text_format) is provided, re-parse the raw output into that structure.
                 parsed = self.client.responses.parse(
                     model=self.model_name,
                     input=[
@@ -55,6 +57,7 @@ def _generate_response(self, prompt: str, text_format = None):
             raise RuntimeError(f"Error during API call: {e}") from e
 
     def get_relevant_questions(self, question: str) -> list[str]:
+        """Generates a list of related search queries based on an initial user question"""
         prompt = (
             "Based on the following question, generate a list of 5 relevant questions "
             "that one could search online to gather more information. "
@@ -76,6 +79,7 @@ def get_relevant_questions(self, question: str) -> list[str]:
         for line in raw_response.split('\n'):
             line = line.strip()
             if line and line[0].isdigit():
+                # Parse lines like "1. What is..." into "What is..."
                 clean_question = line.split('.', 1)[-1].strip()
                 relevant_questions.append(clean_question)
         
@@ -85,11 +89,13 @@ def get_relevant_questions(self, question: str) -> list[str]:
 
     
     def get_web_info(self, questions):
+        """Uses the Exa API to find answers for a list of questions."""
         results = [self.exa.answer(q, text=True) for q in questions]
         answers = [r.answer for r in results]
         return answers
 
     def get_binary_answer_with_percentage(self, information: str, question: str) -> Tuple[str, str, str]:
+        """Analyzes provided information to return a Yes/No probability and explanation for a given question"""
         prompt = (
             "Analyze the provided information below to answer the given binary question. "
             "Based on the information, determine the probability that the answer is 'Yes' or 'No'.\n\n"
@@ -105,6 +111,7 @@ def get_binary_answer_with_percentage(self, information: str, question: str) ->
             "4. Provide a brief but clear explanation supporting your probabilities.\n\n"
         )
         
+        # Define the expected Pydantic structure for the _generate_response 'text_format' parameter
         class Response(BaseModel):
             yes_percentage: str
             no_percentage: str
diff --git a/8-application-demos/6-kalshi-bet-predictor/compare.py b/8-application-demos/6-kalshi-bet-predictor/compare.py
index c8a184ce..fdb1bde0 100644
--- a/8-application-demos/6-kalshi-bet-predictor/compare.py
+++ b/8-application-demos/6-kalshi-bet-predictor/compare.py
@@ -5,6 +5,7 @@
 import aiohttp
 
 def load_markets(csv_path: str) -> List[Tuple[str, str]]:
+    # Loads market pairs (Kalshi ticker, Polymarket slug) from a CSV file.
     markets = []
     with open(csv_path, 'r') as f:
         reader = csv.reader(f)
@@ -16,7 +17,7 @@ def load_markets(csv_path: str) -> List[Tuple[str, str]]:
 
 async def get_market_data(session: aiohttp.ClientSession, kalshi_ticker: str, 
                          poly_slug: str, endpoint_url: str) -> Dict:
-    
+    # Asynchronously fetches and processes edge data for a single market pair from a specified API endpoint.
     payload = json.dumps({
         'kalshi_ticker': kalshi_ticker,
         'poly_slug': poly_slug
@@ -52,6 +53,7 @@ async def get_market_data(session: aiohttp.ClientSession, kalshi_ticker: str,
         return None
 
 async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]:
+    # Orchestrates the asynchronous fetching of data for all markets listed in the CSV.
     markets = load_markets(csv_path)
     
     print(f"Fetching data for {len(markets)} markets all at once...")
@@ -61,10 +63,12 @@ async def analyze_markets_async(csv_path: str, endpoint_url: str) -> List[Dict]:
                 for kalshi_ticker, poly_slug in markets]
         
         results = await asyncio.gather(*tasks)
-    
+        
+    # Filter out None results from failed requests
     return [r for r in results if r is not None]
 
 def compute_statistics(results: List[Dict]) -> None:
+    # Calculates and prints summary statistics comparing Kalshi and Polymarket edge data.
     print("\n" + "="*80)
     print("STATISTICS")
     print("="*80)
diff --git a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py
index e2806174..97093b71 100644
--- a/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py
+++ b/8-application-demos/6-kalshi-bet-predictor/find_equiv_markets.py
@@ -14,12 +14,13 @@
 OUTPUT_FILE = "markets.csv"
 
 def get_kalshi_markets() -> List[Dict[str, Any]]:
+    # Fetches all active, binary markets from the Kalshi API via pagination
     print("Fetching Kalshi markets...")
     markets_list = []
     cursor = ""
     try:
         while True:
-            params = {'limit': 1000}
+            params = {'limit': 1000} # Request 1000 markets per page.
             if cursor:
                 params['cursor'] = cursor
 
@@ -57,11 +58,13 @@ def get_kalshi_markets() -> List[Dict[str, Any]]:
         return []
     
 def get_kalshi_market(ticker):
+    # Retrieves the full title for a single Kalshi market by its ticker
     title = requests.get(f"{KALSHI_API_URL}/{ticker}")
     title = title.json()
     return title['market']['title']
 
 def get_polymarket_markets() -> List[Dict[str, Any]]:
+    # Fetches active, non-closed markets from the Polymarket API via pagination, stopping once MAX_MARKET_LIMIT is exceeded or the cursor runs out
     print("Fetching Polymarket markets...")
     markets_list = []
     next_cursor = None
@@ -94,6 +97,7 @@ def get_polymarket_markets() -> List[Dict[str, Any]]:
             print(f"Found {len(markets_list)} active and open markets")
 
             if len(markets_list) > MAX_MARKET_LIMIT or not next_cursor or next_cursor == 'LTE=':
+                # 'LTE=' is Polymarket's cursor value indicating the end of pagination.
                 break
 
         print(f"Found {len(markets_list)} open markets on Polymarket.")
@@ -105,7 +109,9 @@ def get_polymarket_markets() -> List[Dict[str, Any]]:
 
 
 def find_similar_markets(kalshi_markets, poly_markets, threshold=0.9, top_k=TOP_K):
+    # Uses a sentence transformer and FAISS to find markets with similar titles above a given threshold.
     print("\nLoading NLP model...")
+    # 'all-MiniLM-L6-v2' is a small and fast model
     model = SentenceTransformer('all-MiniLM-L6-v2')
 
     kalshi_titles = [m['title'] for m in kalshi_markets]
@@ -145,6 +151,7 @@ def find_similar_markets(kalshi_markets, poly_markets, threshold=0.9, top_k=TOP_
     return potential_matches
     
 def interactive_save(matches: List[Dict[str, Any]]):
+    # Presents potential matches to the user for manual review and saves confirmed pairs to a CSV file
     print("\n--- Review Mode ---")
     print("Press 'y' to save a match, anything else to skip.\n")
     
diff --git a/8-application-demos/6-kalshi-bet-predictor/main.py b/8-application-demos/6-kalshi-bet-predictor/main.py
index e32a3b29..8106e740 100644
--- a/8-application-demos/6-kalshi-bet-predictor/main.py
+++ b/8-application-demos/6-kalshi-bet-predictor/main.py
@@ -20,6 +20,7 @@ def _fetch_api_data(url: str):
         raise RuntimeError(f"Error fetching Kalshi market data: {e}")
         
 def get_kalshi_market(ticker: str) -> MarketData:
+    # Fetches and standardizes market data for a specific Kalshi market ticker
     url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}"
     raw_data = _fetch_api_data(url)
 
@@ -34,6 +35,7 @@ def get_kalshi_market(ticker: str) -> MarketData:
         raise RuntimeError(f"Error parsing Kalshi data structure: {e}") from e
 
 def get_polymarket_market(slug: str) -> MarketData:
+    # Fetches and standardizes market data for a specific Polymarket market slug
     url = f"https://gamma-api.polymarket.com/markets/slug/{slug}" # slug
     raw_data = _fetch_api_data(url)
     
@@ -48,23 +50,11 @@ def get_polymarket_market(slug: str) -> MarketData:
         )
     except (KeyError, TypeError, ValueError) as e:
         raise RuntimeError(f"Error parsing Kalshi data structure: {e}") from e
-        
-
-def getMarket(is_kalshi, ticker):
-    if is_kalshi: 
-        url = f"https://api.elections.kalshi.com/trade-api/v2/markets/{ticker}" # market ticker
-    else:
-        url = f"https://gamma-api.polymarket.com/markets/slug/{ticker}" # slug
-    try:
-        res = requests.get(url)
-        res.raise_for_status()
-        obj = res.json()
-        return obj
-    except requests.exceptions.RequestException as e:
-        raise RuntimeError(f"Error fetching Kalshi market data: {e}")
 
 
 def evaluate(analyst, question):
+    # Uses the BetAnalyst class to research and form an opinion on a market question.
+    
     # Generate questions using OpenAI API
     relevant_questions = analyst.get_relevant_questions(question)
     # Use Exa semantic search to retrieve answers to questions
@@ -83,6 +73,7 @@ def evaluate(analyst, question):
     return yes, no, explanation
 
 def predict(kalshi_ticker, poly_slug):
+    # Orchestrates the entire process of finding arbitrage opportunities
     kalshi_market = get_kalshi_market(kalshi_ticker)
     poly_market = get_polymarket_market(poly_slug)
     question = poly_market.question # we use polymarket because they have direct question
@@ -92,6 +83,7 @@ def predict(kalshi_ticker, poly_slug):
     analyst = BetAnalyst()    
     pred_yes, pred_no, explanation = evaluate(analyst, question)
 
+    # The model returns probabilities as strings (e.g., "80%"), so we parse the number.
     match_yes = re.search(r"(\d+)%", pred_yes)
     match_no = re.search(r"(\d+)%", pred_no)
     pred_yes = float(match_yes.group(1))