From 48d5f8f93e3df2628eace789750f3b57ae8a7dca Mon Sep 17 00:00:00 2001
From: Samad <Mirzasamadahmedbaig@gmail.com>
Date: Sun, 21 Sep 2025 01:59:04 +0500
Subject: [PATCH 01/13] Refactor react_agent.py to improve security and
 maintainability

---
 WebAgent/WebSailor/src/react_agent.py | 97 ++++++++++++++-------------
 1 file changed, 49 insertions(+), 48 deletions(-)
diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py
index 9e5e1db..2517a18 100644
--- a/WebAgent/WebSailor/src/react_agent.py
+++ b/WebAgent/WebSailor/src/react_agent.py
@@ -38,8 +38,8 @@ def __init__(self,
 
     def call_server(self, msgs, max_tries=10):
         # Set OpenAI API key and base URL using vLLM API server
-        openai_api_key = "EMPTY"
-        openai_api_base = "http://127.0.0.1:6001/v1"
+        openai_api_key = os.getenv("OPENAI_API_KEY", "EMPTY")
+        openai_api_base = os.getenv("OPENAI_API_BASE", "http://127.0.0.1:6001/v1")
 
         client = OpenAI(
             api_key=openai_api_key,
@@ -77,15 +77,53 @@ def count_tokens(self, messages, model="gpt-4o"):
         
         return len(tokenizer.encode(full_prompt))
 
+    def _process_tool_call(self, content, messages):
+        if '<tool_call>' in content and '</tool_call>' in content:
+            tool_call = content.split('<tool_call>')[1].split('</tool_call>')[0]
+            try:
+                tool_call = json.loads(tool_call)
+                tool_name = tool_call.get('name', '')
+                tool_args = tool_call.get('arguments', {})
+                result = self._call_tool(tool_name, tool_args)
+            except:
+                result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.'
+            result = "<tool_response>\n" + result + "\n</tool_response>"
+            messages.append({"role": "user", "content": result})
+        return messages
+
+    def _handle_token_limit(self, messages, question, answer, rollout_id):
+        print(f"Token count exceeds limit")
+        
+        messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:<think>your final thinking</think>\n<answer>your answer</answer>"
+        content = self.call_server(messages)
+        messages.append({"role": "assistant", "content": content.strip()})
+        if '<answer>' in content and '</answer>' in content:
+            prediction = messages[-1]['content'].split('<answer>')[1].split('</answer>')[0]
+            termination = 'generate an answer as token limit reached'
+        else:
+            prediction = messages[-1]['content']
+            termination = 'format error: generate an answer as token limit reached'
+        return self._generate_result(question, answer, rollout_id, messages, prediction, termination)
+
+    def _generate_result(self, question, answer, rollout_id, messages, prediction, termination):
+        return {
+            "question": question,
+            "answer": answer,
+            "rollout_id": rollout_id,
+            "messages": messages,
+            "prediction": prediction,
+            "termination": termination
+        }
+
     def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[Message]]:
         self.model=model
-        try:
-            question = data['item']['question']
-        except: 
-            raw_msg = data['item']['messages'][1]["content"] 
-            question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg 
+        question = data.get('item', {}).get('question', '')
+        if not question:
+            raw_msg = data.get('item', {}).get('messages', [{}, {}])[1].get("content", "")
+            question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg
 
-        answer = data['item']['answer']
+        answer = data.get('item', {}).get('answer', '')
+        rollout_id = data.get('rollout_id', '')
         self.user_prompt = user_prompt
         self.user_prompt = self.user_prompt + question
         messages = [{"role": "system", "content": self.system_message}, {"role": "user", "content": self.user_prompt}]
@@ -100,17 +138,7 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M
                 pos = content.find('<tool_response>')
                 content = content[:pos]
             messages.append({"role": "assistant", "content": content.strip()})
-            if '<tool_call>' in content and '</tool_call>' in content:
-                tool_call = content.split('<tool_call>')[1].split('</tool_call>')[0]
-                try:
-                    tool_call = json.loads(tool_call)
-                    tool_name = tool_call.get('name', '')
-                    tool_args = tool_call.get('arguments', {})
-                    result = self._call_tool(tool_name, tool_args)
-                except:
-                    result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.'
-                result = "<tool_response>\n" + result + "\n</tool_response>"
-                messages.append({"role": "user", "content": result})
+            messages = self._process_tool_call(content, messages)
             if '<answer>' in content and '</answer>' in content:
                 termination = 'answer'
                 break
@@ -122,26 +150,7 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M
             print(f"round: {round}, token count: {token_count}")
 
             if token_count > max_tokens:
-                print(f"Token count exceeds limit: {token_count} > {max_tokens}")
-                
-                messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:<think>your final thinking</think>\n<answer>your answer</answer>"
-                content = self.call_server(messages)
-                messages.append({"role": "assistant", "content": content.strip()})
-                if '<answer>' in content and '</answer>' in content:
-                    prediction = messages[-1]['content'].split('<answer>')[1].split('</answer>')[0]
-                    termination = 'generate an answer as token limit reached'
-                else:
-                    prediction = messages[-1]['content']
-                    termination = 'format error: generate an answer as token limit reached'
-                result = {
-                    "question": question,
-                    "answer": answer,
-                    "rollout_id": data['rollout_id'],
-                    "messages": messages,
-                    "prediction": prediction,
-                    "termination": termination
-                }
-                return result
+                return self._handle_token_limit(messages, question, answer, rollout_id)
 
         if '<answer>' in messages[-1]['content']:
             prediction = messages[-1]['content'].split('<answer>')[1].split('</answer>')[0]
@@ -151,12 +160,4 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M
             termination = 'answer not found'
             if num_llm_calls_available == 0:
                 termination = 'exceed available llm calls'
-        result = {
-            "question": question,
-            "answer": answer,
-            "rollout_id": data['rollout_id'],
-            "messages": messages,
-            "prediction": prediction,
-            "termination": termination
-        }
-        return result
+        return self._generate_result(question, answer, rollout_id, messages, prediction, termination)
\ No newline at end of file

From a9256b7dfa1355f42d04cfef79f08c5c592758ad Mon Sep 17 00:00:00 2001
From: Samad <Mirzasamadahmedbaig@gmail.com>
Date: Sun, 21 Sep 2025 11:34:35 +0500
Subject: [PATCH 02/13] chore(ci): add basic Ruff lint workflow

---
 .github/workflows/ci-lint.yml | 20 ++++++++++++++++++++
 commit_message.txt            |  1 +
 2 files changed, 21 insertions(+)
 create mode 100644 .github/workflows/ci-lint.yml
 create mode 100644 commit_message.txt

diff --git a/.github/workflows/ci-lint.yml b/.github/workflows/ci-lint.yml
new file mode 100644
index 0000000..864e85b
--- /dev/null
+++ b/.github/workflows/ci-lint.yml
@@ -0,0 +1,20 @@
+name: CI - Lint
+
+on:
+  pull_request:
+    branches: ["main"]
+  push:
+    branches: ["chore/add-ci-lint"]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install ruff
+        run: pip install ruff==0.5.6
+      - name: Lint
+        run: ruff check --output-format=github .
\ No newline at end of file
diff --git a/commit_message.txt b/commit_message.txt
new file mode 100644
index 0000000..0e40afe
--- /dev/null
+++ b/commit_message.txt
@@ -0,0 +1 @@
+Refactor react_agent.py to improve security and maintainability
\ No newline at end of file

From ee250218b8b6acd6e596bae47ba0f23bf24dfb84 Mon Sep 17 00:00:00 2001
From: MirzaSamadAhmedBaig
 <89132160+Mirza-Samad-Ahmed-Baig@users.noreply.github.com>
Date: Sun, 21 Sep 2025 11:37:06 +0500
Subject: [PATCH 03/13] Remove accidentally committed commit_message.txt

---
 commit_message.txt | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 commit_message.txt

diff --git a/commit_message.txt b/commit_message.txt
deleted file mode 100644
index 0e40afe..0000000
--- a/commit_message.txt
+++ /dev/null
@@ -1 +0,0 @@
-Refactor react_agent.py to improve security and maintainability
\ No newline at end of file

From d60f2545369347a48f8d4aa1253f6abb9c021f6c Mon Sep 17 00:00:00 2001
From: Samad <Mirzasamadahmedbaig@gmail.com>
Date: Sun, 21 Sep 2025 11:48:06 +0500
Subject: [PATCH 04/13] Fix Ruff lint errors (E722, F401, F541, F841)

---
 .../WebDancer/demos/assistant_qwq_chat.py     |  8 ++-----
 WebAgent/WebDancer/demos/llm/oai.py           |  3 +--
 .../WebDancer/demos/tools/private/search.py   |  6 ++---
 .../WebDancer/demos/tools/private/visit.py    |  4 ++--
 WebAgent/WebDancer/demos/utils/logs.py        |  1 -
 WebAgent/WebSailor/src/evaluate.py            |  4 ++--
 WebAgent/WebSailor/src/react_agent.py         |  8 +++----
 WebAgent/WebSailor/src/tool_search.py         |  7 +++---
 WebAgent/WebSailor/src/tool_visit.py          | 11 +++++-----
 WebAgent/WebWalker/src/agent.py               |  4 ++--
 WebAgent/WebWalker/src/app.py                 |  1 -
 evaluation/evaluate_deepsearch_official.py    | 21 ++++++++----------
 inference/file_tools/file_parser.py           |  7 +++---
 inference/file_tools/idp.py                   |  6 ++---
 inference/file_tools/video_agent.py           | 11 ----------
 inference/react_agent.py                      | 15 +++++--------
 inference/run_multi_react.py                  |  2 --
 inference/tool_file.py                        | 22 ++++---------------
 inference/tool_python.py                      | 15 ++++++-------
 inference/tool_scholar.py                     |  7 +++---
 inference/tool_search.py                      |  9 ++------
 inference/tool_visit.py                       | 10 ++-------
 22 files changed, 62 insertions(+), 120 deletions(-)

diff --git a/WebAgent/WebDancer/demos/assistant_qwq_chat.py b/WebAgent/WebDancer/demos/assistant_qwq_chat.py
index 3fa38ff..b0a48a6 100644
--- a/WebAgent/WebDancer/demos/assistant_qwq_chat.py
+++ b/WebAgent/WebDancer/demos/assistant_qwq_chat.py
@@ -2,15 +2,11 @@
 
 import os
 
-from qwen_agent.agents import Assistant
-from qwen_agent.utils.output_beautify import typewriter_print
 
 from demos.agents.search_agent import SearchAgent
 from demos.llm.oai import TextChatAtOAI
-from demos.llm.qwen_dashscope import QwenChatAtDS
 from demos.gui.web_ui import WebUI
 from demos.utils.date import date2str, get_date_now
-from demos.tools import Visit, Search
 
 
 ROOT_RESOURCE = os.path.join(os.path.dirname(__file__), 'resource')
@@ -50,8 +46,8 @@ def make_system_prompt():
         llm=llm_cfg,
         function_list=tools,
         system_message="",
-        name=f'WebDancer',
-        description=f"I am WebDancer, a web information seeking agent, welcome to try!",
+        name='WebDancer',
+        description="I am WebDancer, a web information seeking agent, welcome to try!",
         extra={
             'reasoning': reasoning,
             'max_llm_calls': max_llm_calls,
diff --git a/WebAgent/WebDancer/demos/llm/oai.py b/WebAgent/WebDancer/demos/llm/oai.py
index 0df27fe..53a261d 100644
--- a/WebAgent/WebDancer/demos/llm/oai.py
+++ b/WebAgent/WebDancer/demos/llm/oai.py
@@ -2,7 +2,6 @@
 import json
 import logging
 import os
-from http import HTTPStatus
 from pprint import pformat
 from typing import Dict, Iterator, List, Optional, Literal, Union
 
@@ -15,7 +14,7 @@
 
 from qwen_agent.llm.base import ModelServiceError, register_llm
 from qwen_agent.llm.function_calling import BaseFnCallModel, simulate_response_completion_with_chat
-from qwen_agent.llm.schema import ASSISTANT, Message, FunctionCall
+from qwen_agent.llm.schema import ASSISTANT, Message
 from qwen_agent.log import logger
 
 
diff --git a/WebAgent/WebDancer/demos/tools/private/search.py b/WebAgent/WebDancer/demos/tools/private/search.py
index 29278ec..7f994ee 100644
--- a/WebAgent/WebDancer/demos/tools/private/search.py
+++ b/WebAgent/WebDancer/demos/tools/private/search.py
@@ -30,7 +30,7 @@ def call(self, params: str, **kwargs) -> str:
         try:
             params = self._verify_json_format_args(params)
             query = params["query"][:MAX_MULTIQUERY_NUM]
-        except:
+        except Exception:
             return "[Search] Invalid request format: Input must be a JSON object containing 'query' field"
 
         if isinstance(query, str):
@@ -57,9 +57,9 @@ def google_search(self, query: str) -> str:
                 response = requests.post(url, headers=headers, data=json.dumps(data))
                 results = response.json()
                 break
-            except Exception as e:
+            except Exception:
                 if i == 4:
-                    return f"Google search Timeout, return None, Please try again later."
+                    return "Google search Timeout, return None, Please try again later."
                 continue
     
         if response.status_code != 200:
diff --git a/WebAgent/WebDancer/demos/tools/private/visit.py b/WebAgent/WebDancer/demos/tools/private/visit.py
index b911cc6..fef4abe 100644
--- a/WebAgent/WebDancer/demos/tools/private/visit.py
+++ b/WebAgent/WebDancer/demos/tools/private/visit.py
@@ -62,7 +62,7 @@ def jina_readpage(url: str) -> str:
             else:
                 print(response.text)
                 raise ValueError("jina readpage error")
-        except Exception as e:
+        except Exception:
             if attempt == max_retries - 1:
                 return "[visit] Failed to read page."
             
@@ -97,7 +97,7 @@ def call(self, params: str, **kwargs) -> str:
             params = self._verify_json_format_args(params)
             url = params["url"]
             goal = params["goal"]
-        except:
+        except Exception:
             return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields"
         if isinstance(url, str):
             response = self.readpage(url, goal)
diff --git a/WebAgent/WebDancer/demos/utils/logs.py b/WebAgent/WebDancer/demos/utils/logs.py
index c8cd432..629814d 100644
--- a/WebAgent/WebDancer/demos/utils/logs.py
+++ b/WebAgent/WebDancer/demos/utils/logs.py
@@ -1,6 +1,5 @@
 # coding=utf-8
 import os
-import sys
 import logging
 
 
diff --git a/WebAgent/WebSailor/src/evaluate.py b/WebAgent/WebSailor/src/evaluate.py
index 6b3a22c..0386a54 100644
--- a/WebAgent/WebSailor/src/evaluate.py
+++ b/WebAgent/WebSailor/src/evaluate.py
@@ -289,7 +289,7 @@ def main():
         for i in [1, 2, 3]
     }
 
-    print(f"===========")
+    print("===========")
     print(f"Avg. Pass@3 {avg_pass_at_3}%") 
     print(f"Best Pass@1 {best_pass_at_1}%")  
     print(f"Pass@3 {pass_at_3}%") 
@@ -297,7 +297,7 @@ def main():
     print(f"# Invalid {aggr_statistics['num_invalid']}  # Extra Length {aggr_statistics['extra_length']}") 
     print(f"Avg. Action {aggr_statistics['avg_action']:.2f}  Avg. Visit Action {aggr_statistics['avg_visit_action']:.2f}  Avg. Search Action {aggr_statistics['avg_search_action']:.2f}  Avg. Other Action {aggr_statistics['avg_other_action']:.2f}") 
     print(f"Avg. Answer Length {aggr_statistics['avg_ans_length']:.2f}  Avg. Thinking Length {aggr_statistics['avg_think_length']:.2f}")  
-    print(f"===========" )
+    print("===========" )
 
     overall_eval_dict = {
         "dataset": dataset, 
diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py
index 2517a18..79df9ac 100644
--- a/WebAgent/WebSailor/src/react_agent.py
+++ b/WebAgent/WebSailor/src/react_agent.py
@@ -61,7 +61,7 @@ def call_server(self, msgs, max_tries=10):
             except Exception as e:
                 if attempt == (max_tries - 1):
                     print(f"SGLang server error {e}")
-                    return f"SGLang server error"
+                    return "SGLang server error"
                 continue
         
         return "SGLang server empty response"
@@ -69,7 +69,7 @@ def call_server(self, msgs, max_tries=10):
     def count_tokens(self, messages, model="gpt-4o"):
         try: 
             tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path) 
-        except Exception as e: 
+        except Exception: 
             tokenizer = tiktoken.encoding_for_model(model)
         
         full_message = [Message(**x) for x in messages]
@@ -85,14 +85,14 @@ def _process_tool_call(self, content, messages):
                 tool_name = tool_call.get('name', '')
                 tool_args = tool_call.get('arguments', {})
                 result = self._call_tool(tool_name, tool_args)
-            except:
+            except Exception:
                 result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.'
             result = "<tool_response>\n" + result + "\n</tool_response>"
             messages.append({"role": "user", "content": result})
         return messages
 
     def _handle_token_limit(self, messages, question, answer, rollout_id):
-        print(f"Token count exceeds limit")
+        print("Token count exceeds limit")
         
         messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:<think>your final thinking</think>\n<answer>your answer</answer>"
         content = self.call_server(messages)
diff --git a/WebAgent/WebSailor/src/tool_search.py b/WebAgent/WebSailor/src/tool_search.py
index 3643c53..ba0da49 100644
--- a/WebAgent/WebSailor/src/tool_search.py
+++ b/WebAgent/WebSailor/src/tool_search.py
@@ -3,7 +3,6 @@
 from concurrent.futures import ThreadPoolExecutor
 from typing import List, Union
 import requests
-from qwen_agent.tools.base import BaseTool, register_tool
 import os
 
 SEARCH_API_URL = os.getenv("SEARCH_API_URL")
@@ -50,7 +49,7 @@ def google_search(self, query: str):
             except Exception as e:
                 print(e)
                 if i == 4:
-                    return f"Google search Timeout, return None, Please try again later."
+                    return "Google search Timeout, return None, Please try again later."
         if response.status_code != 200:
             raise Exception(f"Error: {response.status_code} - {response.text}")
 
@@ -82,7 +81,7 @@ def google_search(self, query: str):
 
             content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets)
             return content
-        except:
+        except Exception:
             return f"No results found for '{query}'. Try with a more general query, or remove the year filter."
 
 
@@ -90,7 +89,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str:
         assert GOOGLE_SEARCH_KEY is not None, "Please set the GOOGLE_SEARCH_KEY environment variable."
         try:
             query = params["query"]
-        except:
+        except Exception:
             return "[Search] Invalid request format: Input must be a JSON object containing 'query' field"
         
         if isinstance(query, str):
diff --git a/WebAgent/WebSailor/src/tool_visit.py b/WebAgent/WebSailor/src/tool_visit.py
index ac8e5e6..fbae5e9 100644
--- a/WebAgent/WebSailor/src/tool_visit.py
+++ b/WebAgent/WebSailor/src/tool_visit.py
@@ -6,7 +6,6 @@
 from prompt import EXTRACTOR_PROMPT 
 import os 
 from openai import OpenAI
-import random
 
 
 WEBCONTENT_MAXLENGTH = int(os.getenv("WEBCONTENT_MAXLENGTH", 150000))
@@ -46,7 +45,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str:
         try:
             url = params["url"]
             goal = params["goal"]
-        except:
+        except Exception:
             return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields"
 
         if isinstance(url, str):
@@ -87,14 +86,14 @@ def call_server(self, msgs, max_tries=10):
                 if content:
                     try:
                         json.loads(content)
-                    except:
+                    except Exception:
                         # extract json from string 
                         left = content.find('{')
                         right = content.rfind('}') 
                         if left != -1 and right != -1 and left <= right: 
                             content = content[left:right+1]
                     return content
-            except:
+            except Exception:
                 if attempt == (max_tries - 1):
                     return ""
                 continue
@@ -129,7 +128,7 @@ def jina_readpage(self, url: str) -> str:
                 else:
                     print(response.text)
                     raise ValueError("jina readpage error")
-            except Exception as e:
+            except Exception:
                 if attempt == max_retries - 1:
                     return "[visit] Failed to read page."
                 
@@ -191,7 +190,7 @@ def readpage(self, url: str, goal: str) -> str:
                         # 尝试 parse json
                         raw = json.loads(raw)
                         break
-                    except:
+                    except Exception:
                         raw = self.call_server(messages)
                         parse_retry_times += 1
                 # parse 失败
diff --git a/WebAgent/WebWalker/src/agent.py b/WebAgent/WebWalker/src/agent.py
index c8e0421..c5274e7 100644
--- a/WebAgent/WebWalker/src/agent.py
+++ b/WebAgent/WebWalker/src/agent.py
@@ -63,7 +63,7 @@ def observation_information_extraction(self, query, observation):
                 if "true" in response.choices[0].message.content:
                     try:
                         return json.loads(response.choices[0].message.content)["information"]
-                    except:
+                    except Exception:
                         return response.choices[0].message.content
                 else:
                     return None
@@ -97,7 +97,7 @@ def critic_information(self, query, memory):
                 if "true" in response.choices[0].message.content:
                     try:
                         return json.loads(response.choices[0].message.content)["answer"]
-                    except:
+                    except Exception:
                         return response.choices[0].message.content
                 else:
                     return None
diff --git a/WebAgent/WebWalker/src/app.py b/WebAgent/WebWalker/src/app.py
index 73bc557..7351368 100644
--- a/WebAgent/WebWalker/src/app.py
+++ b/WebAgent/WebWalker/src/app.py
@@ -3,7 +3,6 @@
 import json5
 from agent import WebWalker
 from qwen_agent.tools.base import BaseTool, register_tool
-import os
 import re
 import json
 import asyncio
diff --git a/evaluation/evaluate_deepsearch_official.py b/evaluation/evaluate_deepsearch_official.py
index 036973b..aaf384c 100644
--- a/evaluation/evaluate_deepsearch_official.py
+++ b/evaluation/evaluate_deepsearch_official.py
@@ -1,7 +1,5 @@
-from pydantic import BaseModel
 from openai import OpenAI
 import concurrent.futures
-from typing import Literal
 import litellm 
 import os 
 import argparse
@@ -9,7 +7,6 @@
 import concurrent 
 from tqdm import tqdm 
 from transformers import AutoTokenizer 
-import re 
 from prompt import * 
 import traceback
 import tiktoken
@@ -178,7 +175,7 @@ def count_tokens_with_tokenizer(text, tokenizer):
             return len(tokenizer.encode(text))
         else:  
             return len(tokenizer.encode(text))
-    except:
+    except Exception:
         
         return len(text) // 4
 
@@ -224,7 +221,7 @@ def single_round_statistics(input_file):
 
     try:
         tokenizer = AutoTokenizer.from_pretrained(os.getenv("Qwen2_5_7B_PATH", ""))
-    except Exception as e: 
+    except Exception: 
         tokenizer = tiktoken.encoding_for_model("gpt-4o")
     
     for item in contents:
@@ -303,7 +300,7 @@ def single_round_statistics(input_file):
         try:
             if len(tokenizer.encode("".join([msg["content"] for msg in messages]))) > 30000:
                 num_extra += 1  
-        except:
+        except Exception:
             pass
     
     total_questions = len(contents)
@@ -329,7 +326,7 @@ def calculate_enhanced_statistics(round_results, round_items):
     
     try:
         tokenizer = AutoTokenizer.from_pretrained(os.getenv("Qwen2_5_7B_PATH", ""))
-    except Exception as e: 
+    except Exception: 
         tokenizer = tiktoken.encoding_for_model("gpt-4o")
     
     enhanced_stats = {}
@@ -345,7 +342,7 @@ def calculate_enhanced_statistics(round_results, round_items):
                 continue
             try:
                 matching_item = [item for item in items if item['messages'][1]['content'] == result['question']]
-            except:
+            except Exception:
                 items = [item for item in items if len(item['messages'])>0]
                 matching_item = [item for item in items if item['messages'][1]['content'] == result['question']]
             if not matching_item:
@@ -530,7 +527,7 @@ def main():
         for i in [1, 2, 3]
     }
 
-    print(f"===========")
+    print("===========")
     print(f"Avg. Pass@3 {avg_pass_at_3}%") 
     print(f"Best Pass@1 {best_pass_at_1}%")  
     print(f"Pass@3 {pass_at_3}%") 
@@ -541,18 +538,18 @@ def main():
     print(f"Avg. Action {aggr_statistics['avg_action']:.2f}  Avg. Visit Action {aggr_statistics['avg_visit_action']:.2f}  Avg. Search Action {aggr_statistics['avg_search_action']:.2f}  Avg. Other Action {aggr_statistics['avg_other_action']:.2f}") 
     print(f"Avg. Answer Length {aggr_statistics['avg_ans_length']:.2f}  Avg. Thinking Length {aggr_statistics['avg_think_length']:.2f}")
     enhanced_statistics = calculate_enhanced_statistics(round_results, round_items)
-    print(f"\n=== ADDITIONAL STATISTICS ===")
+    print("\n=== ADDITIONAL STATISTICS ===")
     print(f"Avg. Tool Calls per Question: {aggr_statistics['avg_tool_calls_per_question']:.2f}")
     print(f"Avg. Tool Calls per Question (Correctly Solved): {enhanced_statistics['avg_tool_calls_per_question_correctly_solved']:.2f}")
     print(f"Avg. Assistant Tokens per Question: {aggr_statistics['avg_assistant_tokens_per_question']:.2f}")
     print(f"Avg. Assistant Tokens per Question (Correctly Solved): {enhanced_statistics['avg_assistant_tokens_per_question_correctly_solved']:.2f}")
     print(f"Avg. Assistant Tokens per Message: {aggr_statistics['avg_assistant_tokens_per_message']:.2f}")
     
-    print(f"\n=== TERMINATION FREQUENCIES ===")
+    print("\n=== TERMINATION FREQUENCIES ===")
     for termination_type, frequency in aggr_statistics['termination_freq'].items():
         print(f"{termination_type}: {frequency:.3f}")
     
-    print(f"===========" )
+    print("===========" )
 
     overall_eval_dict = {
         "dataset": dataset, 
diff --git a/inference/file_tools/file_parser.py b/inference/file_tools/file_parser.py
index ecbace9..3410da7 100644
--- a/inference/file_tools/file_parser.py
+++ b/inference/file_tools/file_parser.py
@@ -4,7 +4,6 @@
 import time
 import zipfile
 import math
-from pathlib import Path
 
 from typing import Any, Dict, List, Optional, Union
 from collections import Counter
@@ -17,7 +16,7 @@
 from tabulate import tabulate
 from qwen_agent.log import logger
 from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS
-from qwen_agent.tools.base import BaseTool, register_tool
+from qwen_agent.tools.base import BaseTool
 from qwen_agent.tools.storage import KeyNotExistsError, Storage
 from file_tools.utils import (get_file_type, hash_sha256, is_http_url, get_basename_from_url, 
                                   sanitize_chrome_file_path, save_url_to_local_work_dir)
@@ -522,7 +521,7 @@ def _process_new_file(self, file_path: str) -> Union[str, list]:
             if USE_IDP and file_type in idp_types:
                 try:
                     results = parse_file_by_idp(file_path=file_path)
-                except Exception as e:
+                except Exception:
                     results = self.parsers[file_type](file_path)
             else:
                 results = self.parsers[file_type](file_path)
@@ -536,7 +535,7 @@ def _process_new_file(self, file_path: str) -> Union[str, list]:
                     tokens += para['token']
 
             if not results or not tokens:
-                logger.error(f"Parsing failed: No information was parsed")
+                logger.error("Parsing failed: No information was parsed")
                 raise FileParserError("Document parsing failed")
             else:
                 self._cache_result(file_path, results)
diff --git a/inference/file_tools/idp.py b/inference/file_tools/idp.py
index 71199cb..b77872b 100644
--- a/inference/file_tools/idp.py
+++ b/inference/file_tools/idp.py
@@ -1,12 +1,10 @@
 import os 
-import json
 
 from alibabacloud_docmind_api20220711.client import Client as docmind_api20220711Client
 from alibabacloud_tea_openapi import models as open_api_models
 from alibabacloud_docmind_api20220711 import models as docmind_api20220711_models
 from alibabacloud_tea_util.client import Client as UtilClient
 from alibabacloud_tea_util import models as util_models
-from alibabacloud_credentials.client import Client as CredClient
 
 key = os.environ.get('IDP_KEY_ID')
 secret = os.environ.get('IDP_KEY_SECRET')
@@ -18,7 +16,7 @@ def __init__(self):
             access_key_id=key,
             access_key_secret=secret
         )
-        config.endpoint = f'docmind-api.cn-hangzhou.aliyuncs.com'
+        config.endpoint = 'docmind-api.cn-hangzhou.aliyuncs.com'
         self.client = docmind_api20220711Client(config)
 
     def file_submit_with_url(self, file_url):
@@ -84,7 +82,7 @@ def file_parser_query(self,fid):
                     responses = result
                 else:
                     responses['layouts'].extend(result['layouts'])
-            except Exception as error:
+            except Exception:
                 return None,status_parse
         return responses,status_parse
   	
\ No newline at end of file
diff --git a/inference/file_tools/video_agent.py b/inference/file_tools/video_agent.py
index 7d9b709..37857d9 100644
--- a/inference/file_tools/video_agent.py
+++ b/inference/file_tools/video_agent.py
@@ -9,22 +9,11 @@
 """
 import sys
 import os
-import re
-import copy
 import json
-from typing import Dict, Iterator, List, Literal, Tuple, Union, Any, Optional
-import json5
 import asyncio
-from openai import OpenAI
 
 from qwen_agent.tools.base import BaseTool, register_tool
-from qwen_agent.agents import Assistant
-from qwen_agent.llm import BaseChatModel
-from qwen_agent.llm.schema import ASSISTANT, USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE, SYSTEM, ROLE
 from qwen_agent.tools import BaseTool
-from qwen_agent.log import logger
-from qwen_agent.utils.tokenization_qwen import count_tokens, tokenizer
-from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS
 
 current_dir = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.dirname(current_dir))  
diff --git a/inference/react_agent.py b/inference/react_agent.py
index 1824666..6c6e57b 100644
--- a/inference/react_agent.py
+++ b/inference/react_agent.py
@@ -1,7 +1,6 @@
-import json
 import json5
 import os
-from typing import Dict, Iterator, List, Literal, Optional, Tuple, Union
+from typing import Dict, List, Optional, Union
 from qwen_agent.llm.schema import Message
 from qwen_agent.utils.utils import build_text_completion_prompt
 from openai import OpenAI, APIError, APIConnectionError, APITimeoutError
@@ -10,10 +9,8 @@
 from datetime import datetime
 from qwen_agent.agents.fncall_agent import FnCallAgent
 from qwen_agent.llm import BaseChatModel
-from qwen_agent.llm.schema import ASSISTANT, DEFAULT_SYSTEM_MESSAGE, Message
 from qwen_agent.settings import MAX_LLM_CALL_PER_RUN
 from qwen_agent.tools import BaseTool
-from qwen_agent.utils.utils import format_as_text_message, merge_generate_cfgs
 from prompt import *
 import time
 import asyncio
@@ -109,12 +106,12 @@ def call_server(self, msgs, planning_port, max_tries=10):
             else:
                 print("Error: All retry attempts have been exhausted. The call has failed.")
         
-        return f"vllm server error!!!"
+        return "vllm server error!!!"
 
     def count_tokens(self, messages, model="gpt-4o"):
         try: 
             tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path) 
-        except Exception as e: 
+        except Exception: 
             tokenizer = tiktoken.encoding_for_model(model)
         
         full_message = [Message(**x) for x in messages]
@@ -126,7 +123,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]:
         self.model=model
         try:
             question = data['item']['question']
-        except: 
+        except Exception: 
             raw_msg = data['item']['messages'][1]["content"] 
             question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg 
 
@@ -168,7 +165,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]:
                         try:
                             code_raw=content.split('<tool_call>')[1].split('</tool_call>')[0].split('<code>')[1].split('</code>')[0].strip()
                             result = TOOL_MAP['PythonInterpreter'].call(code_raw)
-                        except:
+                        except Exception:
                             result = "[Python Interpreter Error]: Formatting error."
 
                     else:
@@ -177,7 +174,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]:
                         tool_args = tool_call.get('arguments', {})
                         result = self.custom_call_tool(tool_name, tool_args)
 
-                except:
+                except Exception:
                     result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.'
                 result = "<tool_response>\n" + result + "\n</tool_response>"
                 # print(result)
diff --git a/inference/run_multi_react.py b/inference/run_multi_react.py
index 1056a0a..4a517ea 100644
--- a/inference/run_multi_react.py
+++ b/inference/run_multi_react.py
@@ -5,9 +5,7 @@
 import concurrent.futures
 from tqdm import tqdm
 import threading
-from datetime import datetime
 from react_agent import MultiTurnReactAgent
-import time
 import math
 
 if __name__ == "__main__":
diff --git a/inference/tool_file.py b/inference/tool_file.py
index 77c4960..7fb1ce9 100644
--- a/inference/tool_file.py
+++ b/inference/tool_file.py
@@ -9,26 +9,12 @@
 """
 import sys
 import os
-import re
-import time
-import copy
 import json
-from typing import Dict, Iterator, List, Literal, Tuple, Union, Any, Optional
-import json5
-import asyncio
-from openai import OpenAI, AsyncOpenAI
-import pdb
-import bdb
-
-from qwen_agent.tools.base import BaseTool, register_tool
-from qwen_agent.agents import Assistant
-from qwen_agent.llm import BaseChatModel
-from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS
-from qwen_agent.llm.schema import ASSISTANT, USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE, SYSTEM, ROLE
+
+from qwen_agent.tools.base import BaseTool
+from qwen_agent.settings import DEFAULT_MAX_INPUT_TOKENS
 from qwen_agent.tools import BaseTool
-from qwen_agent.log import logger
-from qwen_agent.utils.tokenization_qwen import count_tokens, tokenizer
-from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS
+from qwen_agent.utils.tokenization_qwen import count_tokens
 
 current_dir = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.dirname(current_dir)) 
diff --git a/inference/tool_python.py b/inference/tool_python.py
index e8e5522..d851ee8 100644
--- a/inference/tool_python.py
+++ b/inference/tool_python.py
@@ -1,14 +1,13 @@
 import re
-from typing import Dict, List, Optional, Union
+from typing import Dict, Optional, Union
 import json5
 from qwen_agent.tools.base import BaseToolWithFileAccess, register_tool
 from qwen_agent.utils.utils import extract_code
-from sandbox_fusion import run_code, RunCodeRequest, RunStatus
+from sandbox_fusion import run_code, RunCodeRequest
 from requests.exceptions import Timeout
 import os
 import random
 import time
-from concurrent.futures import ThreadPoolExecutor, as_completed
 
 # Array of sandbox fusion endpoints
 SANDBOX_FUSION_ENDPOINTS = []
@@ -80,12 +79,12 @@ def call(self, params, files= None, timeout = 50, **kwargs) -> str:
                     if code_result.run_result.stderr:
                         result.append(f"stderr:\n{code_result.run_result.stderr}")
                     if code_result.run_result.execution_time >= timeout-1:
-                        result.append(f"[PythonInterpreter Error] TimeoutError: Execution timed out.")
+                        result.append("[PythonInterpreter Error] TimeoutError: Execution timed out.")
                     result = '\n'.join(result)
                     print('SUCCESS RUNNING TOOL')
                     return result if result.strip() else 'Finished execution.'
 
-                except Timeout as e:
+                except Timeout:
                     last_error = f'[Python Interpreter Error] TimeoutError: Execution timed out on endpoint {endpoint}.'
                     print(f"Timeout on attempt {attempt + 1}: {last_error}")
                     if attempt == 4:  # Last attempt
@@ -137,7 +136,7 @@ def call_specific_endpoint(self, params: Union[str, dict], endpoint: str, timeou
             execution_time = end_time - start_time
             return True, result if result.strip() else 'Finished execution.', execution_time
 
-        except Timeout as e:
-            return False, f'[Python Interpreter Error] TimeoutError: Execution timed out.', None
+        except Timeout:
+            return False, '[Python Interpreter Error] TimeoutError: Execution timed out.', None
         except Exception as e:
-            return False, f'[Python Interpreter Error]: {str(e)}', None
+            return False, f'[Python Interpreter Error]: {str(e)}', None
\ No newline at end of file
diff --git a/inference/tool_scholar.py b/inference/tool_scholar.py
index ae021b3..90e97c5 100644
--- a/inference/tool_scholar.py
+++ b/inference/tool_scholar.py
@@ -1,6 +1,5 @@
 import os
 import json
-import requests
 from typing import Union, List
 from qwen_agent.tools.base import BaseTool, register_tool
 from concurrent.futures import ThreadPoolExecutor
@@ -44,7 +43,7 @@ def google_scholar_with_serp(self, query: str):
             except Exception as e:
                 print(e)
                 if i == 4:
-                    return f"Google Scholar Timeout, return None, Please try again later."
+                    return "Google Scholar Timeout, return None, Please try again later."
                 continue
         
 
@@ -87,7 +86,7 @@ def google_scholar_with_serp(self, query: str):
 
             content = f"A Google scholar for '{query}' found {len(web_snippets)} results:\n\n## Scholar Results\n" + "\n\n".join(web_snippets)
             return content
-        except:
+        except Exception:
             return f"No results found for '{query}'. Try with a more general query."
 
 
@@ -96,7 +95,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str:
         try:
             params = self._verify_json_format_args(params)
             query = params["query"]
-        except:
+        except Exception:
             return "[google_scholar] Invalid request format: Input must be a JSON object containing 'query' field"
         
         if isinstance(query, str):
diff --git a/inference/tool_search.py b/inference/tool_search.py
index 1a3f7b5..ea69340 100644
--- a/inference/tool_search.py
+++ b/inference/tool_search.py
@@ -1,13 +1,8 @@
 import json
-from concurrent.futures import ThreadPoolExecutor
 from typing import List, Union
-import requests
 from qwen_agent.tools.base import BaseTool, register_tool
-import asyncio
-from typing import Dict, List, Optional, Union
-import uuid
+from typing import Optional
 import http.client
-import json
 
 import os
 
@@ -68,7 +63,7 @@ def contains_chinese_basic(text: str) -> bool:
             except Exception as e:
                 print(e)
                 if i == 4:
-                    return f"Google search Timeout, return None, Please try again later."
+                    return "Google search Timeout, return None, Please try again later."
                 continue
     
         data = res.read()
diff --git a/inference/tool_visit.py b/inference/tool_visit.py
index 92e4e3a..cdee8bf 100644
--- a/inference/tool_visit.py
+++ b/inference/tool_visit.py
@@ -1,17 +1,11 @@
 import json
 import os
-import signal
-import threading
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import List, Union
 import requests
 from qwen_agent.tools.base import BaseTool, register_tool
 from prompt import EXTRACTOR_PROMPT 
 from openai import OpenAI
-import random
-from urllib.parse import urlparse, unquote
 import time 
-from transformers import AutoTokenizer
 import tiktoken
 
 VISIT_SERVER_TIMEOUT = int(os.getenv("VISIT_SERVER_TIMEOUT", 200))
@@ -122,7 +116,7 @@ def call_server(self, msgs, max_retries=2):
                         if left != -1 and right != -1 and left <= right: 
                             content = content[left:right+1]
                     return content
-            except Exception as e:
+            except Exception:
                 # print(e)
                 if attempt == (max_retries - 1):
                     return ""
@@ -159,7 +153,7 @@ def jina_readpage(self, url: str) -> str:
                 else:
                     print(response.text)
                     raise ValueError("jina readpage error")
-            except Exception as e:
+            except Exception:
                 time.sleep(0.5)
                 if attempt == max_retries - 1:
                     return "[visit] Failed to read page."

From ff94125461682fed4c23b3527559d4eb2466c54d Mon Sep 17 00:00:00 2001
From: Samad <Mirzasamadahmedbaig@gmail.com>
Date: Sun, 21 Sep 2025 11:49:20 +0500
Subject: [PATCH 05/13] Fix remaining E722 ruff errors

---
 commit_message.txt       | 1 +
 inference/tool_search.py | 4 ++--
 inference/tool_visit.py  | 6 +++---
 3 files changed, 6 insertions(+), 5 deletions(-)
 create mode 100644 commit_message.txt

diff --git a/commit_message.txt b/commit_message.txt
new file mode 100644
index 0000000..8f1b83e
--- /dev/null
+++ b/commit_message.txt
@@ -0,0 +1 @@
+Fix remaining E722 ruff errors
\ No newline at end of file
diff --git a/inference/tool_search.py b/inference/tool_search.py
index ea69340..499ff0c 100644
--- a/inference/tool_search.py
+++ b/inference/tool_search.py
@@ -96,7 +96,7 @@ def contains_chinese_basic(text: str) -> bool:
 
             content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets)
             return content
-        except:
+        except Exception:
             return f"No results found for '{query}'. Try with a more general query."
 
 
@@ -108,7 +108,7 @@ def search_with_serp(self, query: str):
     def call(self, params: Union[str, dict], **kwargs) -> str:
         try:
             query = params["query"]
-        except:
+        except Exception:
             return "[Search] Invalid request format: Input must be a JSON object containing 'query' field"
         
         if isinstance(query, str):
diff --git a/inference/tool_visit.py b/inference/tool_visit.py
index cdee8bf..97284ff 100644
--- a/inference/tool_visit.py
+++ b/inference/tool_visit.py
@@ -59,7 +59,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str:
         try:
             url = params["url"]
             goal = params["goal"]
-        except:
+        except Exception:
             return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields"
 
         start_time = time.time()
@@ -109,7 +109,7 @@ def call_server(self, msgs, max_retries=2):
                 if content:
                     try:
                         json.loads(content)
-                    except:
+                    except Exception:
                         # extract json from string 
                         left = content.find('{')
                         right = content.rfind('}') 
@@ -221,7 +221,7 @@ def readpage_jina(self, url: str, goal: str) -> str:
                 try:
                     raw = json.loads(raw)
                     break
-                except:
+                except Exception:
                     raw = summary_page_func(messages, max_retries=max_retries)
                     parse_retry_times += 1
             

From 3f517c712e752ef444dd5127f7f82e98b30ba93e Mon Sep 17 00:00:00 2001
From: Samad <Mirzasamadahmedbaig@gmail.com>
Date: Sun, 21 Sep 2025 11:51:19 +0500
Subject: [PATCH 06/13] Fix F403 and F405 ruff errors

---
 WebAgent/WebSailor/src/react_agent.py      | 2 +-
 WebAgent/WebSailor/src/run_multi_react.py  | 2 +-
 WebAgent/WebWalker/src/agent.py            | 2 +-
 WebAgent/WebWalker/src/app.py              | 2 +-
 commit_message.txt                         | 2 +-
 evaluation/evaluate_deepsearch_official.py | 2 +-
 inference/react_agent.py                   | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py
index 79df9ac..f6f1fc6 100644
--- a/WebAgent/WebSailor/src/react_agent.py
+++ b/WebAgent/WebSailor/src/react_agent.py
@@ -12,7 +12,7 @@
 from qwen_agent.tools import BaseTool
 
 
-MAX_LLM_CALL_PER_RUN = int(os.getenv('MAX_LLM_CALL_PER_RUN', 40))
+
 MAX_TOKEN_LENGTH = int(os.getenv('MAX_LENGTH', 31 * 1024 - 500))
 
 print(f'Running with MAX_LLM_CALL_PER_RUN = {MAX_LLM_CALL_PER_RUN}')
diff --git a/WebAgent/WebSailor/src/run_multi_react.py b/WebAgent/WebSailor/src/run_multi_react.py
index d86489a..7057a07 100644
--- a/WebAgent/WebSailor/src/run_multi_react.py
+++ b/WebAgent/WebSailor/src/run_multi_react.py
@@ -8,7 +8,7 @@
 from react_agent import MultiTurnReactAgent
 from prompt import SYSTEM_PROMPT_MULTI, USER_PROMPT
 from tool_search import *
-from tool_visit import * 
+from tool_visit import Visit 
 
 
 if __name__ == "__main__":
diff --git a/WebAgent/WebWalker/src/agent.py b/WebAgent/WebWalker/src/agent.py
index c5274e7..5ec7a82 100644
--- a/WebAgent/WebWalker/src/agent.py
+++ b/WebAgent/WebWalker/src/agent.py
@@ -9,7 +9,7 @@
 from qwen_agent.utils.utils import format_as_text_message, merge_generate_cfgs
 from openai import OpenAI
 import time
-from prompts import *
+from prompts import STSTEM_CRITIIC_INFORMATION, STSTEM_CRITIIC_ANSWER, SYSTEM_EXPLORER
 
 
 TOOL_DESC = (
diff --git a/WebAgent/WebWalker/src/app.py b/WebAgent/WebWalker/src/app.py
index 7351368..5b122e2 100644
--- a/WebAgent/WebWalker/src/app.py
+++ b/WebAgent/WebWalker/src/app.py
@@ -6,7 +6,7 @@
 import re
 import json
 import asyncio
-from utils import *
+from utils import process_url, get_info, get_content_between_a_b
 import base64
 from PIL import Image
 from bs4 import BeautifulSoup
diff --git a/commit_message.txt b/commit_message.txt
index 8f1b83e..baecec5 100644
--- a/commit_message.txt
+++ b/commit_message.txt
@@ -1 +1 @@
-Fix remaining E722 ruff errors
\ No newline at end of file
+Fix F403 and F405 ruff errors
\ No newline at end of file
diff --git a/evaluation/evaluate_deepsearch_official.py b/evaluation/evaluate_deepsearch_official.py
index aaf384c..5b3e6d9 100644
--- a/evaluation/evaluate_deepsearch_official.py
+++ b/evaluation/evaluate_deepsearch_official.py
@@ -7,7 +7,7 @@
 import concurrent 
 from tqdm import tqdm 
 from transformers import AutoTokenizer 
-from prompt import * 
+from prompt import JUDGE_PROMPT_GAIA, JUDGE_PROMPT_XBENCH, JUDGE_PROMPT_BROWSECOMP_OFFICIAL 
 import traceback
 import tiktoken
 import time
diff --git a/inference/react_agent.py b/inference/react_agent.py
index 6c6e57b..bd54192 100644
--- a/inference/react_agent.py
+++ b/inference/react_agent.py
@@ -19,7 +19,7 @@
 from tool_scholar import *
 from tool_python import *
 from tool_search import *
-from tool_visit import *
+from tool_visit import Visit
 
 OBS_START = '<tool_response>'
 OBS_END = '\n</tool_response>'

From d9c0bd453fb7355fe7a26be49e642e837a3d514d Mon Sep 17 00:00:00 2001
From: Samad <Mirzasamadahmedbaig@gmail.com>
Date: Sun, 21 Sep 2025 12:01:47 +0500
Subject: [PATCH 07/13] Revert "Refactor react_agent.py to improve security and
 maintainability"

This reverts commit 48d5f8f93e3df2628eace789750f3b57ae8a7dca.
---
 WebAgent/WebSailor/src/react_agent.py | 99 +++++++++++++--------------
 1 file changed, 49 insertions(+), 50 deletions(-)

diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py
index f6f1fc6..1ec9352 100644
--- a/WebAgent/WebSailor/src/react_agent.py
+++ b/WebAgent/WebSailor/src/react_agent.py
@@ -12,7 +12,7 @@
 from qwen_agent.tools import BaseTool
 
 
-
+MAX_LLM_CALL_PER_RUN = int(os.getenv('MAX_LLM_CALL_PER_RUN', 40))
 MAX_TOKEN_LENGTH = int(os.getenv('MAX_LENGTH', 31 * 1024 - 500))
 
 print(f'Running with MAX_LLM_CALL_PER_RUN = {MAX_LLM_CALL_PER_RUN}')
@@ -38,8 +38,8 @@ def __init__(self,
 
     def call_server(self, msgs, max_tries=10):
         # Set OpenAI API key and base URL using vLLM API server
-        openai_api_key = os.getenv("OPENAI_API_KEY", "EMPTY")
-        openai_api_base = os.getenv("OPENAI_API_BASE", "http://127.0.0.1:6001/v1")
+        openai_api_key = "EMPTY"
+        openai_api_base = "http://127.0.0.1:6001/v1"
 
         client = OpenAI(
             api_key=openai_api_key,
@@ -77,53 +77,15 @@ def count_tokens(self, messages, model="gpt-4o"):
         
         return len(tokenizer.encode(full_prompt))
 
-    def _process_tool_call(self, content, messages):
-        if '<tool_call>' in content and '</tool_call>' in content:
-            tool_call = content.split('<tool_call>')[1].split('</tool_call>')[0]
-            try:
-                tool_call = json.loads(tool_call)
-                tool_name = tool_call.get('name', '')
-                tool_args = tool_call.get('arguments', {})
-                result = self._call_tool(tool_name, tool_args)
-            except Exception:
-                result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.'
-            result = "<tool_response>\n" + result + "\n</tool_response>"
-            messages.append({"role": "user", "content": result})
-        return messages
-
-    def _handle_token_limit(self, messages, question, answer, rollout_id):
-        print("Token count exceeds limit")
-        
-        messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:<think>your final thinking</think>\n<answer>your answer</answer>"
-        content = self.call_server(messages)
-        messages.append({"role": "assistant", "content": content.strip()})
-        if '<answer>' in content and '</answer>' in content:
-            prediction = messages[-1]['content'].split('<answer>')[1].split('</answer>')[0]
-            termination = 'generate an answer as token limit reached'
-        else:
-            prediction = messages[-1]['content']
-            termination = 'format error: generate an answer as token limit reached'
-        return self._generate_result(question, answer, rollout_id, messages, prediction, termination)
-
-    def _generate_result(self, question, answer, rollout_id, messages, prediction, termination):
-        return {
-            "question": question,
-            "answer": answer,
-            "rollout_id": rollout_id,
-            "messages": messages,
-            "prediction": prediction,
-            "termination": termination
-        }
-
     def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[Message]]:
         self.model=model
-        question = data.get('item', {}).get('question', '')
-        if not question:
-            raw_msg = data.get('item', {}).get('messages', [{}, {}])[1].get("content", "")
-            question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg
+        try:
+            question = data['item']['question']
+        except: 
+            raw_msg = data['item']['messages'][1]["content"] 
+            question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg 
 
-        answer = data.get('item', {}).get('answer', '')
-        rollout_id = data.get('rollout_id', '')
+        answer = data['item']['answer']
         self.user_prompt = user_prompt
         self.user_prompt = self.user_prompt + question
         messages = [{"role": "system", "content": self.system_message}, {"role": "user", "content": self.user_prompt}]
@@ -138,7 +100,17 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M
                 pos = content.find('<tool_response>')
                 content = content[:pos]
             messages.append({"role": "assistant", "content": content.strip()})
-            messages = self._process_tool_call(content, messages)
+            if '<tool_call>' in content and '</tool_call>' in content:
+                tool_call = content.split('<tool_call>')[1].split('</tool_call>')[0]
+                try:
+                    tool_call = json.loads(tool_call)
+                    tool_name = tool_call.get('name', '')
+                    tool_args = tool_call.get('arguments', {})
+                    result = self._call_tool(tool_name, tool_args)
+                except:
+                    result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.'
+                result = "<tool_response>\n" + result + "\n</tool_response>"
+                messages.append({"role": "user", "content": result})
             if '<answer>' in content and '</answer>' in content:
                 termination = 'answer'
                 break
@@ -150,7 +122,26 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M
             print(f"round: {round}, token count: {token_count}")
 
             if token_count > max_tokens:
-                return self._handle_token_limit(messages, question, answer, rollout_id)
+                print(f"Token count exceeds limit: {token_count} > {max_tokens}")
+                
+                messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:<think>your final thinking</think>\n<answer>your answer</answer>"
+                content = self.call_server(messages)
+                messages.append({"role": "assistant", "content": content.strip()})
+                if '<answer>' in content and '</answer>' in content:
+                    prediction = messages[-1]['content'].split('<answer>')[1].split('</answer>')[0]
+                    termination = 'generate an answer as token limit reached'
+                else:
+                    prediction = messages[-1]['content']
+                    termination = 'format error: generate an answer as token limit reached'
+                result = {
+                    "question": question,
+                    "answer": answer,
+                    "rollout_id": data['rollout_id'],
+                    "messages": messages,
+                    "prediction": prediction,
+                    "termination": termination
+                }
+                return result
 
         if '<answer>' in messages[-1]['content']:
             prediction = messages[-1]['content'].split('<answer>')[1].split('</answer>')[0]
@@ -160,4 +151,12 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M
             termination = 'answer not found'
             if num_llm_calls_available == 0:
                 termination = 'exceed available llm calls'
-        return self._generate_result(question, answer, rollout_id, messages, prediction, termination)
\ No newline at end of file
+        result = {
+            "question": question,
+            "answer": answer,
+            "rollout_id": data['rollout_id'],
+            "messages": messages,
+            "prediction": prediction,
+            "termination": termination
+        }
+        return result
\ No newline at end of file

From 2c82bf9850c0455f2953f6f109fb96ed66cba978 Mon Sep 17 00:00:00 2001
From: Samad <Mirzasamadahmedbaig@gmail.com>
Date: Sun, 21 Sep 2025 12:02:18 +0500
Subject: [PATCH 08/13] Revert "chore(ci): add basic Ruff lint workflow"

This reverts commit a9256b7dfa1355f42d04cfef79f08c5c592758ad.
---
 .github/workflows/ci-lint.yml | 20 --------------------
 commit_message.txt            |  1 -
 2 files changed, 21 deletions(-)
 delete mode 100644 .github/workflows/ci-lint.yml
 delete mode 100644 commit_message.txt

diff --git a/.github/workflows/ci-lint.yml b/.github/workflows/ci-lint.yml
deleted file mode 100644
index 864e85b..0000000
--- a/.github/workflows/ci-lint.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-name: CI - Lint
-
-on:
-  pull_request:
-    branches: ["main"]
-  push:
-    branches: ["chore/add-ci-lint"]
-
-jobs:
-  lint:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-      - name: Install ruff
-        run: pip install ruff==0.5.6
-      - name: Lint
-        run: ruff check --output-format=github .
\ No newline at end of file
diff --git a/commit_message.txt b/commit_message.txt
deleted file mode 100644
index baecec5..0000000
--- a/commit_message.txt
+++ /dev/null
@@ -1 +0,0 @@
-Fix F403 and F405 ruff errors
\ No newline at end of file

From 1348ac5b28344b6dea29e577e6ae3905c535818b Mon Sep 17 00:00:00 2001
From: Samad <Mirzasamadahmedbaig@gmail.com>
Date: Sun, 21 Sep 2025 12:07:59 +0500
Subject: [PATCH 09/13] Revert "Fix E722 ruff errors"

This reverts commit d60f2545369347a48f8d4aa1253f6abb9c021f6c.
---
 .../WebDancer/demos/assistant_qwq_chat.py     |  8 +++++--
 WebAgent/WebDancer/demos/llm/oai.py           |  3 ++-
 .../WebDancer/demos/tools/private/search.py   |  6 ++---
 .../WebDancer/demos/tools/private/visit.py    |  4 ++--
 WebAgent/WebDancer/demos/utils/logs.py        |  1 +
 WebAgent/WebSailor/src/evaluate.py            |  4 ++--
 WebAgent/WebSailor/src/react_agent.py         |  4 ++--
 WebAgent/WebSailor/src/tool_search.py         |  7 +++---
 WebAgent/WebSailor/src/tool_visit.py          | 11 +++++----
 WebAgent/WebWalker/src/agent.py               | 10 ++++----
 WebAgent/WebWalker/src/app.py                 |  5 ++--
 evaluation/evaluate_deepsearch_official.py    | 23 ++++++++++---------
 inference/file_tools/file_parser.py           |  7 +++---
 inference/file_tools/idp.py                   |  6 +++--
 inference/file_tools/video_agent.py           | 11 +++++++++
 inference/react_agent.py                      | 15 +++++++-----
 inference/run_multi_react.py                  |  2 ++
 inference/tool_file.py                        | 22 ++++++++++++++----
 inference/tool_python.py                      | 15 ++++++------
 inference/tool_scholar.py                     |  7 +++---
 inference/tool_search.py                      |  9 ++++++--
 inference/tool_visit.py                       | 10 ++++++--
 22 files changed, 123 insertions(+), 67 deletions(-)

diff --git a/WebAgent/WebDancer/demos/assistant_qwq_chat.py b/WebAgent/WebDancer/demos/assistant_qwq_chat.py
index b0a48a6..3fa38ff 100644
--- a/WebAgent/WebDancer/demos/assistant_qwq_chat.py
+++ b/WebAgent/WebDancer/demos/assistant_qwq_chat.py
@@ -2,11 +2,15 @@
 
 import os
 
+from qwen_agent.agents import Assistant
+from qwen_agent.utils.output_beautify import typewriter_print
 
 from demos.agents.search_agent import SearchAgent
 from demos.llm.oai import TextChatAtOAI
+from demos.llm.qwen_dashscope import QwenChatAtDS
 from demos.gui.web_ui import WebUI
 from demos.utils.date import date2str, get_date_now
+from demos.tools import Visit, Search
 
 
 ROOT_RESOURCE = os.path.join(os.path.dirname(__file__), 'resource')
@@ -46,8 +50,8 @@ def make_system_prompt():
         llm=llm_cfg,
         function_list=tools,
         system_message="",
-        name='WebDancer',
-        description="I am WebDancer, a web information seeking agent, welcome to try!",
+        name=f'WebDancer',
+        description=f"I am WebDancer, a web information seeking agent, welcome to try!",
         extra={
             'reasoning': reasoning,
             'max_llm_calls': max_llm_calls,
diff --git a/WebAgent/WebDancer/demos/llm/oai.py b/WebAgent/WebDancer/demos/llm/oai.py
index 53a261d..0df27fe 100644
--- a/WebAgent/WebDancer/demos/llm/oai.py
+++ b/WebAgent/WebDancer/demos/llm/oai.py
@@ -2,6 +2,7 @@
 import json
 import logging
 import os
+from http import HTTPStatus
 from pprint import pformat
 from typing import Dict, Iterator, List, Optional, Literal, Union
 
@@ -14,7 +15,7 @@
 
 from qwen_agent.llm.base import ModelServiceError, register_llm
 from qwen_agent.llm.function_calling import BaseFnCallModel, simulate_response_completion_with_chat
-from qwen_agent.llm.schema import ASSISTANT, Message
+from qwen_agent.llm.schema import ASSISTANT, Message, FunctionCall
 from qwen_agent.log import logger
 
 
diff --git a/WebAgent/WebDancer/demos/tools/private/search.py b/WebAgent/WebDancer/demos/tools/private/search.py
index 7f994ee..29278ec 100644
--- a/WebAgent/WebDancer/demos/tools/private/search.py
+++ b/WebAgent/WebDancer/demos/tools/private/search.py
@@ -30,7 +30,7 @@ def call(self, params: str, **kwargs) -> str:
         try:
             params = self._verify_json_format_args(params)
             query = params["query"][:MAX_MULTIQUERY_NUM]
-        except Exception:
+        except:
             return "[Search] Invalid request format: Input must be a JSON object containing 'query' field"
 
         if isinstance(query, str):
@@ -57,9 +57,9 @@ def google_search(self, query: str) -> str:
                 response = requests.post(url, headers=headers, data=json.dumps(data))
                 results = response.json()
                 break
-            except Exception:
+            except Exception as e:
                 if i == 4:
-                    return "Google search Timeout, return None, Please try again later."
+                    return f"Google search Timeout, return None, Please try again later."
                 continue
     
         if response.status_code != 200:
diff --git a/WebAgent/WebDancer/demos/tools/private/visit.py b/WebAgent/WebDancer/demos/tools/private/visit.py
index fef4abe..b911cc6 100644
--- a/WebAgent/WebDancer/demos/tools/private/visit.py
+++ b/WebAgent/WebDancer/demos/tools/private/visit.py
@@ -62,7 +62,7 @@ def jina_readpage(url: str) -> str:
             else:
                 print(response.text)
                 raise ValueError("jina readpage error")
-        except Exception:
+        except Exception as e:
             if attempt == max_retries - 1:
                 return "[visit] Failed to read page."
             
@@ -97,7 +97,7 @@ def call(self, params: str, **kwargs) -> str:
             params = self._verify_json_format_args(params)
             url = params["url"]
             goal = params["goal"]
-        except Exception:
+        except:
             return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields"
         if isinstance(url, str):
             response = self.readpage(url, goal)
diff --git a/WebAgent/WebDancer/demos/utils/logs.py b/WebAgent/WebDancer/demos/utils/logs.py
index 629814d..c8cd432 100644
--- a/WebAgent/WebDancer/demos/utils/logs.py
+++ b/WebAgent/WebDancer/demos/utils/logs.py
@@ -1,5 +1,6 @@
 # coding=utf-8
 import os
+import sys
 import logging
 
 
diff --git a/WebAgent/WebSailor/src/evaluate.py b/WebAgent/WebSailor/src/evaluate.py
index 0386a54..6b3a22c 100644
--- a/WebAgent/WebSailor/src/evaluate.py
+++ b/WebAgent/WebSailor/src/evaluate.py
@@ -289,7 +289,7 @@ def main():
         for i in [1, 2, 3]
     }
 
-    print("===========")
+    print(f"===========")
     print(f"Avg. Pass@3 {avg_pass_at_3}%") 
     print(f"Best Pass@1 {best_pass_at_1}%")  
     print(f"Pass@3 {pass_at_3}%") 
@@ -297,7 +297,7 @@ def main():
     print(f"# Invalid {aggr_statistics['num_invalid']}  # Extra Length {aggr_statistics['extra_length']}") 
     print(f"Avg. Action {aggr_statistics['avg_action']:.2f}  Avg. Visit Action {aggr_statistics['avg_visit_action']:.2f}  Avg. Search Action {aggr_statistics['avg_search_action']:.2f}  Avg. Other Action {aggr_statistics['avg_other_action']:.2f}") 
     print(f"Avg. Answer Length {aggr_statistics['avg_ans_length']:.2f}  Avg. Thinking Length {aggr_statistics['avg_think_length']:.2f}")  
-    print("===========" )
+    print(f"===========" )
 
     overall_eval_dict = {
         "dataset": dataset, 
diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py
index 1ec9352..7a95051 100644
--- a/WebAgent/WebSailor/src/react_agent.py
+++ b/WebAgent/WebSailor/src/react_agent.py
@@ -61,7 +61,7 @@ def call_server(self, msgs, max_tries=10):
             except Exception as e:
                 if attempt == (max_tries - 1):
                     print(f"SGLang server error {e}")
-                    return "SGLang server error"
+                    return f"SGLang server error"
                 continue
         
         return "SGLang server empty response"
@@ -69,7 +69,7 @@ def call_server(self, msgs, max_tries=10):
     def count_tokens(self, messages, model="gpt-4o"):
         try: 
             tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path) 
-        except Exception: 
+        except: 
             tokenizer = tiktoken.encoding_for_model(model)
         
         full_message = [Message(**x) for x in messages]
diff --git a/WebAgent/WebSailor/src/tool_search.py b/WebAgent/WebSailor/src/tool_search.py
index ba0da49..3643c53 100644
--- a/WebAgent/WebSailor/src/tool_search.py
+++ b/WebAgent/WebSailor/src/tool_search.py
@@ -3,6 +3,7 @@
 from concurrent.futures import ThreadPoolExecutor
 from typing import List, Union
 import requests
+from qwen_agent.tools.base import BaseTool, register_tool
 import os
 
 SEARCH_API_URL = os.getenv("SEARCH_API_URL")
@@ -49,7 +50,7 @@ def google_search(self, query: str):
             except Exception as e:
                 print(e)
                 if i == 4:
-                    return "Google search Timeout, return None, Please try again later."
+                    return f"Google search Timeout, return None, Please try again later."
         if response.status_code != 200:
             raise Exception(f"Error: {response.status_code} - {response.text}")
 
@@ -81,7 +82,7 @@ def google_search(self, query: str):
 
             content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets)
             return content
-        except Exception:
+        except:
             return f"No results found for '{query}'. Try with a more general query, or remove the year filter."
 
 
@@ -89,7 +90,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str:
         assert GOOGLE_SEARCH_KEY is not None, "Please set the GOOGLE_SEARCH_KEY environment variable."
         try:
             query = params["query"]
-        except Exception:
+        except:
             return "[Search] Invalid request format: Input must be a JSON object containing 'query' field"
         
         if isinstance(query, str):
diff --git a/WebAgent/WebSailor/src/tool_visit.py b/WebAgent/WebSailor/src/tool_visit.py
index fbae5e9..ac8e5e6 100644
--- a/WebAgent/WebSailor/src/tool_visit.py
+++ b/WebAgent/WebSailor/src/tool_visit.py
@@ -6,6 +6,7 @@
 from prompt import EXTRACTOR_PROMPT 
 import os 
 from openai import OpenAI
+import random
 
 
 WEBCONTENT_MAXLENGTH = int(os.getenv("WEBCONTENT_MAXLENGTH", 150000))
@@ -45,7 +46,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str:
         try:
             url = params["url"]
             goal = params["goal"]
-        except Exception:
+        except:
             return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields"
 
         if isinstance(url, str):
@@ -86,14 +87,14 @@ def call_server(self, msgs, max_tries=10):
                 if content:
                     try:
                         json.loads(content)
-                    except Exception:
+                    except:
                         # extract json from string 
                         left = content.find('{')
                         right = content.rfind('}') 
                         if left != -1 and right != -1 and left <= right: 
                             content = content[left:right+1]
                     return content
-            except Exception:
+            except:
                 if attempt == (max_tries - 1):
                     return ""
                 continue
@@ -128,7 +129,7 @@ def jina_readpage(self, url: str) -> str:
                 else:
                     print(response.text)
                     raise ValueError("jina readpage error")
-            except Exception:
+            except Exception as e:
                 if attempt == max_retries - 1:
                     return "[visit] Failed to read page."
                 
@@ -190,7 +191,7 @@ def readpage(self, url: str, goal: str) -> str:
                         # 尝试 parse json
                         raw = json.loads(raw)
                         break
-                    except Exception:
+                    except:
                         raw = self.call_server(messages)
                         parse_retry_times += 1
                 # parse 失败
diff --git a/WebAgent/WebWalker/src/agent.py b/WebAgent/WebWalker/src/agent.py
index 5ec7a82..02ffeb2 100644
--- a/WebAgent/WebWalker/src/agent.py
+++ b/WebAgent/WebWalker/src/agent.py
@@ -63,7 +63,7 @@ def observation_information_extraction(self, query, observation):
                 if "true" in response.choices[0].message.content:
                     try:
                         return json.loads(response.choices[0].message.content)["information"]
-                    except Exception:
+                    except:
                         return response.choices[0].message.content
                 else:
                     return None
@@ -97,7 +97,7 @@ def critic_information(self, query, memory):
                 if "true" in response.choices[0].message.content:
                     try:
                         return json.loads(response.choices[0].message.content)["answer"]
-                    except Exception:
+                    except:
                         return response.choices[0].message.content
                 else:
                     return None
@@ -140,9 +140,9 @@ def _run(self, messages: List[Message], lang: Literal['en', 'zh'] = 'en', **kwar
             if stage1:
                 self.momery.append(stage1+"\n")
                 if len(self.momery) > 1:
-                    yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"}")]
+                    yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"}")]
                 else:
-                    yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"}")]
+                    yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"}")]
                 stage2 = self.critic_information(query, self.momery)
                 if stage2:
                     response = f'Final Answer: {stage2}'
@@ -205,4 +205,4 @@ def _detect_tool(self, text: str) -> Tuple[bool, str, str, str]:
             func_name = text[i + len(special_func_token):j].strip()
             func_args = text[j + len(special_args_token):k].strip()
             text = text[:i]  # Return the response before tool call, i.e., `Thought`
-        return (func_name is not None), func_name, func_args, text
+        return (func_name is not None), func_name, func_args, text
\ No newline at end of file
diff --git a/WebAgent/WebWalker/src/app.py b/WebAgent/WebWalker/src/app.py
index 5b122e2..f7fba53 100644
--- a/WebAgent/WebWalker/src/app.py
+++ b/WebAgent/WebWalker/src/app.py
@@ -3,10 +3,11 @@
 import json5
 from agent import WebWalker
 from qwen_agent.tools.base import BaseTool, register_tool
+import os
 import re
 import json
 import asyncio
-from utils import process_url, get_info, get_content_between_a_b
+from utils import *
 import base64
 from PIL import Image
 from bs4 import BeautifulSoup
@@ -267,4 +268,4 @@ def call(self, params: str, **kwargs) -> str:
             else:
                 return "The button can not be clicked, please retry a new botton!"
         else:
-            return "Your input is invalid, plase output the action input correctly!"
+            return "Your input is invalid, plase output the action input correctly!"
\ No newline at end of file
diff --git a/evaluation/evaluate_deepsearch_official.py b/evaluation/evaluate_deepsearch_official.py
index 5b3e6d9..b348f66 100644
--- a/evaluation/evaluate_deepsearch_official.py
+++ b/evaluation/evaluate_deepsearch_official.py
@@ -7,7 +7,8 @@
 import concurrent 
 from tqdm import tqdm 
 from transformers import AutoTokenizer 
-from prompt import JUDGE_PROMPT_GAIA, JUDGE_PROMPT_XBENCH, JUDGE_PROMPT_BROWSECOMP_OFFICIAL 
+import re 
+from prompt import * 
 import traceback
 import tiktoken
 import time
@@ -175,7 +176,7 @@ def count_tokens_with_tokenizer(text, tokenizer):
             return len(tokenizer.encode(text))
         else:  
             return len(tokenizer.encode(text))
-    except Exception:
+    except:
         
         return len(text) // 4
 
@@ -186,7 +187,7 @@ def aggregate_statistics(round1_file, round2_file, round3_file):
     round3_stats = single_round_statistics(round3_file)
     
     keys = round1_stats.keys()  
-    avg_stats = {} 
+    avg_stats = {}
     for key in keys: 
         if isinstance(round1_stats[key], dict):
             
@@ -300,7 +301,7 @@ def single_round_statistics(input_file):
         try:
             if len(tokenizer.encode("".join([msg["content"] for msg in messages]))) > 30000:
                 num_extra += 1  
-        except Exception:
+        except:
             pass
     
     total_questions = len(contents)
@@ -342,7 +343,7 @@ def calculate_enhanced_statistics(round_results, round_items):
                 continue
             try:
                 matching_item = [item for item in items if item['messages'][1]['content'] == result['question']]
-            except Exception:
+            except:
                 items = [item for item in items if len(item['messages'])>0]
                 matching_item = [item for item in items if item['messages'][1]['content'] == result['question']]
             if not matching_item:
@@ -416,7 +417,7 @@ def calculate_best_pass_at_1(query_results):
     round_correct = {round_name: 0 for round_name in ["round1", "round2", "round3"]}
 
     for query, results in query_results.items():
-        for round_name in ["round1", "round2", "round3"]: 
+        for round_name in ["round1", "round2", "round3"]:
             if results[round_name] == "Correct":  
                 round_correct[round_name] += 1 
 
@@ -527,7 +528,7 @@ def main():
         for i in [1, 2, 3]
     }
 
-    print("===========")
+    print(f"===========")
     print(f"Avg. Pass@3 {avg_pass_at_3}%") 
     print(f"Best Pass@1 {best_pass_at_1}%")  
     print(f"Pass@3 {pass_at_3}%") 
@@ -538,18 +539,18 @@ def main():
     print(f"Avg. Action {aggr_statistics['avg_action']:.2f}  Avg. Visit Action {aggr_statistics['avg_visit_action']:.2f}  Avg. Search Action {aggr_statistics['avg_search_action']:.2f}  Avg. Other Action {aggr_statistics['avg_other_action']:.2f}") 
     print(f"Avg. Answer Length {aggr_statistics['avg_ans_length']:.2f}  Avg. Thinking Length {aggr_statistics['avg_think_length']:.2f}")
     enhanced_statistics = calculate_enhanced_statistics(round_results, round_items)
-    print("\n=== ADDITIONAL STATISTICS ===")
+    print(f"\n=== ADDITIONAL STATISTICS ===")
     print(f"Avg. Tool Calls per Question: {aggr_statistics['avg_tool_calls_per_question']:.2f}")
     print(f"Avg. Tool Calls per Question (Correctly Solved): {enhanced_statistics['avg_tool_calls_per_question_correctly_solved']:.2f}")
     print(f"Avg. Assistant Tokens per Question: {aggr_statistics['avg_assistant_tokens_per_question']:.2f}")
     print(f"Avg. Assistant Tokens per Question (Correctly Solved): {enhanced_statistics['avg_assistant_tokens_per_question_correctly_solved']:.2f}")
     print(f"Avg. Assistant Tokens per Message: {aggr_statistics['avg_assistant_tokens_per_message']:.2f}")
     
-    print("\n=== TERMINATION FREQUENCIES ===")
+    print(f"\n=== TERMINATION FREQUENCIES ===")
     for termination_type, frequency in aggr_statistics['termination_freq'].items():
         print(f"{termination_type}: {frequency:.3f}")
     
-    print("===========" )
+    print(f"===========" )
 
     overall_eval_dict = {
         "dataset": dataset, 
@@ -578,4 +579,4 @@ def main():
     except Exception as e:
         error_str = traceback.format_exc()
         print(f"Evaluation Failed: {e}") 
-        print("Trace Back", error_str)
+        print("Trace Back", error_str)
\ No newline at end of file
diff --git a/inference/file_tools/file_parser.py b/inference/file_tools/file_parser.py
index 3410da7..ecbace9 100644
--- a/inference/file_tools/file_parser.py
+++ b/inference/file_tools/file_parser.py
@@ -4,6 +4,7 @@
 import time
 import zipfile
 import math
+from pathlib import Path
 
 from typing import Any, Dict, List, Optional, Union
 from collections import Counter
@@ -16,7 +17,7 @@
 from tabulate import tabulate
 from qwen_agent.log import logger
 from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS
-from qwen_agent.tools.base import BaseTool
+from qwen_agent.tools.base import BaseTool, register_tool
 from qwen_agent.tools.storage import KeyNotExistsError, Storage
 from file_tools.utils import (get_file_type, hash_sha256, is_http_url, get_basename_from_url, 
                                   sanitize_chrome_file_path, save_url_to_local_work_dir)
@@ -521,7 +522,7 @@ def _process_new_file(self, file_path: str) -> Union[str, list]:
             if USE_IDP and file_type in idp_types:
                 try:
                     results = parse_file_by_idp(file_path=file_path)
-                except Exception:
+                except Exception as e:
                     results = self.parsers[file_type](file_path)
             else:
                 results = self.parsers[file_type](file_path)
@@ -535,7 +536,7 @@ def _process_new_file(self, file_path: str) -> Union[str, list]:
                     tokens += para['token']
 
             if not results or not tokens:
-                logger.error("Parsing failed: No information was parsed")
+                logger.error(f"Parsing failed: No information was parsed")
                 raise FileParserError("Document parsing failed")
             else:
                 self._cache_result(file_path, results)
diff --git a/inference/file_tools/idp.py b/inference/file_tools/idp.py
index b77872b..71199cb 100644
--- a/inference/file_tools/idp.py
+++ b/inference/file_tools/idp.py
@@ -1,10 +1,12 @@
 import os 
+import json
 
 from alibabacloud_docmind_api20220711.client import Client as docmind_api20220711Client
 from alibabacloud_tea_openapi import models as open_api_models
 from alibabacloud_docmind_api20220711 import models as docmind_api20220711_models
 from alibabacloud_tea_util.client import Client as UtilClient
 from alibabacloud_tea_util import models as util_models
+from alibabacloud_credentials.client import Client as CredClient
 
 key = os.environ.get('IDP_KEY_ID')
 secret = os.environ.get('IDP_KEY_SECRET')
@@ -16,7 +18,7 @@ def __init__(self):
             access_key_id=key,
             access_key_secret=secret
         )
-        config.endpoint = 'docmind-api.cn-hangzhou.aliyuncs.com'
+        config.endpoint = f'docmind-api.cn-hangzhou.aliyuncs.com'
         self.client = docmind_api20220711Client(config)
 
     def file_submit_with_url(self, file_url):
@@ -82,7 +84,7 @@ def file_parser_query(self,fid):
                     responses = result
                 else:
                     responses['layouts'].extend(result['layouts'])
-            except Exception:
+            except Exception as error:
                 return None,status_parse
         return responses,status_parse
   	
\ No newline at end of file
diff --git a/inference/file_tools/video_agent.py b/inference/file_tools/video_agent.py
index 37857d9..7d9b709 100644
--- a/inference/file_tools/video_agent.py
+++ b/inference/file_tools/video_agent.py
@@ -9,11 +9,22 @@
 """
 import sys
 import os
+import re
+import copy
 import json
+from typing import Dict, Iterator, List, Literal, Tuple, Union, Any, Optional
+import json5
 import asyncio
+from openai import OpenAI
 
 from qwen_agent.tools.base import BaseTool, register_tool
+from qwen_agent.agents import Assistant
+from qwen_agent.llm import BaseChatModel
+from qwen_agent.llm.schema import ASSISTANT, USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE, SYSTEM, ROLE
 from qwen_agent.tools import BaseTool
+from qwen_agent.log import logger
+from qwen_agent.utils.tokenization_qwen import count_tokens, tokenizer
+from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS
 
 current_dir = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.dirname(current_dir))  
diff --git a/inference/react_agent.py b/inference/react_agent.py
index bd54192..8c26a35 100644
--- a/inference/react_agent.py
+++ b/inference/react_agent.py
@@ -1,6 +1,7 @@
+import json
 import json5
 import os
-from typing import Dict, List, Optional, Union
+from typing import Dict, Iterator, List, Literal, Optional, Tuple, Union
 from qwen_agent.llm.schema import Message
 from qwen_agent.utils.utils import build_text_completion_prompt
 from openai import OpenAI, APIError, APIConnectionError, APITimeoutError
@@ -9,8 +10,10 @@
 from datetime import datetime
 from qwen_agent.agents.fncall_agent import FnCallAgent
 from qwen_agent.llm import BaseChatModel
+from qwen_agent.llm.schema import ASSISTANT, DEFAULT_SYSTEM_MESSAGE, Message
 from qwen_agent.settings import MAX_LLM_CALL_PER_RUN
 from qwen_agent.tools import BaseTool
+from qwen_agent.utils.utils import format_as_text_message, merge_generate_cfgs
 from prompt import *
 import time
 import asyncio
@@ -106,12 +109,12 @@ def call_server(self, msgs, planning_port, max_tries=10):
             else:
                 print("Error: All retry attempts have been exhausted. The call has failed.")
         
-        return "vllm server error!!!"
+        return f"vllm server error!!!"
 
     def count_tokens(self, messages, model="gpt-4o"):
         try: 
             tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path) 
-        except Exception: 
+        except Exception as e: 
             tokenizer = tiktoken.encoding_for_model(model)
         
         full_message = [Message(**x) for x in messages]
@@ -123,7 +126,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]:
         self.model=model
         try:
             question = data['item']['question']
-        except Exception: 
+        except: 
             raw_msg = data['item']['messages'][1]["content"] 
             question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg 
 
@@ -165,7 +168,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]:
                         try:
                             code_raw=content.split('<tool_call>')[1].split('</tool_call>')[0].split('<code>')[1].split('</code>')[0].strip()
                             result = TOOL_MAP['PythonInterpreter'].call(code_raw)
-                        except Exception:
+                        except:
                             result = "[Python Interpreter Error]: Formatting error."
 
                     else:
@@ -174,7 +177,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]:
                         tool_args = tool_call.get('arguments', {})
                         result = self.custom_call_tool(tool_name, tool_args)
 
-                except Exception:
+                except:
                     result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.'
                 result = "<tool_response>\n" + result + "\n</tool_response>"
                 # print(result)
diff --git a/inference/run_multi_react.py b/inference/run_multi_react.py
index 4a517ea..1056a0a 100644
--- a/inference/run_multi_react.py
+++ b/inference/run_multi_react.py
@@ -5,7 +5,9 @@
 import concurrent.futures
 from tqdm import tqdm
 import threading
+from datetime import datetime
 from react_agent import MultiTurnReactAgent
+import time
 import math
 
 if __name__ == "__main__":
diff --git a/inference/tool_file.py b/inference/tool_file.py
index 7fb1ce9..77c4960 100644
--- a/inference/tool_file.py
+++ b/inference/tool_file.py
@@ -9,12 +9,26 @@
 """
 import sys
 import os
+import re
+import time
+import copy
 import json
-
-from qwen_agent.tools.base import BaseTool
-from qwen_agent.settings import DEFAULT_MAX_INPUT_TOKENS
+from typing import Dict, Iterator, List, Literal, Tuple, Union, Any, Optional
+import json5
+import asyncio
+from openai import OpenAI, AsyncOpenAI
+import pdb
+import bdb
+
+from qwen_agent.tools.base import BaseTool, register_tool
+from qwen_agent.agents import Assistant
+from qwen_agent.llm import BaseChatModel
+from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS
+from qwen_agent.llm.schema import ASSISTANT, USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE, SYSTEM, ROLE
 from qwen_agent.tools import BaseTool
-from qwen_agent.utils.tokenization_qwen import count_tokens
+from qwen_agent.log import logger
+from qwen_agent.utils.tokenization_qwen import count_tokens, tokenizer
+from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS
 
 current_dir = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.dirname(current_dir)) 
diff --git a/inference/tool_python.py b/inference/tool_python.py
index d851ee8..e8e5522 100644
--- a/inference/tool_python.py
+++ b/inference/tool_python.py
@@ -1,13 +1,14 @@
 import re
-from typing import Dict, Optional, Union
+from typing import Dict, List, Optional, Union
 import json5
 from qwen_agent.tools.base import BaseToolWithFileAccess, register_tool
 from qwen_agent.utils.utils import extract_code
-from sandbox_fusion import run_code, RunCodeRequest
+from sandbox_fusion import run_code, RunCodeRequest, RunStatus
 from requests.exceptions import Timeout
 import os
 import random
 import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
 
 # Array of sandbox fusion endpoints
 SANDBOX_FUSION_ENDPOINTS = []
@@ -79,12 +80,12 @@ def call(self, params, files= None, timeout = 50, **kwargs) -> str:
                     if code_result.run_result.stderr:
                         result.append(f"stderr:\n{code_result.run_result.stderr}")
                     if code_result.run_result.execution_time >= timeout-1:
-                        result.append("[PythonInterpreter Error] TimeoutError: Execution timed out.")
+                        result.append(f"[PythonInterpreter Error] TimeoutError: Execution timed out.")
                     result = '\n'.join(result)
                     print('SUCCESS RUNNING TOOL')
                     return result if result.strip() else 'Finished execution.'
 
-                except Timeout:
+                except Timeout as e:
                     last_error = f'[Python Interpreter Error] TimeoutError: Execution timed out on endpoint {endpoint}.'
                     print(f"Timeout on attempt {attempt + 1}: {last_error}")
                     if attempt == 4:  # Last attempt
@@ -136,7 +137,7 @@ def call_specific_endpoint(self, params: Union[str, dict], endpoint: str, timeou
             execution_time = end_time - start_time
             return True, result if result.strip() else 'Finished execution.', execution_time
 
-        except Timeout:
-            return False, '[Python Interpreter Error] TimeoutError: Execution timed out.', None
+        except Timeout as e:
+            return False, f'[Python Interpreter Error] TimeoutError: Execution timed out.', None
         except Exception as e:
-            return False, f'[Python Interpreter Error]: {str(e)}', None
\ No newline at end of file
+            return False, f'[Python Interpreter Error]: {str(e)}', None
diff --git a/inference/tool_scholar.py b/inference/tool_scholar.py
index 90e97c5..ae021b3 100644
--- a/inference/tool_scholar.py
+++ b/inference/tool_scholar.py
@@ -1,5 +1,6 @@
 import os
 import json
+import requests
 from typing import Union, List
 from qwen_agent.tools.base import BaseTool, register_tool
 from concurrent.futures import ThreadPoolExecutor
@@ -43,7 +44,7 @@ def google_scholar_with_serp(self, query: str):
             except Exception as e:
                 print(e)
                 if i == 4:
-                    return "Google Scholar Timeout, return None, Please try again later."
+                    return f"Google Scholar Timeout, return None, Please try again later."
                 continue
         
 
@@ -86,7 +87,7 @@ def google_scholar_with_serp(self, query: str):
 
             content = f"A Google scholar for '{query}' found {len(web_snippets)} results:\n\n## Scholar Results\n" + "\n\n".join(web_snippets)
             return content
-        except Exception:
+        except:
             return f"No results found for '{query}'. Try with a more general query."
 
 
@@ -95,7 +96,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str:
         try:
             params = self._verify_json_format_args(params)
             query = params["query"]
-        except Exception:
+        except:
             return "[google_scholar] Invalid request format: Input must be a JSON object containing 'query' field"
         
         if isinstance(query, str):
diff --git a/inference/tool_search.py b/inference/tool_search.py
index 499ff0c..d2289df 100644
--- a/inference/tool_search.py
+++ b/inference/tool_search.py
@@ -1,8 +1,13 @@
 import json
+from concurrent.futures import ThreadPoolExecutor
 from typing import List, Union
+import requests
 from qwen_agent.tools.base import BaseTool, register_tool
-from typing import Optional
+import asyncio
+from typing import Dict, List, Optional, Union
+import uuid
 import http.client
+import json
 
 import os
 
@@ -63,7 +68,7 @@ def contains_chinese_basic(text: str) -> bool:
             except Exception as e:
                 print(e)
                 if i == 4:
-                    return "Google search Timeout, return None, Please try again later."
+                    return f"Google search Timeout, return None, Please try again later."
                 continue
     
         data = res.read()
diff --git a/inference/tool_visit.py b/inference/tool_visit.py
index 97284ff..4981a2c 100644
--- a/inference/tool_visit.py
+++ b/inference/tool_visit.py
@@ -1,11 +1,17 @@
 import json
 import os
+import signal
+import threading
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import List, Union
 import requests
 from qwen_agent.tools.base import BaseTool, register_tool
 from prompt import EXTRACTOR_PROMPT 
 from openai import OpenAI
+import random
+from urllib.parse import urlparse, unquote
 import time 
+from transformers import AutoTokenizer
 import tiktoken
 
 VISIT_SERVER_TIMEOUT = int(os.getenv("VISIT_SERVER_TIMEOUT", 200))
@@ -116,7 +122,7 @@ def call_server(self, msgs, max_retries=2):
                         if left != -1 and right != -1 and left <= right: 
                             content = content[left:right+1]
                     return content
-            except Exception:
+            except Exception as e:
                 # print(e)
                 if attempt == (max_retries - 1):
                     return ""
@@ -153,7 +159,7 @@ def jina_readpage(self, url: str) -> str:
                 else:
                     print(response.text)
                     raise ValueError("jina readpage error")
-            except Exception:
+            except Exception as e:
                 time.sleep(0.5)
                 if attempt == max_retries - 1:
                     return "[visit] Failed to read page."

From fac9a0725a9855bc565de6f43e7fdd5167107133 Mon Sep 17 00:00:00 2001
From: Samad <Mirzasamadahmedbaig@gmail.com>
Date: Sun, 21 Sep 2025 12:08:11 +0500
Subject: [PATCH 10/13] Revert "Fix remaining E722 ruff errors"

This reverts commit ff94125461682fed4c23b3527559d4eb2466c54d.
---
 inference/tool_search.py | 4 ++--
 inference/tool_visit.py  | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/inference/tool_search.py b/inference/tool_search.py
index d2289df..1a3f7b5 100644
--- a/inference/tool_search.py
+++ b/inference/tool_search.py
@@ -101,7 +101,7 @@ def contains_chinese_basic(text: str) -> bool:
 
             content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets)
             return content
-        except Exception:
+        except:
             return f"No results found for '{query}'. Try with a more general query."
 
 
@@ -113,7 +113,7 @@ def search_with_serp(self, query: str):
     def call(self, params: Union[str, dict], **kwargs) -> str:
         try:
             query = params["query"]
-        except Exception:
+        except:
             return "[Search] Invalid request format: Input must be a JSON object containing 'query' field"
         
         if isinstance(query, str):
diff --git a/inference/tool_visit.py b/inference/tool_visit.py
index 4981a2c..92e4e3a 100644
--- a/inference/tool_visit.py
+++ b/inference/tool_visit.py
@@ -65,7 +65,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str:
         try:
             url = params["url"]
             goal = params["goal"]
-        except Exception:
+        except:
             return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields"
 
         start_time = time.time()
@@ -115,7 +115,7 @@ def call_server(self, msgs, max_retries=2):
                 if content:
                     try:
                         json.loads(content)
-                    except Exception:
+                    except:
                         # extract json from string 
                         left = content.find('{')
                         right = content.rfind('}') 
@@ -227,7 +227,7 @@ def readpage_jina(self, url: str, goal: str) -> str:
                 try:
                     raw = json.loads(raw)
                     break
-                except Exception:
+                except:
                     raw = summary_page_func(messages, max_retries=max_retries)
                     parse_retry_times += 1
             

From 33278d416578654d9b9660b27fe2483bd7369297 Mon Sep 17 00:00:00 2001
From: Samad <Mirzasamadahmedbaig@gmail.com>
Date: Sun, 21 Sep 2025 12:15:38 +0500
Subject: [PATCH 11/13] Revert "Fix F403 and F405 ruff errors"

This reverts commit 3f517c712e752ef444dd5127f7f82e98b30ba93e.
---
 WebAgent/WebSailor/src/run_multi_react.py  | 2 +-
 WebAgent/WebWalker/src/agent.py            | 6 +++---
 evaluation/evaluate_deepsearch_official.py | 6 +++---
 inference/react_agent.py                   | 7 ++++---
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/WebAgent/WebSailor/src/run_multi_react.py b/WebAgent/WebSailor/src/run_multi_react.py
index 7057a07..d86489a 100644
--- a/WebAgent/WebSailor/src/run_multi_react.py
+++ b/WebAgent/WebSailor/src/run_multi_react.py
@@ -8,7 +8,7 @@
 from react_agent import MultiTurnReactAgent
 from prompt import SYSTEM_PROMPT_MULTI, USER_PROMPT
 from tool_search import *
-from tool_visit import Visit 
+from tool_visit import * 
 
 
 if __name__ == "__main__":
diff --git a/WebAgent/WebWalker/src/agent.py b/WebAgent/WebWalker/src/agent.py
index 02ffeb2..fc035ee 100644
--- a/WebAgent/WebWalker/src/agent.py
+++ b/WebAgent/WebWalker/src/agent.py
@@ -9,7 +9,7 @@
 from qwen_agent.utils.utils import format_as_text_message, merge_generate_cfgs
 from openai import OpenAI
 import time
-from prompts import STSTEM_CRITIIC_INFORMATION, STSTEM_CRITIIC_ANSWER, SYSTEM_EXPLORER
+from prompts import *
 
 
 TOOL_DESC = (
@@ -140,9 +140,9 @@ def _run(self, messages: List[Message], lang: Literal['en', 'zh'] = 'en', **kwar
             if stage1:
                 self.momery.append(stage1+"\n")
                 if len(self.momery) > 1:
-                    yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"")}]
+                    yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"")]
                 else:
-                    yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"")}]
+                    yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"")]
                 stage2 = self.critic_information(query, self.momery)
                 if stage2:
                     response = f'Final Answer: {stage2}'
diff --git a/evaluation/evaluate_deepsearch_official.py b/evaluation/evaluate_deepsearch_official.py
index b348f66..d5aed58 100644
--- a/evaluation/evaluate_deepsearch_official.py
+++ b/evaluation/evaluate_deepsearch_official.py
@@ -457,10 +457,10 @@ def main():
     args = parser.parse_args()
     
     dataset = args.dataset  
-    if dataset in ["gaia", "webwalker"]: 
+    if dataset in ["gaia", "webwalker"]:
         judge_model = "openai/qwen2.5-72b-instruct"
         judge_prompt = JUDGE_PROMPT_GAIA 
-    elif dataset in ["xbench-deepsearch"]: 
+    elif dataset in ["xbench-deepsearch"]:
         judge_prompt = JUDGE_PROMPT_XBENCH
         judge_model = "google/gemini-2.0-flash-001"
     elif dataset.startswith("browsecomp_zh"):
@@ -579,4 +579,4 @@ def main():
     except Exception as e:
         error_str = traceback.format_exc()
         print(f"Evaluation Failed: {e}") 
-        print("Trace Back", error_str)
\ No newline at end of file
+        print("Trace Back", error_str)
diff --git a/inference/react_agent.py b/inference/react_agent.py
index 8c26a35..ec3aa26 100644
--- a/inference/react_agent.py
+++ b/inference/react_agent.py
@@ -22,10 +22,11 @@
 from tool_scholar import *
 from tool_python import *
 from tool_search import *
-from tool_visit import Visit
+from tool_visit import *
 
 OBS_START = '<tool_response>'
-OBS_END = '\n</tool_response>'
+OBS_END = '
+</tool_response>'
 
 MAX_LLM_CALL_PER_RUN = int(os.getenv('MAX_LLM_CALL_PER_RUN', 100))
 
@@ -249,4 +250,4 @@ def custom_call_tool(self, tool_name: str, tool_args: dict, **kwargs):
             return result
 
         else:
-            return f"Error: Tool {tool_name} not found"
+            return f"Error: Tool {tool_name} not found"}
\ No newline at end of file

From 3ff09e330bc22513031c111db7e10336d0eef557 Mon Sep 17 00:00:00 2001
From: MirzaSamadAhmedBaig
 <89132160+Mirza-Samad-Ahmed-Baig@users.noreply.github.com>
Date: Mon, 22 Sep 2025 14:45:19 +0500
Subject: [PATCH 12/13] Update app.py: remove stray closing brace

---
 WebAgent/WebWalker/src/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/WebAgent/WebWalker/src/app.py b/WebAgent/WebWalker/src/app.py
index f7fba53..73bc557 100644
--- a/WebAgent/WebWalker/src/app.py
+++ b/WebAgent/WebWalker/src/app.py
@@ -268,4 +268,4 @@ def call(self, params: str, **kwargs) -> str:
             else:
                 return "The button can not be clicked, please retry a new botton!"
         else:
-            return "Your input is invalid, plase output the action input correctly!"}
\ No newline at end of file
+            return "Your input is invalid, plase output the action input correctly!"

From 68cb1dec596872cf98d20309b2543fd140b5aa49 Mon Sep 17 00:00:00 2001
From: MirzaSamadAhmedBaig
 <89132160+Mirza-Samad-Ahmed-Baig@users.noreply.github.com>
Date: Mon, 22 Sep 2025 15:31:21 +0500
Subject: [PATCH 13/13] Update react_agent.py: remove stray closing brace

---
 inference/react_agent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inference/react_agent.py b/inference/react_agent.py
index ec3aa26..2e1dee8 100644
--- a/inference/react_agent.py
+++ b/inference/react_agent.py
@@ -250,4 +250,4 @@ def custom_call_tool(self, tool_name: str, tool_args: dict, **kwargs):
             return result
 
         else:
-            return f"Error: Tool {tool_name} not found"}
\ No newline at end of file
+            return f"Error: Tool {tool_name} not found"