From 48d5f8f93e3df2628eace789750f3b57ae8a7dca Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 01:59:04 +0500 Subject: [PATCH 01/13] Refactor react_agent.py to improve security and maintainability --- WebAgent/WebSailor/src/react_agent.py | 97 ++++++++++++++------------- 1 file changed, 49 insertions(+), 48 deletions(-) diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py index 9e5e1db..2517a18 100644 --- a/WebAgent/WebSailor/src/react_agent.py +++ b/WebAgent/WebSailor/src/react_agent.py @@ -38,8 +38,8 @@ def __init__(self, def call_server(self, msgs, max_tries=10): # Set OpenAI API key and base URL using vLLM API server - openai_api_key = "EMPTY" - openai_api_base = "http://127.0.0.1:6001/v1" + openai_api_key = os.getenv("OPENAI_API_KEY", "EMPTY") + openai_api_base = os.getenv("OPENAI_API_BASE", "http://127.0.0.1:6001/v1") client = OpenAI( api_key=openai_api_key, @@ -77,15 +77,53 @@ def count_tokens(self, messages, model="gpt-4o"): return len(tokenizer.encode(full_prompt)) + def _process_tool_call(self, content, messages): + if '' in content and '' in content: + tool_call = content.split('')[1].split('')[0] + try: + tool_call = json.loads(tool_call) + tool_name = tool_call.get('name', '') + tool_args = tool_call.get('arguments', {}) + result = self._call_tool(tool_name, tool_args) + except: + result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' + result = "\n" + result + "\n" + messages.append({"role": "user", "content": result}) + return messages + + def _handle_token_limit(self, messages, question, answer, rollout_id): + print(f"Token count exceeds limit") + + messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:your final thinking\nyour answer" + content = self.call_server(messages) + messages.append({"role": "assistant", "content": content.strip()}) + if '' in content and '' in content: + prediction = messages[-1]['content'].split('')[1].split('')[0] + termination = 'generate an answer as token limit reached' + else: + prediction = messages[-1]['content'] + termination = 'format error: generate an answer as token limit reached' + return self._generate_result(question, answer, rollout_id, messages, prediction, termination) + + def _generate_result(self, question, answer, rollout_id, messages, prediction, termination): + return { + "question": question, + "answer": answer, + "rollout_id": rollout_id, + "messages": messages, + "prediction": prediction, + "termination": termination + } + def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[Message]]: self.model=model - try: - question = data['item']['question'] - except: - raw_msg = data['item']['messages'][1]["content"] - question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg + question = data.get('item', {}).get('question', '') + if not question: + raw_msg = data.get('item', {}).get('messages', [{}, {}])[1].get("content", "") + question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg - answer = data['item']['answer'] + answer = data.get('item', {}).get('answer', '') + rollout_id = data.get('rollout_id', '') self.user_prompt = user_prompt self.user_prompt = self.user_prompt + question messages = [{"role": "system", "content": self.system_message}, {"role": "user", "content": self.user_prompt}] @@ -100,17 +138,7 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M pos = content.find('') content = content[:pos] messages.append({"role": "assistant", "content": content.strip()}) - if '' in content and '' in content: - tool_call = content.split('')[1].split('')[0] - try: - tool_call = json.loads(tool_call) - tool_name = tool_call.get('name', '') - tool_args = tool_call.get('arguments', {}) - result = self._call_tool(tool_name, tool_args) - except: - result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' - result = "\n" + result + "\n" - messages.append({"role": "user", "content": result}) + messages = self._process_tool_call(content, messages) if '' in content and '' in content: termination = 'answer' break @@ -122,26 +150,7 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M print(f"round: {round}, token count: {token_count}") if token_count > max_tokens: - print(f"Token count exceeds limit: {token_count} > {max_tokens}") - - messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:your final thinking\nyour answer" - content = self.call_server(messages) - messages.append({"role": "assistant", "content": content.strip()}) - if '' in content and '' in content: - prediction = messages[-1]['content'].split('')[1].split('')[0] - termination = 'generate an answer as token limit reached' - else: - prediction = messages[-1]['content'] - termination = 'format error: generate an answer as token limit reached' - result = { - "question": question, - "answer": answer, - "rollout_id": data['rollout_id'], - "messages": messages, - "prediction": prediction, - "termination": termination - } - return result + return self._handle_token_limit(messages, question, answer, rollout_id) if '' in messages[-1]['content']: prediction = messages[-1]['content'].split('')[1].split('')[0] @@ -151,12 +160,4 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M termination = 'answer not found' if num_llm_calls_available == 0: termination = 'exceed available llm calls' - result = { - "question": question, - "answer": answer, - "rollout_id": data['rollout_id'], - "messages": messages, - "prediction": prediction, - "termination": termination - } - return result + return self._generate_result(question, answer, rollout_id, messages, prediction, termination) \ No newline at end of file From a9256b7dfa1355f42d04cfef79f08c5c592758ad Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 11:34:35 +0500 Subject: [PATCH 02/13] chore(ci): add basic Ruff lint workflow --- .github/workflows/ci-lint.yml | 20 ++++++++++++++++++++ commit_message.txt | 1 + 2 files changed, 21 insertions(+) create mode 100644 .github/workflows/ci-lint.yml create mode 100644 commit_message.txt diff --git a/.github/workflows/ci-lint.yml b/.github/workflows/ci-lint.yml new file mode 100644 index 0000000..864e85b --- /dev/null +++ b/.github/workflows/ci-lint.yml @@ -0,0 +1,20 @@ +name: CI - Lint + +on: + pull_request: + branches: ["main"] + push: + branches: ["chore/add-ci-lint"] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install ruff + run: pip install ruff==0.5.6 + - name: Lint + run: ruff check --output-format=github . \ No newline at end of file diff --git a/commit_message.txt b/commit_message.txt new file mode 100644 index 0000000..0e40afe --- /dev/null +++ b/commit_message.txt @@ -0,0 +1 @@ +Refactor react_agent.py to improve security and maintainability \ No newline at end of file From ee250218b8b6acd6e596bae47ba0f23bf24dfb84 Mon Sep 17 00:00:00 2001 From: MirzaSamadAhmedBaig <89132160+Mirza-Samad-Ahmed-Baig@users.noreply.github.com> Date: Sun, 21 Sep 2025 11:37:06 +0500 Subject: [PATCH 03/13] deleting cm --- commit_message.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 commit_message.txt diff --git a/commit_message.txt b/commit_message.txt deleted file mode 100644 index 0e40afe..0000000 --- a/commit_message.txt +++ /dev/null @@ -1 +0,0 @@ -Refactor react_agent.py to improve security and maintainability \ No newline at end of file From d60f2545369347a48f8d4aa1253f6abb9c021f6c Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 11:48:06 +0500 Subject: [PATCH 04/13] Fix E722 ruff errors --- .../WebDancer/demos/assistant_qwq_chat.py | 8 ++----- WebAgent/WebDancer/demos/llm/oai.py | 3 +-- .../WebDancer/demos/tools/private/search.py | 6 ++--- .../WebDancer/demos/tools/private/visit.py | 4 ++-- WebAgent/WebDancer/demos/utils/logs.py | 1 - WebAgent/WebSailor/src/evaluate.py | 4 ++-- WebAgent/WebSailor/src/react_agent.py | 8 +++---- WebAgent/WebSailor/src/tool_search.py | 7 +++--- WebAgent/WebSailor/src/tool_visit.py | 11 +++++----- WebAgent/WebWalker/src/agent.py | 4 ++-- WebAgent/WebWalker/src/app.py | 1 - evaluation/evaluate_deepsearch_official.py | 21 ++++++++---------- inference/file_tools/file_parser.py | 7 +++--- inference/file_tools/idp.py | 6 ++--- inference/file_tools/video_agent.py | 11 ---------- inference/react_agent.py | 15 +++++-------- inference/run_multi_react.py | 2 -- inference/tool_file.py | 22 ++++--------------- inference/tool_python.py | 15 ++++++------- inference/tool_scholar.py | 7 +++--- inference/tool_search.py | 9 ++------ inference/tool_visit.py | 10 ++------- 22 files changed, 62 insertions(+), 120 deletions(-) diff --git a/WebAgent/WebDancer/demos/assistant_qwq_chat.py b/WebAgent/WebDancer/demos/assistant_qwq_chat.py index 3fa38ff..b0a48a6 100644 --- a/WebAgent/WebDancer/demos/assistant_qwq_chat.py +++ b/WebAgent/WebDancer/demos/assistant_qwq_chat.py @@ -2,15 +2,11 @@ import os -from qwen_agent.agents import Assistant -from qwen_agent.utils.output_beautify import typewriter_print from demos.agents.search_agent import SearchAgent from demos.llm.oai import TextChatAtOAI -from demos.llm.qwen_dashscope import QwenChatAtDS from demos.gui.web_ui import WebUI from demos.utils.date import date2str, get_date_now -from demos.tools import Visit, Search ROOT_RESOURCE = os.path.join(os.path.dirname(__file__), 'resource') @@ -50,8 +46,8 @@ def make_system_prompt(): llm=llm_cfg, function_list=tools, system_message="", - name=f'WebDancer', - description=f"I am WebDancer, a web information seeking agent, welcome to try!", + name='WebDancer', + description="I am WebDancer, a web information seeking agent, welcome to try!", extra={ 'reasoning': reasoning, 'max_llm_calls': max_llm_calls, diff --git a/WebAgent/WebDancer/demos/llm/oai.py b/WebAgent/WebDancer/demos/llm/oai.py index 0df27fe..53a261d 100644 --- a/WebAgent/WebDancer/demos/llm/oai.py +++ b/WebAgent/WebDancer/demos/llm/oai.py @@ -2,7 +2,6 @@ import json import logging import os -from http import HTTPStatus from pprint import pformat from typing import Dict, Iterator, List, Optional, Literal, Union @@ -15,7 +14,7 @@ from qwen_agent.llm.base import ModelServiceError, register_llm from qwen_agent.llm.function_calling import BaseFnCallModel, simulate_response_completion_with_chat -from qwen_agent.llm.schema import ASSISTANT, Message, FunctionCall +from qwen_agent.llm.schema import ASSISTANT, Message from qwen_agent.log import logger diff --git a/WebAgent/WebDancer/demos/tools/private/search.py b/WebAgent/WebDancer/demos/tools/private/search.py index 29278ec..7f994ee 100644 --- a/WebAgent/WebDancer/demos/tools/private/search.py +++ b/WebAgent/WebDancer/demos/tools/private/search.py @@ -30,7 +30,7 @@ def call(self, params: str, **kwargs) -> str: try: params = self._verify_json_format_args(params) query = params["query"][:MAX_MULTIQUERY_NUM] - except: + except Exception: return "[Search] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): @@ -57,9 +57,9 @@ def google_search(self, query: str) -> str: response = requests.post(url, headers=headers, data=json.dumps(data)) results = response.json() break - except Exception as e: + except Exception: if i == 4: - return f"Google search Timeout, return None, Please try again later." + return "Google search Timeout, return None, Please try again later." continue if response.status_code != 200: diff --git a/WebAgent/WebDancer/demos/tools/private/visit.py b/WebAgent/WebDancer/demos/tools/private/visit.py index b911cc6..fef4abe 100644 --- a/WebAgent/WebDancer/demos/tools/private/visit.py +++ b/WebAgent/WebDancer/demos/tools/private/visit.py @@ -62,7 +62,7 @@ def jina_readpage(url: str) -> str: else: print(response.text) raise ValueError("jina readpage error") - except Exception as e: + except Exception: if attempt == max_retries - 1: return "[visit] Failed to read page." @@ -97,7 +97,7 @@ def call(self, params: str, **kwargs) -> str: params = self._verify_json_format_args(params) url = params["url"] goal = params["goal"] - except: + except Exception: return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields" if isinstance(url, str): response = self.readpage(url, goal) diff --git a/WebAgent/WebDancer/demos/utils/logs.py b/WebAgent/WebDancer/demos/utils/logs.py index c8cd432..629814d 100644 --- a/WebAgent/WebDancer/demos/utils/logs.py +++ b/WebAgent/WebDancer/demos/utils/logs.py @@ -1,6 +1,5 @@ # coding=utf-8 import os -import sys import logging diff --git a/WebAgent/WebSailor/src/evaluate.py b/WebAgent/WebSailor/src/evaluate.py index 6b3a22c..0386a54 100644 --- a/WebAgent/WebSailor/src/evaluate.py +++ b/WebAgent/WebSailor/src/evaluate.py @@ -289,7 +289,7 @@ def main(): for i in [1, 2, 3] } - print(f"===========") + print("===========") print(f"Avg. Pass@3 {avg_pass_at_3}%") print(f"Best Pass@1 {best_pass_at_1}%") print(f"Pass@3 {pass_at_3}%") @@ -297,7 +297,7 @@ def main(): print(f"# Invalid {aggr_statistics['num_invalid']} # Extra Length {aggr_statistics['extra_length']}") print(f"Avg. Action {aggr_statistics['avg_action']:.2f} Avg. Visit Action {aggr_statistics['avg_visit_action']:.2f} Avg. Search Action {aggr_statistics['avg_search_action']:.2f} Avg. Other Action {aggr_statistics['avg_other_action']:.2f}") print(f"Avg. Answer Length {aggr_statistics['avg_ans_length']:.2f} Avg. Thinking Length {aggr_statistics['avg_think_length']:.2f}") - print(f"===========" ) + print("===========" ) overall_eval_dict = { "dataset": dataset, diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py index 2517a18..79df9ac 100644 --- a/WebAgent/WebSailor/src/react_agent.py +++ b/WebAgent/WebSailor/src/react_agent.py @@ -61,7 +61,7 @@ def call_server(self, msgs, max_tries=10): except Exception as e: if attempt == (max_tries - 1): print(f"SGLang server error {e}") - return f"SGLang server error" + return "SGLang server error" continue return "SGLang server empty response" @@ -69,7 +69,7 @@ def call_server(self, msgs, max_tries=10): def count_tokens(self, messages, model="gpt-4o"): try: tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path) - except Exception as e: + except Exception: tokenizer = tiktoken.encoding_for_model(model) full_message = [Message(**x) for x in messages] @@ -85,14 +85,14 @@ def _process_tool_call(self, content, messages): tool_name = tool_call.get('name', '') tool_args = tool_call.get('arguments', {}) result = self._call_tool(tool_name, tool_args) - except: + except Exception: result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' result = "\n" + result + "\n" messages.append({"role": "user", "content": result}) return messages def _handle_token_limit(self, messages, question, answer, rollout_id): - print(f"Token count exceeds limit") + print("Token count exceeds limit") messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:your final thinking\nyour answer" content = self.call_server(messages) diff --git a/WebAgent/WebSailor/src/tool_search.py b/WebAgent/WebSailor/src/tool_search.py index 3643c53..ba0da49 100644 --- a/WebAgent/WebSailor/src/tool_search.py +++ b/WebAgent/WebSailor/src/tool_search.py @@ -3,7 +3,6 @@ from concurrent.futures import ThreadPoolExecutor from typing import List, Union import requests -from qwen_agent.tools.base import BaseTool, register_tool import os SEARCH_API_URL = os.getenv("SEARCH_API_URL") @@ -50,7 +49,7 @@ def google_search(self, query: str): except Exception as e: print(e) if i == 4: - return f"Google search Timeout, return None, Please try again later." + return "Google search Timeout, return None, Please try again later." if response.status_code != 200: raise Exception(f"Error: {response.status_code} - {response.text}") @@ -82,7 +81,7 @@ def google_search(self, query: str): content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets) return content - except: + except Exception: return f"No results found for '{query}'. Try with a more general query, or remove the year filter." @@ -90,7 +89,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: assert GOOGLE_SEARCH_KEY is not None, "Please set the GOOGLE_SEARCH_KEY environment variable." try: query = params["query"] - except: + except Exception: return "[Search] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): diff --git a/WebAgent/WebSailor/src/tool_visit.py b/WebAgent/WebSailor/src/tool_visit.py index ac8e5e6..fbae5e9 100644 --- a/WebAgent/WebSailor/src/tool_visit.py +++ b/WebAgent/WebSailor/src/tool_visit.py @@ -6,7 +6,6 @@ from prompt import EXTRACTOR_PROMPT import os from openai import OpenAI -import random WEBCONTENT_MAXLENGTH = int(os.getenv("WEBCONTENT_MAXLENGTH", 150000)) @@ -46,7 +45,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: try: url = params["url"] goal = params["goal"] - except: + except Exception: return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields" if isinstance(url, str): @@ -87,14 +86,14 @@ def call_server(self, msgs, max_tries=10): if content: try: json.loads(content) - except: + except Exception: # extract json from string left = content.find('{') right = content.rfind('}') if left != -1 and right != -1 and left <= right: content = content[left:right+1] return content - except: + except Exception: if attempt == (max_tries - 1): return "" continue @@ -129,7 +128,7 @@ def jina_readpage(self, url: str) -> str: else: print(response.text) raise ValueError("jina readpage error") - except Exception as e: + except Exception: if attempt == max_retries - 1: return "[visit] Failed to read page." @@ -191,7 +190,7 @@ def readpage(self, url: str, goal: str) -> str: # 尝试 parse json raw = json.loads(raw) break - except: + except Exception: raw = self.call_server(messages) parse_retry_times += 1 # parse 失败 diff --git a/WebAgent/WebWalker/src/agent.py b/WebAgent/WebWalker/src/agent.py index c8e0421..c5274e7 100644 --- a/WebAgent/WebWalker/src/agent.py +++ b/WebAgent/WebWalker/src/agent.py @@ -63,7 +63,7 @@ def observation_information_extraction(self, query, observation): if "true" in response.choices[0].message.content: try: return json.loads(response.choices[0].message.content)["information"] - except: + except Exception: return response.choices[0].message.content else: return None @@ -97,7 +97,7 @@ def critic_information(self, query, memory): if "true" in response.choices[0].message.content: try: return json.loads(response.choices[0].message.content)["answer"] - except: + except Exception: return response.choices[0].message.content else: return None diff --git a/WebAgent/WebWalker/src/app.py b/WebAgent/WebWalker/src/app.py index 73bc557..7351368 100644 --- a/WebAgent/WebWalker/src/app.py +++ b/WebAgent/WebWalker/src/app.py @@ -3,7 +3,6 @@ import json5 from agent import WebWalker from qwen_agent.tools.base import BaseTool, register_tool -import os import re import json import asyncio diff --git a/evaluation/evaluate_deepsearch_official.py b/evaluation/evaluate_deepsearch_official.py index 036973b..aaf384c 100644 --- a/evaluation/evaluate_deepsearch_official.py +++ b/evaluation/evaluate_deepsearch_official.py @@ -1,7 +1,5 @@ -from pydantic import BaseModel from openai import OpenAI import concurrent.futures -from typing import Literal import litellm import os import argparse @@ -9,7 +7,6 @@ import concurrent from tqdm import tqdm from transformers import AutoTokenizer -import re from prompt import * import traceback import tiktoken @@ -178,7 +175,7 @@ def count_tokens_with_tokenizer(text, tokenizer): return len(tokenizer.encode(text)) else: return len(tokenizer.encode(text)) - except: + except Exception: return len(text) // 4 @@ -224,7 +221,7 @@ def single_round_statistics(input_file): try: tokenizer = AutoTokenizer.from_pretrained(os.getenv("Qwen2_5_7B_PATH", "")) - except Exception as e: + except Exception: tokenizer = tiktoken.encoding_for_model("gpt-4o") for item in contents: @@ -303,7 +300,7 @@ def single_round_statistics(input_file): try: if len(tokenizer.encode("".join([msg["content"] for msg in messages]))) > 30000: num_extra += 1 - except: + except Exception: pass total_questions = len(contents) @@ -329,7 +326,7 @@ def calculate_enhanced_statistics(round_results, round_items): try: tokenizer = AutoTokenizer.from_pretrained(os.getenv("Qwen2_5_7B_PATH", "")) - except Exception as e: + except Exception: tokenizer = tiktoken.encoding_for_model("gpt-4o") enhanced_stats = {} @@ -345,7 +342,7 @@ def calculate_enhanced_statistics(round_results, round_items): continue try: matching_item = [item for item in items if item['messages'][1]['content'] == result['question']] - except: + except Exception: items = [item for item in items if len(item['messages'])>0] matching_item = [item for item in items if item['messages'][1]['content'] == result['question']] if not matching_item: @@ -530,7 +527,7 @@ def main(): for i in [1, 2, 3] } - print(f"===========") + print("===========") print(f"Avg. Pass@3 {avg_pass_at_3}%") print(f"Best Pass@1 {best_pass_at_1}%") print(f"Pass@3 {pass_at_3}%") @@ -541,18 +538,18 @@ def main(): print(f"Avg. Action {aggr_statistics['avg_action']:.2f} Avg. Visit Action {aggr_statistics['avg_visit_action']:.2f} Avg. Search Action {aggr_statistics['avg_search_action']:.2f} Avg. Other Action {aggr_statistics['avg_other_action']:.2f}") print(f"Avg. Answer Length {aggr_statistics['avg_ans_length']:.2f} Avg. Thinking Length {aggr_statistics['avg_think_length']:.2f}") enhanced_statistics = calculate_enhanced_statistics(round_results, round_items) - print(f"\n=== ADDITIONAL STATISTICS ===") + print("\n=== ADDITIONAL STATISTICS ===") print(f"Avg. Tool Calls per Question: {aggr_statistics['avg_tool_calls_per_question']:.2f}") print(f"Avg. Tool Calls per Question (Correctly Solved): {enhanced_statistics['avg_tool_calls_per_question_correctly_solved']:.2f}") print(f"Avg. Assistant Tokens per Question: {aggr_statistics['avg_assistant_tokens_per_question']:.2f}") print(f"Avg. Assistant Tokens per Question (Correctly Solved): {enhanced_statistics['avg_assistant_tokens_per_question_correctly_solved']:.2f}") print(f"Avg. Assistant Tokens per Message: {aggr_statistics['avg_assistant_tokens_per_message']:.2f}") - print(f"\n=== TERMINATION FREQUENCIES ===") + print("\n=== TERMINATION FREQUENCIES ===") for termination_type, frequency in aggr_statistics['termination_freq'].items(): print(f"{termination_type}: {frequency:.3f}") - print(f"===========" ) + print("===========" ) overall_eval_dict = { "dataset": dataset, diff --git a/inference/file_tools/file_parser.py b/inference/file_tools/file_parser.py index ecbace9..3410da7 100644 --- a/inference/file_tools/file_parser.py +++ b/inference/file_tools/file_parser.py @@ -4,7 +4,6 @@ import time import zipfile import math -from pathlib import Path from typing import Any, Dict, List, Optional, Union from collections import Counter @@ -17,7 +16,7 @@ from tabulate import tabulate from qwen_agent.log import logger from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS -from qwen_agent.tools.base import BaseTool, register_tool +from qwen_agent.tools.base import BaseTool from qwen_agent.tools.storage import KeyNotExistsError, Storage from file_tools.utils import (get_file_type, hash_sha256, is_http_url, get_basename_from_url, sanitize_chrome_file_path, save_url_to_local_work_dir) @@ -522,7 +521,7 @@ def _process_new_file(self, file_path: str) -> Union[str, list]: if USE_IDP and file_type in idp_types: try: results = parse_file_by_idp(file_path=file_path) - except Exception as e: + except Exception: results = self.parsers[file_type](file_path) else: results = self.parsers[file_type](file_path) @@ -536,7 +535,7 @@ def _process_new_file(self, file_path: str) -> Union[str, list]: tokens += para['token'] if not results or not tokens: - logger.error(f"Parsing failed: No information was parsed") + logger.error("Parsing failed: No information was parsed") raise FileParserError("Document parsing failed") else: self._cache_result(file_path, results) diff --git a/inference/file_tools/idp.py b/inference/file_tools/idp.py index 71199cb..b77872b 100644 --- a/inference/file_tools/idp.py +++ b/inference/file_tools/idp.py @@ -1,12 +1,10 @@ import os -import json from alibabacloud_docmind_api20220711.client import Client as docmind_api20220711Client from alibabacloud_tea_openapi import models as open_api_models from alibabacloud_docmind_api20220711 import models as docmind_api20220711_models from alibabacloud_tea_util.client import Client as UtilClient from alibabacloud_tea_util import models as util_models -from alibabacloud_credentials.client import Client as CredClient key = os.environ.get('IDP_KEY_ID') secret = os.environ.get('IDP_KEY_SECRET') @@ -18,7 +16,7 @@ def __init__(self): access_key_id=key, access_key_secret=secret ) - config.endpoint = f'docmind-api.cn-hangzhou.aliyuncs.com' + config.endpoint = 'docmind-api.cn-hangzhou.aliyuncs.com' self.client = docmind_api20220711Client(config) def file_submit_with_url(self, file_url): @@ -84,7 +82,7 @@ def file_parser_query(self,fid): responses = result else: responses['layouts'].extend(result['layouts']) - except Exception as error: + except Exception: return None,status_parse return responses,status_parse \ No newline at end of file diff --git a/inference/file_tools/video_agent.py b/inference/file_tools/video_agent.py index 7d9b709..37857d9 100644 --- a/inference/file_tools/video_agent.py +++ b/inference/file_tools/video_agent.py @@ -9,22 +9,11 @@ """ import sys import os -import re -import copy import json -from typing import Dict, Iterator, List, Literal, Tuple, Union, Any, Optional -import json5 import asyncio -from openai import OpenAI from qwen_agent.tools.base import BaseTool, register_tool -from qwen_agent.agents import Assistant -from qwen_agent.llm import BaseChatModel -from qwen_agent.llm.schema import ASSISTANT, USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE, SYSTEM, ROLE from qwen_agent.tools import BaseTool -from qwen_agent.log import logger -from qwen_agent.utils.tokenization_qwen import count_tokens, tokenizer -from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS current_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.dirname(current_dir)) diff --git a/inference/react_agent.py b/inference/react_agent.py index 1824666..6c6e57b 100644 --- a/inference/react_agent.py +++ b/inference/react_agent.py @@ -1,7 +1,6 @@ -import json import json5 import os -from typing import Dict, Iterator, List, Literal, Optional, Tuple, Union +from typing import Dict, List, Optional, Union from qwen_agent.llm.schema import Message from qwen_agent.utils.utils import build_text_completion_prompt from openai import OpenAI, APIError, APIConnectionError, APITimeoutError @@ -10,10 +9,8 @@ from datetime import datetime from qwen_agent.agents.fncall_agent import FnCallAgent from qwen_agent.llm import BaseChatModel -from qwen_agent.llm.schema import ASSISTANT, DEFAULT_SYSTEM_MESSAGE, Message from qwen_agent.settings import MAX_LLM_CALL_PER_RUN from qwen_agent.tools import BaseTool -from qwen_agent.utils.utils import format_as_text_message, merge_generate_cfgs from prompt import * import time import asyncio @@ -109,12 +106,12 @@ def call_server(self, msgs, planning_port, max_tries=10): else: print("Error: All retry attempts have been exhausted. The call has failed.") - return f"vllm server error!!!" + return "vllm server error!!!" def count_tokens(self, messages, model="gpt-4o"): try: tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path) - except Exception as e: + except Exception: tokenizer = tiktoken.encoding_for_model(model) full_message = [Message(**x) for x in messages] @@ -126,7 +123,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]: self.model=model try: question = data['item']['question'] - except: + except Exception: raw_msg = data['item']['messages'][1]["content"] question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg @@ -168,7 +165,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]: try: code_raw=content.split('')[1].split('')[0].split('')[1].split('')[0].strip() result = TOOL_MAP['PythonInterpreter'].call(code_raw) - except: + except Exception: result = "[Python Interpreter Error]: Formatting error." else: @@ -177,7 +174,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]: tool_args = tool_call.get('arguments', {}) result = self.custom_call_tool(tool_name, tool_args) - except: + except Exception: result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' result = "\n" + result + "\n" # print(result) diff --git a/inference/run_multi_react.py b/inference/run_multi_react.py index 1056a0a..4a517ea 100644 --- a/inference/run_multi_react.py +++ b/inference/run_multi_react.py @@ -5,9 +5,7 @@ import concurrent.futures from tqdm import tqdm import threading -from datetime import datetime from react_agent import MultiTurnReactAgent -import time import math if __name__ == "__main__": diff --git a/inference/tool_file.py b/inference/tool_file.py index 77c4960..7fb1ce9 100644 --- a/inference/tool_file.py +++ b/inference/tool_file.py @@ -9,26 +9,12 @@ """ import sys import os -import re -import time -import copy import json -from typing import Dict, Iterator, List, Literal, Tuple, Union, Any, Optional -import json5 -import asyncio -from openai import OpenAI, AsyncOpenAI -import pdb -import bdb - -from qwen_agent.tools.base import BaseTool, register_tool -from qwen_agent.agents import Assistant -from qwen_agent.llm import BaseChatModel -from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS -from qwen_agent.llm.schema import ASSISTANT, USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE, SYSTEM, ROLE + +from qwen_agent.tools.base import BaseTool +from qwen_agent.settings import DEFAULT_MAX_INPUT_TOKENS from qwen_agent.tools import BaseTool -from qwen_agent.log import logger -from qwen_agent.utils.tokenization_qwen import count_tokens, tokenizer -from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS +from qwen_agent.utils.tokenization_qwen import count_tokens current_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.dirname(current_dir)) diff --git a/inference/tool_python.py b/inference/tool_python.py index e8e5522..d851ee8 100644 --- a/inference/tool_python.py +++ b/inference/tool_python.py @@ -1,14 +1,13 @@ import re -from typing import Dict, List, Optional, Union +from typing import Dict, Optional, Union import json5 from qwen_agent.tools.base import BaseToolWithFileAccess, register_tool from qwen_agent.utils.utils import extract_code -from sandbox_fusion import run_code, RunCodeRequest, RunStatus +from sandbox_fusion import run_code, RunCodeRequest from requests.exceptions import Timeout import os import random import time -from concurrent.futures import ThreadPoolExecutor, as_completed # Array of sandbox fusion endpoints SANDBOX_FUSION_ENDPOINTS = [] @@ -80,12 +79,12 @@ def call(self, params, files= None, timeout = 50, **kwargs) -> str: if code_result.run_result.stderr: result.append(f"stderr:\n{code_result.run_result.stderr}") if code_result.run_result.execution_time >= timeout-1: - result.append(f"[PythonInterpreter Error] TimeoutError: Execution timed out.") + result.append("[PythonInterpreter Error] TimeoutError: Execution timed out.") result = '\n'.join(result) print('SUCCESS RUNNING TOOL') return result if result.strip() else 'Finished execution.' - except Timeout as e: + except Timeout: last_error = f'[Python Interpreter Error] TimeoutError: Execution timed out on endpoint {endpoint}.' print(f"Timeout on attempt {attempt + 1}: {last_error}") if attempt == 4: # Last attempt @@ -137,7 +136,7 @@ def call_specific_endpoint(self, params: Union[str, dict], endpoint: str, timeou execution_time = end_time - start_time return True, result if result.strip() else 'Finished execution.', execution_time - except Timeout as e: - return False, f'[Python Interpreter Error] TimeoutError: Execution timed out.', None + except Timeout: + return False, '[Python Interpreter Error] TimeoutError: Execution timed out.', None except Exception as e: - return False, f'[Python Interpreter Error]: {str(e)}', None + return False, f'[Python Interpreter Error]: {str(e)}', None \ No newline at end of file diff --git a/inference/tool_scholar.py b/inference/tool_scholar.py index ae021b3..90e97c5 100644 --- a/inference/tool_scholar.py +++ b/inference/tool_scholar.py @@ -1,6 +1,5 @@ import os import json -import requests from typing import Union, List from qwen_agent.tools.base import BaseTool, register_tool from concurrent.futures import ThreadPoolExecutor @@ -44,7 +43,7 @@ def google_scholar_with_serp(self, query: str): except Exception as e: print(e) if i == 4: - return f"Google Scholar Timeout, return None, Please try again later." + return "Google Scholar Timeout, return None, Please try again later." continue @@ -87,7 +86,7 @@ def google_scholar_with_serp(self, query: str): content = f"A Google scholar for '{query}' found {len(web_snippets)} results:\n\n## Scholar Results\n" + "\n\n".join(web_snippets) return content - except: + except Exception: return f"No results found for '{query}'. Try with a more general query." @@ -96,7 +95,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: try: params = self._verify_json_format_args(params) query = params["query"] - except: + except Exception: return "[google_scholar] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): diff --git a/inference/tool_search.py b/inference/tool_search.py index 1a3f7b5..ea69340 100644 --- a/inference/tool_search.py +++ b/inference/tool_search.py @@ -1,13 +1,8 @@ import json -from concurrent.futures import ThreadPoolExecutor from typing import List, Union -import requests from qwen_agent.tools.base import BaseTool, register_tool -import asyncio -from typing import Dict, List, Optional, Union -import uuid +from typing import Optional import http.client -import json import os @@ -68,7 +63,7 @@ def contains_chinese_basic(text: str) -> bool: except Exception as e: print(e) if i == 4: - return f"Google search Timeout, return None, Please try again later." + return "Google search Timeout, return None, Please try again later." continue data = res.read() diff --git a/inference/tool_visit.py b/inference/tool_visit.py index 92e4e3a..cdee8bf 100644 --- a/inference/tool_visit.py +++ b/inference/tool_visit.py @@ -1,17 +1,11 @@ import json import os -import signal -import threading -from concurrent.futures import ThreadPoolExecutor, as_completed from typing import List, Union import requests from qwen_agent.tools.base import BaseTool, register_tool from prompt import EXTRACTOR_PROMPT from openai import OpenAI -import random -from urllib.parse import urlparse, unquote import time -from transformers import AutoTokenizer import tiktoken VISIT_SERVER_TIMEOUT = int(os.getenv("VISIT_SERVER_TIMEOUT", 200)) @@ -122,7 +116,7 @@ def call_server(self, msgs, max_retries=2): if left != -1 and right != -1 and left <= right: content = content[left:right+1] return content - except Exception as e: + except Exception: # print(e) if attempt == (max_retries - 1): return "" @@ -159,7 +153,7 @@ def jina_readpage(self, url: str) -> str: else: print(response.text) raise ValueError("jina readpage error") - except Exception as e: + except Exception: time.sleep(0.5) if attempt == max_retries - 1: return "[visit] Failed to read page." From ff94125461682fed4c23b3527559d4eb2466c54d Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 11:49:20 +0500 Subject: [PATCH 05/13] Fix remaining E722 ruff errors --- commit_message.txt | 1 + inference/tool_search.py | 4 ++-- inference/tool_visit.py | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 commit_message.txt diff --git a/commit_message.txt b/commit_message.txt new file mode 100644 index 0000000..8f1b83e --- /dev/null +++ b/commit_message.txt @@ -0,0 +1 @@ +Fix remaining E722 ruff errors \ No newline at end of file diff --git a/inference/tool_search.py b/inference/tool_search.py index ea69340..499ff0c 100644 --- a/inference/tool_search.py +++ b/inference/tool_search.py @@ -96,7 +96,7 @@ def contains_chinese_basic(text: str) -> bool: content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets) return content - except: + except Exception: return f"No results found for '{query}'. Try with a more general query." @@ -108,7 +108,7 @@ def search_with_serp(self, query: str): def call(self, params: Union[str, dict], **kwargs) -> str: try: query = params["query"] - except: + except Exception: return "[Search] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): diff --git a/inference/tool_visit.py b/inference/tool_visit.py index cdee8bf..97284ff 100644 --- a/inference/tool_visit.py +++ b/inference/tool_visit.py @@ -59,7 +59,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: try: url = params["url"] goal = params["goal"] - except: + except Exception: return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields" start_time = time.time() @@ -109,7 +109,7 @@ def call_server(self, msgs, max_retries=2): if content: try: json.loads(content) - except: + except Exception: # extract json from string left = content.find('{') right = content.rfind('}') @@ -221,7 +221,7 @@ def readpage_jina(self, url: str, goal: str) -> str: try: raw = json.loads(raw) break - except: + except Exception: raw = summary_page_func(messages, max_retries=max_retries) parse_retry_times += 1 From 3f517c712e752ef444dd5127f7f82e98b30ba93e Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 11:51:19 +0500 Subject: [PATCH 06/13] Fix F403 and F405 ruff errors --- WebAgent/WebSailor/src/react_agent.py | 2 +- WebAgent/WebSailor/src/run_multi_react.py | 2 +- WebAgent/WebWalker/src/agent.py | 2 +- WebAgent/WebWalker/src/app.py | 2 +- commit_message.txt | 2 +- evaluation/evaluate_deepsearch_official.py | 2 +- inference/react_agent.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py index 79df9ac..f6f1fc6 100644 --- a/WebAgent/WebSailor/src/react_agent.py +++ b/WebAgent/WebSailor/src/react_agent.py @@ -12,7 +12,7 @@ from qwen_agent.tools import BaseTool -MAX_LLM_CALL_PER_RUN = int(os.getenv('MAX_LLM_CALL_PER_RUN', 40)) + MAX_TOKEN_LENGTH = int(os.getenv('MAX_LENGTH', 31 * 1024 - 500)) print(f'Running with MAX_LLM_CALL_PER_RUN = {MAX_LLM_CALL_PER_RUN}') diff --git a/WebAgent/WebSailor/src/run_multi_react.py b/WebAgent/WebSailor/src/run_multi_react.py index d86489a..7057a07 100644 --- a/WebAgent/WebSailor/src/run_multi_react.py +++ b/WebAgent/WebSailor/src/run_multi_react.py @@ -8,7 +8,7 @@ from react_agent import MultiTurnReactAgent from prompt import SYSTEM_PROMPT_MULTI, USER_PROMPT from tool_search import * -from tool_visit import * +from tool_visit import Visit if __name__ == "__main__": diff --git a/WebAgent/WebWalker/src/agent.py b/WebAgent/WebWalker/src/agent.py index c5274e7..5ec7a82 100644 --- a/WebAgent/WebWalker/src/agent.py +++ b/WebAgent/WebWalker/src/agent.py @@ -9,7 +9,7 @@ from qwen_agent.utils.utils import format_as_text_message, merge_generate_cfgs from openai import OpenAI import time -from prompts import * +from prompts import STSTEM_CRITIIC_INFORMATION, STSTEM_CRITIIC_ANSWER, SYSTEM_EXPLORER TOOL_DESC = ( diff --git a/WebAgent/WebWalker/src/app.py b/WebAgent/WebWalker/src/app.py index 7351368..5b122e2 100644 --- a/WebAgent/WebWalker/src/app.py +++ b/WebAgent/WebWalker/src/app.py @@ -6,7 +6,7 @@ import re import json import asyncio -from utils import * +from utils import process_url, get_info, get_content_between_a_b import base64 from PIL import Image from bs4 import BeautifulSoup diff --git a/commit_message.txt b/commit_message.txt index 8f1b83e..baecec5 100644 --- a/commit_message.txt +++ b/commit_message.txt @@ -1 +1 @@ -Fix remaining E722 ruff errors \ No newline at end of file +Fix F403 and F405 ruff errors \ No newline at end of file diff --git a/evaluation/evaluate_deepsearch_official.py b/evaluation/evaluate_deepsearch_official.py index aaf384c..5b3e6d9 100644 --- a/evaluation/evaluate_deepsearch_official.py +++ b/evaluation/evaluate_deepsearch_official.py @@ -7,7 +7,7 @@ import concurrent from tqdm import tqdm from transformers import AutoTokenizer -from prompt import * +from prompt import JUDGE_PROMPT_GAIA, JUDGE_PROMPT_XBENCH, JUDGE_PROMPT_BROWSECOMP_OFFICIAL import traceback import tiktoken import time diff --git a/inference/react_agent.py b/inference/react_agent.py index 6c6e57b..bd54192 100644 --- a/inference/react_agent.py +++ b/inference/react_agent.py @@ -19,7 +19,7 @@ from tool_scholar import * from tool_python import * from tool_search import * -from tool_visit import * +from tool_visit import Visit OBS_START = '' OBS_END = '\n' From d9c0bd453fb7355fe7a26be49e642e837a3d514d Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 12:01:47 +0500 Subject: [PATCH 07/13] Revert "Refactor react_agent.py to improve security and maintainability" This reverts commit 48d5f8f93e3df2628eace789750f3b57ae8a7dca. --- WebAgent/WebSailor/src/react_agent.py | 99 +++++++++++++-------------- 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py index f6f1fc6..1ec9352 100644 --- a/WebAgent/WebSailor/src/react_agent.py +++ b/WebAgent/WebSailor/src/react_agent.py @@ -12,7 +12,7 @@ from qwen_agent.tools import BaseTool - +MAX_LLM_CALL_PER_RUN = int(os.getenv('MAX_LLM_CALL_PER_RUN', 40)) MAX_TOKEN_LENGTH = int(os.getenv('MAX_LENGTH', 31 * 1024 - 500)) print(f'Running with MAX_LLM_CALL_PER_RUN = {MAX_LLM_CALL_PER_RUN}') @@ -38,8 +38,8 @@ def __init__(self, def call_server(self, msgs, max_tries=10): # Set OpenAI API key and base URL using vLLM API server - openai_api_key = os.getenv("OPENAI_API_KEY", "EMPTY") - openai_api_base = os.getenv("OPENAI_API_BASE", "http://127.0.0.1:6001/v1") + openai_api_key = "EMPTY" + openai_api_base = "http://127.0.0.1:6001/v1" client = OpenAI( api_key=openai_api_key, @@ -77,53 +77,15 @@ def count_tokens(self, messages, model="gpt-4o"): return len(tokenizer.encode(full_prompt)) - def _process_tool_call(self, content, messages): - if '' in content and '' in content: - tool_call = content.split('')[1].split('')[0] - try: - tool_call = json.loads(tool_call) - tool_name = tool_call.get('name', '') - tool_args = tool_call.get('arguments', {}) - result = self._call_tool(tool_name, tool_args) - except Exception: - result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' - result = "\n" + result + "\n" - messages.append({"role": "user", "content": result}) - return messages - - def _handle_token_limit(self, messages, question, answer, rollout_id): - print("Token count exceeds limit") - - messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:your final thinking\nyour answer" - content = self.call_server(messages) - messages.append({"role": "assistant", "content": content.strip()}) - if '' in content and '' in content: - prediction = messages[-1]['content'].split('')[1].split('')[0] - termination = 'generate an answer as token limit reached' - else: - prediction = messages[-1]['content'] - termination = 'format error: generate an answer as token limit reached' - return self._generate_result(question, answer, rollout_id, messages, prediction, termination) - - def _generate_result(self, question, answer, rollout_id, messages, prediction, termination): - return { - "question": question, - "answer": answer, - "rollout_id": rollout_id, - "messages": messages, - "prediction": prediction, - "termination": termination - } - def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[Message]]: self.model=model - question = data.get('item', {}).get('question', '') - if not question: - raw_msg = data.get('item', {}).get('messages', [{}, {}])[1].get("content", "") - question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg + try: + question = data['item']['question'] + except: + raw_msg = data['item']['messages'][1]["content"] + question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg - answer = data.get('item', {}).get('answer', '') - rollout_id = data.get('rollout_id', '') + answer = data['item']['answer'] self.user_prompt = user_prompt self.user_prompt = self.user_prompt + question messages = [{"role": "system", "content": self.system_message}, {"role": "user", "content": self.user_prompt}] @@ -138,7 +100,17 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M pos = content.find('') content = content[:pos] messages.append({"role": "assistant", "content": content.strip()}) - messages = self._process_tool_call(content, messages) + if '' in content and '' in content: + tool_call = content.split('')[1].split('')[0] + try: + tool_call = json.loads(tool_call) + tool_name = tool_call.get('name', '') + tool_args = tool_call.get('arguments', {}) + result = self._call_tool(tool_name, tool_args) + except: + result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' + result = "\n" + result + "\n" + messages.append({"role": "user", "content": result}) if '' in content and '' in content: termination = 'answer' break @@ -150,7 +122,26 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M print(f"round: {round}, token count: {token_count}") if token_count > max_tokens: - return self._handle_token_limit(messages, question, answer, rollout_id) + print(f"Token count exceeds limit: {token_count} > {max_tokens}") + + messages[-1]['content'] = "You have now reached the maximum context length you can handle. You should stop making tool calls and, based on all the information above, think again and provide what you consider the most likely answer in the following format:your final thinking\nyour answer" + content = self.call_server(messages) + messages.append({"role": "assistant", "content": content.strip()}) + if '' in content and '' in content: + prediction = messages[-1]['content'].split('')[1].split('')[0] + termination = 'generate an answer as token limit reached' + else: + prediction = messages[-1]['content'] + termination = 'format error: generate an answer as token limit reached' + result = { + "question": question, + "answer": answer, + "rollout_id": data['rollout_id'], + "messages": messages, + "prediction": prediction, + "termination": termination + } + return result if '' in messages[-1]['content']: prediction = messages[-1]['content'].split('')[1].split('')[0] @@ -160,4 +151,12 @@ def _run(self, data: str, model: str, user_prompt: str, **kwargs) -> List[List[M termination = 'answer not found' if num_llm_calls_available == 0: termination = 'exceed available llm calls' - return self._generate_result(question, answer, rollout_id, messages, prediction, termination) \ No newline at end of file + result = { + "question": question, + "answer": answer, + "rollout_id": data['rollout_id'], + "messages": messages, + "prediction": prediction, + "termination": termination + } + return result \ No newline at end of file From 2c82bf9850c0455f2953f6f109fb96ed66cba978 Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 12:02:18 +0500 Subject: [PATCH 08/13] Revert "chore(ci): add basic Ruff lint workflow" This reverts commit a9256b7dfa1355f42d04cfef79f08c5c592758ad. --- .github/workflows/ci-lint.yml | 20 -------------------- commit_message.txt | 1 - 2 files changed, 21 deletions(-) delete mode 100644 .github/workflows/ci-lint.yml delete mode 100644 commit_message.txt diff --git a/.github/workflows/ci-lint.yml b/.github/workflows/ci-lint.yml deleted file mode 100644 index 864e85b..0000000 --- a/.github/workflows/ci-lint.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: CI - Lint - -on: - pull_request: - branches: ["main"] - push: - branches: ["chore/add-ci-lint"] - -jobs: - lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Install ruff - run: pip install ruff==0.5.6 - - name: Lint - run: ruff check --output-format=github . \ No newline at end of file diff --git a/commit_message.txt b/commit_message.txt deleted file mode 100644 index baecec5..0000000 --- a/commit_message.txt +++ /dev/null @@ -1 +0,0 @@ -Fix F403 and F405 ruff errors \ No newline at end of file From 1348ac5b28344b6dea29e577e6ae3905c535818b Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 12:07:59 +0500 Subject: [PATCH 09/13] Revert "Fix E722 ruff errors" This reverts commit d60f2545369347a48f8d4aa1253f6abb9c021f6c. --- .../WebDancer/demos/assistant_qwq_chat.py | 8 +++++-- WebAgent/WebDancer/demos/llm/oai.py | 3 ++- .../WebDancer/demos/tools/private/search.py | 6 ++--- .../WebDancer/demos/tools/private/visit.py | 4 ++-- WebAgent/WebDancer/demos/utils/logs.py | 1 + WebAgent/WebSailor/src/evaluate.py | 4 ++-- WebAgent/WebSailor/src/react_agent.py | 4 ++-- WebAgent/WebSailor/src/tool_search.py | 7 +++--- WebAgent/WebSailor/src/tool_visit.py | 11 +++++---- WebAgent/WebWalker/src/agent.py | 10 ++++---- WebAgent/WebWalker/src/app.py | 5 ++-- evaluation/evaluate_deepsearch_official.py | 23 ++++++++++--------- inference/file_tools/file_parser.py | 7 +++--- inference/file_tools/idp.py | 6 +++-- inference/file_tools/video_agent.py | 11 +++++++++ inference/react_agent.py | 15 +++++++----- inference/run_multi_react.py | 2 ++ inference/tool_file.py | 22 ++++++++++++++---- inference/tool_python.py | 15 ++++++------ inference/tool_scholar.py | 7 +++--- inference/tool_search.py | 9 ++++++-- inference/tool_visit.py | 10 ++++++-- 22 files changed, 123 insertions(+), 67 deletions(-) diff --git a/WebAgent/WebDancer/demos/assistant_qwq_chat.py b/WebAgent/WebDancer/demos/assistant_qwq_chat.py index b0a48a6..3fa38ff 100644 --- a/WebAgent/WebDancer/demos/assistant_qwq_chat.py +++ b/WebAgent/WebDancer/demos/assistant_qwq_chat.py @@ -2,11 +2,15 @@ import os +from qwen_agent.agents import Assistant +from qwen_agent.utils.output_beautify import typewriter_print from demos.agents.search_agent import SearchAgent from demos.llm.oai import TextChatAtOAI +from demos.llm.qwen_dashscope import QwenChatAtDS from demos.gui.web_ui import WebUI from demos.utils.date import date2str, get_date_now +from demos.tools import Visit, Search ROOT_RESOURCE = os.path.join(os.path.dirname(__file__), 'resource') @@ -46,8 +50,8 @@ def make_system_prompt(): llm=llm_cfg, function_list=tools, system_message="", - name='WebDancer', - description="I am WebDancer, a web information seeking agent, welcome to try!", + name=f'WebDancer', + description=f"I am WebDancer, a web information seeking agent, welcome to try!", extra={ 'reasoning': reasoning, 'max_llm_calls': max_llm_calls, diff --git a/WebAgent/WebDancer/demos/llm/oai.py b/WebAgent/WebDancer/demos/llm/oai.py index 53a261d..0df27fe 100644 --- a/WebAgent/WebDancer/demos/llm/oai.py +++ b/WebAgent/WebDancer/demos/llm/oai.py @@ -2,6 +2,7 @@ import json import logging import os +from http import HTTPStatus from pprint import pformat from typing import Dict, Iterator, List, Optional, Literal, Union @@ -14,7 +15,7 @@ from qwen_agent.llm.base import ModelServiceError, register_llm from qwen_agent.llm.function_calling import BaseFnCallModel, simulate_response_completion_with_chat -from qwen_agent.llm.schema import ASSISTANT, Message +from qwen_agent.llm.schema import ASSISTANT, Message, FunctionCall from qwen_agent.log import logger diff --git a/WebAgent/WebDancer/demos/tools/private/search.py b/WebAgent/WebDancer/demos/tools/private/search.py index 7f994ee..29278ec 100644 --- a/WebAgent/WebDancer/demos/tools/private/search.py +++ b/WebAgent/WebDancer/demos/tools/private/search.py @@ -30,7 +30,7 @@ def call(self, params: str, **kwargs) -> str: try: params = self._verify_json_format_args(params) query = params["query"][:MAX_MULTIQUERY_NUM] - except Exception: + except: return "[Search] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): @@ -57,9 +57,9 @@ def google_search(self, query: str) -> str: response = requests.post(url, headers=headers, data=json.dumps(data)) results = response.json() break - except Exception: + except Exception as e: if i == 4: - return "Google search Timeout, return None, Please try again later." + return f"Google search Timeout, return None, Please try again later." continue if response.status_code != 200: diff --git a/WebAgent/WebDancer/demos/tools/private/visit.py b/WebAgent/WebDancer/demos/tools/private/visit.py index fef4abe..b911cc6 100644 --- a/WebAgent/WebDancer/demos/tools/private/visit.py +++ b/WebAgent/WebDancer/demos/tools/private/visit.py @@ -62,7 +62,7 @@ def jina_readpage(url: str) -> str: else: print(response.text) raise ValueError("jina readpage error") - except Exception: + except Exception as e: if attempt == max_retries - 1: return "[visit] Failed to read page." @@ -97,7 +97,7 @@ def call(self, params: str, **kwargs) -> str: params = self._verify_json_format_args(params) url = params["url"] goal = params["goal"] - except Exception: + except: return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields" if isinstance(url, str): response = self.readpage(url, goal) diff --git a/WebAgent/WebDancer/demos/utils/logs.py b/WebAgent/WebDancer/demos/utils/logs.py index 629814d..c8cd432 100644 --- a/WebAgent/WebDancer/demos/utils/logs.py +++ b/WebAgent/WebDancer/demos/utils/logs.py @@ -1,5 +1,6 @@ # coding=utf-8 import os +import sys import logging diff --git a/WebAgent/WebSailor/src/evaluate.py b/WebAgent/WebSailor/src/evaluate.py index 0386a54..6b3a22c 100644 --- a/WebAgent/WebSailor/src/evaluate.py +++ b/WebAgent/WebSailor/src/evaluate.py @@ -289,7 +289,7 @@ def main(): for i in [1, 2, 3] } - print("===========") + print(f"===========") print(f"Avg. Pass@3 {avg_pass_at_3}%") print(f"Best Pass@1 {best_pass_at_1}%") print(f"Pass@3 {pass_at_3}%") @@ -297,7 +297,7 @@ def main(): print(f"# Invalid {aggr_statistics['num_invalid']} # Extra Length {aggr_statistics['extra_length']}") print(f"Avg. Action {aggr_statistics['avg_action']:.2f} Avg. Visit Action {aggr_statistics['avg_visit_action']:.2f} Avg. Search Action {aggr_statistics['avg_search_action']:.2f} Avg. Other Action {aggr_statistics['avg_other_action']:.2f}") print(f"Avg. Answer Length {aggr_statistics['avg_ans_length']:.2f} Avg. Thinking Length {aggr_statistics['avg_think_length']:.2f}") - print("===========" ) + print(f"===========" ) overall_eval_dict = { "dataset": dataset, diff --git a/WebAgent/WebSailor/src/react_agent.py b/WebAgent/WebSailor/src/react_agent.py index 1ec9352..7a95051 100644 --- a/WebAgent/WebSailor/src/react_agent.py +++ b/WebAgent/WebSailor/src/react_agent.py @@ -61,7 +61,7 @@ def call_server(self, msgs, max_tries=10): except Exception as e: if attempt == (max_tries - 1): print(f"SGLang server error {e}") - return "SGLang server error" + return f"SGLang server error" continue return "SGLang server empty response" @@ -69,7 +69,7 @@ def call_server(self, msgs, max_tries=10): def count_tokens(self, messages, model="gpt-4o"): try: tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path) - except Exception: + except: tokenizer = tiktoken.encoding_for_model(model) full_message = [Message(**x) for x in messages] diff --git a/WebAgent/WebSailor/src/tool_search.py b/WebAgent/WebSailor/src/tool_search.py index ba0da49..3643c53 100644 --- a/WebAgent/WebSailor/src/tool_search.py +++ b/WebAgent/WebSailor/src/tool_search.py @@ -3,6 +3,7 @@ from concurrent.futures import ThreadPoolExecutor from typing import List, Union import requests +from qwen_agent.tools.base import BaseTool, register_tool import os SEARCH_API_URL = os.getenv("SEARCH_API_URL") @@ -49,7 +50,7 @@ def google_search(self, query: str): except Exception as e: print(e) if i == 4: - return "Google search Timeout, return None, Please try again later." + return f"Google search Timeout, return None, Please try again later." if response.status_code != 200: raise Exception(f"Error: {response.status_code} - {response.text}") @@ -81,7 +82,7 @@ def google_search(self, query: str): content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets) return content - except Exception: + except: return f"No results found for '{query}'. Try with a more general query, or remove the year filter." @@ -89,7 +90,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: assert GOOGLE_SEARCH_KEY is not None, "Please set the GOOGLE_SEARCH_KEY environment variable." try: query = params["query"] - except Exception: + except: return "[Search] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): diff --git a/WebAgent/WebSailor/src/tool_visit.py b/WebAgent/WebSailor/src/tool_visit.py index fbae5e9..ac8e5e6 100644 --- a/WebAgent/WebSailor/src/tool_visit.py +++ b/WebAgent/WebSailor/src/tool_visit.py @@ -6,6 +6,7 @@ from prompt import EXTRACTOR_PROMPT import os from openai import OpenAI +import random WEBCONTENT_MAXLENGTH = int(os.getenv("WEBCONTENT_MAXLENGTH", 150000)) @@ -45,7 +46,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: try: url = params["url"] goal = params["goal"] - except Exception: + except: return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields" if isinstance(url, str): @@ -86,14 +87,14 @@ def call_server(self, msgs, max_tries=10): if content: try: json.loads(content) - except Exception: + except: # extract json from string left = content.find('{') right = content.rfind('}') if left != -1 and right != -1 and left <= right: content = content[left:right+1] return content - except Exception: + except: if attempt == (max_tries - 1): return "" continue @@ -128,7 +129,7 @@ def jina_readpage(self, url: str) -> str: else: print(response.text) raise ValueError("jina readpage error") - except Exception: + except Exception as e: if attempt == max_retries - 1: return "[visit] Failed to read page." @@ -190,7 +191,7 @@ def readpage(self, url: str, goal: str) -> str: # 尝试 parse json raw = json.loads(raw) break - except Exception: + except: raw = self.call_server(messages) parse_retry_times += 1 # parse 失败 diff --git a/WebAgent/WebWalker/src/agent.py b/WebAgent/WebWalker/src/agent.py index 5ec7a82..02ffeb2 100644 --- a/WebAgent/WebWalker/src/agent.py +++ b/WebAgent/WebWalker/src/agent.py @@ -63,7 +63,7 @@ def observation_information_extraction(self, query, observation): if "true" in response.choices[0].message.content: try: return json.loads(response.choices[0].message.content)["information"] - except Exception: + except: return response.choices[0].message.content else: return None @@ -97,7 +97,7 @@ def critic_information(self, query, memory): if "true" in response.choices[0].message.content: try: return json.loads(response.choices[0].message.content)["answer"] - except Exception: + except: return response.choices[0].message.content else: return None @@ -140,9 +140,9 @@ def _run(self, messages: List[Message], lang: Literal['en', 'zh'] = 'en', **kwar if stage1: self.momery.append(stage1+"\n") if len(self.momery) > 1: - yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"}")] + yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"")}] else: - yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"}")] + yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"")}] stage2 = self.critic_information(query, self.momery) if stage2: response = f'Final Answer: {stage2}' @@ -205,4 +205,4 @@ def _detect_tool(self, text: str) -> Tuple[bool, str, str, str]: func_name = text[i + len(special_func_token):j].strip() func_args = text[j + len(special_args_token):k].strip() text = text[:i] # Return the response before tool call, i.e., `Thought` - return (func_name is not None), func_name, func_args, text + return (func_name is not None), func_name, func_args, text \ No newline at end of file diff --git a/WebAgent/WebWalker/src/app.py b/WebAgent/WebWalker/src/app.py index 5b122e2..f7fba53 100644 --- a/WebAgent/WebWalker/src/app.py +++ b/WebAgent/WebWalker/src/app.py @@ -3,10 +3,11 @@ import json5 from agent import WebWalker from qwen_agent.tools.base import BaseTool, register_tool +import os import re import json import asyncio -from utils import process_url, get_info, get_content_between_a_b +from utils import * import base64 from PIL import Image from bs4 import BeautifulSoup @@ -267,4 +268,4 @@ def call(self, params: str, **kwargs) -> str: else: return "The button can not be clicked, please retry a new botton!" else: - return "Your input is invalid, plase output the action input correctly!" + return "Your input is invalid, plase output the action input correctly!"} \ No newline at end of file diff --git a/evaluation/evaluate_deepsearch_official.py b/evaluation/evaluate_deepsearch_official.py index 5b3e6d9..b348f66 100644 --- a/evaluation/evaluate_deepsearch_official.py +++ b/evaluation/evaluate_deepsearch_official.py @@ -7,7 +7,8 @@ import concurrent from tqdm import tqdm from transformers import AutoTokenizer -from prompt import JUDGE_PROMPT_GAIA, JUDGE_PROMPT_XBENCH, JUDGE_PROMPT_BROWSECOMP_OFFICIAL +import re +from prompt import * import traceback import tiktoken import time @@ -175,7 +176,7 @@ def count_tokens_with_tokenizer(text, tokenizer): return len(tokenizer.encode(text)) else: return len(tokenizer.encode(text)) - except Exception: + except: return len(text) // 4 @@ -186,7 +187,7 @@ def aggregate_statistics(round1_file, round2_file, round3_file): round3_stats = single_round_statistics(round3_file) keys = round1_stats.keys() - avg_stats = {} + avg_stats = {} for key in keys: if isinstance(round1_stats[key], dict): @@ -300,7 +301,7 @@ def single_round_statistics(input_file): try: if len(tokenizer.encode("".join([msg["content"] for msg in messages]))) > 30000: num_extra += 1 - except Exception: + except: pass total_questions = len(contents) @@ -342,7 +343,7 @@ def calculate_enhanced_statistics(round_results, round_items): continue try: matching_item = [item for item in items if item['messages'][1]['content'] == result['question']] - except Exception: + except: items = [item for item in items if len(item['messages'])>0] matching_item = [item for item in items if item['messages'][1]['content'] == result['question']] if not matching_item: @@ -416,7 +417,7 @@ def calculate_best_pass_at_1(query_results): round_correct = {round_name: 0 for round_name in ["round1", "round2", "round3"]} for query, results in query_results.items(): - for round_name in ["round1", "round2", "round3"]: + for round_name in ["round1", "round2", "round3"]: if results[round_name] == "Correct": round_correct[round_name] += 1 @@ -527,7 +528,7 @@ def main(): for i in [1, 2, 3] } - print("===========") + print(f"===========") print(f"Avg. Pass@3 {avg_pass_at_3}%") print(f"Best Pass@1 {best_pass_at_1}%") print(f"Pass@3 {pass_at_3}%") @@ -538,18 +539,18 @@ def main(): print(f"Avg. Action {aggr_statistics['avg_action']:.2f} Avg. Visit Action {aggr_statistics['avg_visit_action']:.2f} Avg. Search Action {aggr_statistics['avg_search_action']:.2f} Avg. Other Action {aggr_statistics['avg_other_action']:.2f}") print(f"Avg. Answer Length {aggr_statistics['avg_ans_length']:.2f} Avg. Thinking Length {aggr_statistics['avg_think_length']:.2f}") enhanced_statistics = calculate_enhanced_statistics(round_results, round_items) - print("\n=== ADDITIONAL STATISTICS ===") + print(f"\n=== ADDITIONAL STATISTICS ===") print(f"Avg. Tool Calls per Question: {aggr_statistics['avg_tool_calls_per_question']:.2f}") print(f"Avg. Tool Calls per Question (Correctly Solved): {enhanced_statistics['avg_tool_calls_per_question_correctly_solved']:.2f}") print(f"Avg. Assistant Tokens per Question: {aggr_statistics['avg_assistant_tokens_per_question']:.2f}") print(f"Avg. Assistant Tokens per Question (Correctly Solved): {enhanced_statistics['avg_assistant_tokens_per_question_correctly_solved']:.2f}") print(f"Avg. Assistant Tokens per Message: {aggr_statistics['avg_assistant_tokens_per_message']:.2f}") - print("\n=== TERMINATION FREQUENCIES ===") + print(f"\n=== TERMINATION FREQUENCIES ===") for termination_type, frequency in aggr_statistics['termination_freq'].items(): print(f"{termination_type}: {frequency:.3f}") - print("===========" ) + print(f"===========" ) overall_eval_dict = { "dataset": dataset, @@ -578,4 +579,4 @@ def main(): except Exception as e: error_str = traceback.format_exc() print(f"Evaluation Failed: {e}") - print("Trace Back", error_str) + print("Trace Back", error_str) \ No newline at end of file diff --git a/inference/file_tools/file_parser.py b/inference/file_tools/file_parser.py index 3410da7..ecbace9 100644 --- a/inference/file_tools/file_parser.py +++ b/inference/file_tools/file_parser.py @@ -4,6 +4,7 @@ import time import zipfile import math +from pathlib import Path from typing import Any, Dict, List, Optional, Union from collections import Counter @@ -16,7 +17,7 @@ from tabulate import tabulate from qwen_agent.log import logger from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS -from qwen_agent.tools.base import BaseTool +from qwen_agent.tools.base import BaseTool, register_tool from qwen_agent.tools.storage import KeyNotExistsError, Storage from file_tools.utils import (get_file_type, hash_sha256, is_http_url, get_basename_from_url, sanitize_chrome_file_path, save_url_to_local_work_dir) @@ -521,7 +522,7 @@ def _process_new_file(self, file_path: str) -> Union[str, list]: if USE_IDP and file_type in idp_types: try: results = parse_file_by_idp(file_path=file_path) - except Exception: + except Exception as e: results = self.parsers[file_type](file_path) else: results = self.parsers[file_type](file_path) @@ -535,7 +536,7 @@ def _process_new_file(self, file_path: str) -> Union[str, list]: tokens += para['token'] if not results or not tokens: - logger.error("Parsing failed: No information was parsed") + logger.error(f"Parsing failed: No information was parsed") raise FileParserError("Document parsing failed") else: self._cache_result(file_path, results) diff --git a/inference/file_tools/idp.py b/inference/file_tools/idp.py index b77872b..71199cb 100644 --- a/inference/file_tools/idp.py +++ b/inference/file_tools/idp.py @@ -1,10 +1,12 @@ import os +import json from alibabacloud_docmind_api20220711.client import Client as docmind_api20220711Client from alibabacloud_tea_openapi import models as open_api_models from alibabacloud_docmind_api20220711 import models as docmind_api20220711_models from alibabacloud_tea_util.client import Client as UtilClient from alibabacloud_tea_util import models as util_models +from alibabacloud_credentials.client import Client as CredClient key = os.environ.get('IDP_KEY_ID') secret = os.environ.get('IDP_KEY_SECRET') @@ -16,7 +18,7 @@ def __init__(self): access_key_id=key, access_key_secret=secret ) - config.endpoint = 'docmind-api.cn-hangzhou.aliyuncs.com' + config.endpoint = f'docmind-api.cn-hangzhou.aliyuncs.com' self.client = docmind_api20220711Client(config) def file_submit_with_url(self, file_url): @@ -82,7 +84,7 @@ def file_parser_query(self,fid): responses = result else: responses['layouts'].extend(result['layouts']) - except Exception: + except Exception as error: return None,status_parse return responses,status_parse \ No newline at end of file diff --git a/inference/file_tools/video_agent.py b/inference/file_tools/video_agent.py index 37857d9..7d9b709 100644 --- a/inference/file_tools/video_agent.py +++ b/inference/file_tools/video_agent.py @@ -9,11 +9,22 @@ """ import sys import os +import re +import copy import json +from typing import Dict, Iterator, List, Literal, Tuple, Union, Any, Optional +import json5 import asyncio +from openai import OpenAI from qwen_agent.tools.base import BaseTool, register_tool +from qwen_agent.agents import Assistant +from qwen_agent.llm import BaseChatModel +from qwen_agent.llm.schema import ASSISTANT, USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE, SYSTEM, ROLE from qwen_agent.tools import BaseTool +from qwen_agent.log import logger +from qwen_agent.utils.tokenization_qwen import count_tokens, tokenizer +from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS current_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.dirname(current_dir)) diff --git a/inference/react_agent.py b/inference/react_agent.py index bd54192..8c26a35 100644 --- a/inference/react_agent.py +++ b/inference/react_agent.py @@ -1,6 +1,7 @@ +import json import json5 import os -from typing import Dict, List, Optional, Union +from typing import Dict, Iterator, List, Literal, Optional, Tuple, Union from qwen_agent.llm.schema import Message from qwen_agent.utils.utils import build_text_completion_prompt from openai import OpenAI, APIError, APIConnectionError, APITimeoutError @@ -9,8 +10,10 @@ from datetime import datetime from qwen_agent.agents.fncall_agent import FnCallAgent from qwen_agent.llm import BaseChatModel +from qwen_agent.llm.schema import ASSISTANT, DEFAULT_SYSTEM_MESSAGE, Message from qwen_agent.settings import MAX_LLM_CALL_PER_RUN from qwen_agent.tools import BaseTool +from qwen_agent.utils.utils import format_as_text_message, merge_generate_cfgs from prompt import * import time import asyncio @@ -106,12 +109,12 @@ def call_server(self, msgs, planning_port, max_tries=10): else: print("Error: All retry attempts have been exhausted. The call has failed.") - return "vllm server error!!!" + return f"vllm server error!!!" def count_tokens(self, messages, model="gpt-4o"): try: tokenizer = AutoTokenizer.from_pretrained(self.llm_local_path) - except Exception: + except Exception as e: tokenizer = tiktoken.encoding_for_model(model) full_message = [Message(**x) for x in messages] @@ -123,7 +126,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]: self.model=model try: question = data['item']['question'] - except Exception: + except: raw_msg = data['item']['messages'][1]["content"] question = raw_msg.split("User:")[1].strip() if "User:" in raw_msg else raw_msg @@ -165,7 +168,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]: try: code_raw=content.split('')[1].split('')[0].split('')[1].split('')[0].strip() result = TOOL_MAP['PythonInterpreter'].call(code_raw) - except Exception: + except: result = "[Python Interpreter Error]: Formatting error." else: @@ -174,7 +177,7 @@ def _run(self, data: str, model: str, **kwargs) -> List[List[Message]]: tool_args = tool_call.get('arguments', {}) result = self.custom_call_tool(tool_name, tool_args) - except Exception: + except: result = 'Error: Tool call is not a valid JSON. Tool call must contain a valid "name" and "arguments" field.' result = "\n" + result + "\n" # print(result) diff --git a/inference/run_multi_react.py b/inference/run_multi_react.py index 4a517ea..1056a0a 100644 --- a/inference/run_multi_react.py +++ b/inference/run_multi_react.py @@ -5,7 +5,9 @@ import concurrent.futures from tqdm import tqdm import threading +from datetime import datetime from react_agent import MultiTurnReactAgent +import time import math if __name__ == "__main__": diff --git a/inference/tool_file.py b/inference/tool_file.py index 7fb1ce9..77c4960 100644 --- a/inference/tool_file.py +++ b/inference/tool_file.py @@ -9,12 +9,26 @@ """ import sys import os +import re +import time +import copy import json - -from qwen_agent.tools.base import BaseTool -from qwen_agent.settings import DEFAULT_MAX_INPUT_TOKENS +from typing import Dict, Iterator, List, Literal, Tuple, Union, Any, Optional +import json5 +import asyncio +from openai import OpenAI, AsyncOpenAI +import pdb +import bdb + +from qwen_agent.tools.base import BaseTool, register_tool +from qwen_agent.agents import Assistant +from qwen_agent.llm import BaseChatModel +from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS +from qwen_agent.llm.schema import ASSISTANT, USER, FUNCTION, Message, DEFAULT_SYSTEM_MESSAGE, SYSTEM, ROLE from qwen_agent.tools import BaseTool -from qwen_agent.utils.tokenization_qwen import count_tokens +from qwen_agent.log import logger +from qwen_agent.utils.tokenization_qwen import count_tokens, tokenizer +from qwen_agent.settings import DEFAULT_WORKSPACE, DEFAULT_MAX_INPUT_TOKENS current_dir = os.path.dirname(os.path.abspath(__file__)) sys.path.append(os.path.dirname(current_dir)) diff --git a/inference/tool_python.py b/inference/tool_python.py index d851ee8..e8e5522 100644 --- a/inference/tool_python.py +++ b/inference/tool_python.py @@ -1,13 +1,14 @@ import re -from typing import Dict, Optional, Union +from typing import Dict, List, Optional, Union import json5 from qwen_agent.tools.base import BaseToolWithFileAccess, register_tool from qwen_agent.utils.utils import extract_code -from sandbox_fusion import run_code, RunCodeRequest +from sandbox_fusion import run_code, RunCodeRequest, RunStatus from requests.exceptions import Timeout import os import random import time +from concurrent.futures import ThreadPoolExecutor, as_completed # Array of sandbox fusion endpoints SANDBOX_FUSION_ENDPOINTS = [] @@ -79,12 +80,12 @@ def call(self, params, files= None, timeout = 50, **kwargs) -> str: if code_result.run_result.stderr: result.append(f"stderr:\n{code_result.run_result.stderr}") if code_result.run_result.execution_time >= timeout-1: - result.append("[PythonInterpreter Error] TimeoutError: Execution timed out.") + result.append(f"[PythonInterpreter Error] TimeoutError: Execution timed out.") result = '\n'.join(result) print('SUCCESS RUNNING TOOL') return result if result.strip() else 'Finished execution.' - except Timeout: + except Timeout as e: last_error = f'[Python Interpreter Error] TimeoutError: Execution timed out on endpoint {endpoint}.' print(f"Timeout on attempt {attempt + 1}: {last_error}") if attempt == 4: # Last attempt @@ -136,7 +137,7 @@ def call_specific_endpoint(self, params: Union[str, dict], endpoint: str, timeou execution_time = end_time - start_time return True, result if result.strip() else 'Finished execution.', execution_time - except Timeout: - return False, '[Python Interpreter Error] TimeoutError: Execution timed out.', None + except Timeout as e: + return False, f'[Python Interpreter Error] TimeoutError: Execution timed out.', None except Exception as e: - return False, f'[Python Interpreter Error]: {str(e)}', None \ No newline at end of file + return False, f'[Python Interpreter Error]: {str(e)}', None diff --git a/inference/tool_scholar.py b/inference/tool_scholar.py index 90e97c5..ae021b3 100644 --- a/inference/tool_scholar.py +++ b/inference/tool_scholar.py @@ -1,5 +1,6 @@ import os import json +import requests from typing import Union, List from qwen_agent.tools.base import BaseTool, register_tool from concurrent.futures import ThreadPoolExecutor @@ -43,7 +44,7 @@ def google_scholar_with_serp(self, query: str): except Exception as e: print(e) if i == 4: - return "Google Scholar Timeout, return None, Please try again later." + return f"Google Scholar Timeout, return None, Please try again later." continue @@ -86,7 +87,7 @@ def google_scholar_with_serp(self, query: str): content = f"A Google scholar for '{query}' found {len(web_snippets)} results:\n\n## Scholar Results\n" + "\n\n".join(web_snippets) return content - except Exception: + except: return f"No results found for '{query}'. Try with a more general query." @@ -95,7 +96,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: try: params = self._verify_json_format_args(params) query = params["query"] - except Exception: + except: return "[google_scholar] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): diff --git a/inference/tool_search.py b/inference/tool_search.py index 499ff0c..d2289df 100644 --- a/inference/tool_search.py +++ b/inference/tool_search.py @@ -1,8 +1,13 @@ import json +from concurrent.futures import ThreadPoolExecutor from typing import List, Union +import requests from qwen_agent.tools.base import BaseTool, register_tool -from typing import Optional +import asyncio +from typing import Dict, List, Optional, Union +import uuid import http.client +import json import os @@ -63,7 +68,7 @@ def contains_chinese_basic(text: str) -> bool: except Exception as e: print(e) if i == 4: - return "Google search Timeout, return None, Please try again later." + return f"Google search Timeout, return None, Please try again later." continue data = res.read() diff --git a/inference/tool_visit.py b/inference/tool_visit.py index 97284ff..4981a2c 100644 --- a/inference/tool_visit.py +++ b/inference/tool_visit.py @@ -1,11 +1,17 @@ import json import os +import signal +import threading +from concurrent.futures import ThreadPoolExecutor, as_completed from typing import List, Union import requests from qwen_agent.tools.base import BaseTool, register_tool from prompt import EXTRACTOR_PROMPT from openai import OpenAI +import random +from urllib.parse import urlparse, unquote import time +from transformers import AutoTokenizer import tiktoken VISIT_SERVER_TIMEOUT = int(os.getenv("VISIT_SERVER_TIMEOUT", 200)) @@ -116,7 +122,7 @@ def call_server(self, msgs, max_retries=2): if left != -1 and right != -1 and left <= right: content = content[left:right+1] return content - except Exception: + except Exception as e: # print(e) if attempt == (max_retries - 1): return "" @@ -153,7 +159,7 @@ def jina_readpage(self, url: str) -> str: else: print(response.text) raise ValueError("jina readpage error") - except Exception: + except Exception as e: time.sleep(0.5) if attempt == max_retries - 1: return "[visit] Failed to read page." From fac9a0725a9855bc565de6f43e7fdd5167107133 Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 12:08:11 +0500 Subject: [PATCH 10/13] Revert "Fix remaining E722 ruff errors" This reverts commit ff94125461682fed4c23b3527559d4eb2466c54d. --- inference/tool_search.py | 4 ++-- inference/tool_visit.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/inference/tool_search.py b/inference/tool_search.py index d2289df..1a3f7b5 100644 --- a/inference/tool_search.py +++ b/inference/tool_search.py @@ -101,7 +101,7 @@ def contains_chinese_basic(text: str) -> bool: content = f"A Google search for '{query}' found {len(web_snippets)} results:\n\n## Web Results\n" + "\n\n".join(web_snippets) return content - except Exception: + except: return f"No results found for '{query}'. Try with a more general query." @@ -113,7 +113,7 @@ def search_with_serp(self, query: str): def call(self, params: Union[str, dict], **kwargs) -> str: try: query = params["query"] - except Exception: + except: return "[Search] Invalid request format: Input must be a JSON object containing 'query' field" if isinstance(query, str): diff --git a/inference/tool_visit.py b/inference/tool_visit.py index 4981a2c..92e4e3a 100644 --- a/inference/tool_visit.py +++ b/inference/tool_visit.py @@ -65,7 +65,7 @@ def call(self, params: Union[str, dict], **kwargs) -> str: try: url = params["url"] goal = params["goal"] - except Exception: + except: return "[Visit] Invalid request format: Input must be a JSON object containing 'url' and 'goal' fields" start_time = time.time() @@ -115,7 +115,7 @@ def call_server(self, msgs, max_retries=2): if content: try: json.loads(content) - except Exception: + except: # extract json from string left = content.find('{') right = content.rfind('}') @@ -227,7 +227,7 @@ def readpage_jina(self, url: str, goal: str) -> str: try: raw = json.loads(raw) break - except Exception: + except: raw = summary_page_func(messages, max_retries=max_retries) parse_retry_times += 1 From 33278d416578654d9b9660b27fe2483bd7369297 Mon Sep 17 00:00:00 2001 From: Samad Date: Sun, 21 Sep 2025 12:15:38 +0500 Subject: [PATCH 11/13] Revert "Fix F403 and F405 ruff errors" This reverts commit 3f517c712e752ef444dd5127f7f82e98b30ba93e. --- WebAgent/WebSailor/src/run_multi_react.py | 2 +- WebAgent/WebWalker/src/agent.py | 6 +++--- evaluation/evaluate_deepsearch_official.py | 6 +++--- inference/react_agent.py | 7 ++++--- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/WebAgent/WebSailor/src/run_multi_react.py b/WebAgent/WebSailor/src/run_multi_react.py index 7057a07..d86489a 100644 --- a/WebAgent/WebSailor/src/run_multi_react.py +++ b/WebAgent/WebSailor/src/run_multi_react.py @@ -8,7 +8,7 @@ from react_agent import MultiTurnReactAgent from prompt import SYSTEM_PROMPT_MULTI, USER_PROMPT from tool_search import * -from tool_visit import Visit +from tool_visit import * if __name__ == "__main__": diff --git a/WebAgent/WebWalker/src/agent.py b/WebAgent/WebWalker/src/agent.py index 02ffeb2..fc035ee 100644 --- a/WebAgent/WebWalker/src/agent.py +++ b/WebAgent/WebWalker/src/agent.py @@ -9,7 +9,7 @@ from qwen_agent.utils.utils import format_as_text_message, merge_generate_cfgs from openai import OpenAI import time -from prompts import STSTEM_CRITIIC_INFORMATION, STSTEM_CRITIIC_ANSWER, SYSTEM_EXPLORER +from prompts import * TOOL_DESC = ( @@ -140,9 +140,9 @@ def _run(self, messages: List[Message], lang: Literal['en', 'zh'] = 'en', **kwar if stage1: self.momery.append(stage1+"\n") if len(self.momery) > 1: - yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"")}] + yield [Message(role=ASSISTANT, content= "Memory:\n" + "-".join(self.momery)+"\"")] else: - yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"")}] + yield [Message(role=ASSISTANT, content= "Memory:\n" + "-" + self.momery[0]+"\"")] stage2 = self.critic_information(query, self.momery) if stage2: response = f'Final Answer: {stage2}' diff --git a/evaluation/evaluate_deepsearch_official.py b/evaluation/evaluate_deepsearch_official.py index b348f66..d5aed58 100644 --- a/evaluation/evaluate_deepsearch_official.py +++ b/evaluation/evaluate_deepsearch_official.py @@ -457,10 +457,10 @@ def main(): args = parser.parse_args() dataset = args.dataset - if dataset in ["gaia", "webwalker"]: + if dataset in ["gaia", "webwalker"]: judge_model = "openai/qwen2.5-72b-instruct" judge_prompt = JUDGE_PROMPT_GAIA - elif dataset in ["xbench-deepsearch"]: + elif dataset in ["xbench-deepsearch"]: judge_prompt = JUDGE_PROMPT_XBENCH judge_model = "google/gemini-2.0-flash-001" elif dataset.startswith("browsecomp_zh"): @@ -579,4 +579,4 @@ def main(): except Exception as e: error_str = traceback.format_exc() print(f"Evaluation Failed: {e}") - print("Trace Back", error_str) \ No newline at end of file + print("Trace Back", error_str) diff --git a/inference/react_agent.py b/inference/react_agent.py index 8c26a35..ec3aa26 100644 --- a/inference/react_agent.py +++ b/inference/react_agent.py @@ -22,10 +22,11 @@ from tool_scholar import * from tool_python import * from tool_search import * -from tool_visit import Visit +from tool_visit import * OBS_START = '' -OBS_END = '\n' +OBS_END = ' +' MAX_LLM_CALL_PER_RUN = int(os.getenv('MAX_LLM_CALL_PER_RUN', 100)) @@ -249,4 +250,4 @@ def custom_call_tool(self, tool_name: str, tool_args: dict, **kwargs): return result else: - return f"Error: Tool {tool_name} not found" + return f"Error: Tool {tool_name} not found"} \ No newline at end of file From 3ff09e330bc22513031c111db7e10336d0eef557 Mon Sep 17 00:00:00 2001 From: MirzaSamadAhmedBaig <89132160+Mirza-Samad-Ahmed-Baig@users.noreply.github.com> Date: Mon, 22 Sep 2025 14:45:19 +0500 Subject: [PATCH 12/13] Update app.py --- WebAgent/WebWalker/src/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/WebAgent/WebWalker/src/app.py b/WebAgent/WebWalker/src/app.py index f7fba53..73bc557 100644 --- a/WebAgent/WebWalker/src/app.py +++ b/WebAgent/WebWalker/src/app.py @@ -268,4 +268,4 @@ def call(self, params: str, **kwargs) -> str: else: return "The button can not be clicked, please retry a new botton!" else: - return "Your input is invalid, plase output the action input correctly!"} \ No newline at end of file + return "Your input is invalid, plase output the action input correctly!" From 68cb1dec596872cf98d20309b2543fd140b5aa49 Mon Sep 17 00:00:00 2001 From: MirzaSamadAhmedBaig <89132160+Mirza-Samad-Ahmed-Baig@users.noreply.github.com> Date: Mon, 22 Sep 2025 15:31:21 +0500 Subject: [PATCH 13/13] Update react_agent.py --- inference/react_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference/react_agent.py b/inference/react_agent.py index ec3aa26..2e1dee8 100644 --- a/inference/react_agent.py +++ b/inference/react_agent.py @@ -250,4 +250,4 @@ def custom_call_tool(self, tool_name: str, tool_args: dict, **kwargs): return result else: - return f"Error: Tool {tool_name} not found"} \ No newline at end of file + return f"Error: Tool {tool_name} not found"