From af584ff011e24dcf20fc05c45dff98be94cb8be9 Mon Sep 17 00:00:00 2001 From: paperTII <2293564561@qq.com> Date: Mon, 20 Oct 2025 19:28:53 +0800 Subject: [PATCH 1/5] Performance test Performance test --- test/config/uc_performance_config.yaml | 24 + test/test_uc_performance | 947 +++++++++++++++++++++++++ 2 files changed, 971 insertions(+) create mode 100644 test/config/uc_performance_config.yaml create mode 100644 test/test_uc_performance diff --git a/test/config/uc_performance_config.yaml b/test/config/uc_performance_config.yaml new file mode 100644 index 00000000..f1c4c5f1 --- /dev/null +++ b/test/config/uc_performance_config.yaml @@ -0,0 +1,24 @@ +# 测试用例列表 +server_config: + model: "qwen3" + server_url: "http://141.111.32.70:9382" + tokenizer_path: "/home/models/QwQ-32B" + +test_cases: + - mean_input_tokens: 600 + stddev_input_tokens: 0 + mean_output_tokens: 300 + stddev_output_tokens: 0 + max_num_completed_requests: 1 + num_concurrent_requests: 1 + additional_sampling_params: "{}" + hit_rate: 0 + + - mean_input_tokens: 600 + stddev_input_tokens: 0 + mean_output_tokens: 300 + stddev_output_tokens: 0 + max_num_completed_requests: 1 + num_concurrent_requests: 1 + additional_sampling_params: "{}" + hit_rate: 0 diff --git a/test/test_uc_performance b/test/test_uc_performance new file mode 100644 index 00000000..c38c2c7b --- /dev/null +++ b/test/test_uc_performance @@ -0,0 +1,947 @@ +import hashlib +import pathlib +import subprocess +import sys +import threading +import logging +from collections.abc import Iterable +import json +import os +from datetime import datetime +from pathlib import Path +import re +import time +import random +from typing import Any, Dict, List, Optional, Tuple + +import pandas as pd +import ray +import yaml +from openpyxl.reader.excel import load_workbook +from ray.util import ActorPool +import requests +from tqdm import tqdm + +from transformers import LlamaTokenizerFast, AutoTokenizer + +# —————————————————————— +# 常量定义(用于性能指标键名) +# —————————————————————— +SUPPORTED_APIS = ["openai", "anthropic", "litellm"] + +INTER_TOKEN_LAT = "inter_token_latency_s" +TTFT = "ttft_s" +E2E_LAT = "end_to_end_latency_s" +NUM_INPUT_TOKENS = "number_input_tokens" +NUM_OUTPUT_TOKENS = "number_output_tokens" +NUM_TOTAL_TOKENS = "number_total_tokens" +REQ_OUTPUT_THROUGHPUT = "request_output_throughput_token_per_s" +ERROR_MSG = "error_msg" +ERROR_CODE = "error_code" +ERROR_CODE_FREQ = "error_code_frequency" +NUM_ERRORS = "number_errors" +OUTPUT_THROUGHPUT = "mean_output_throughput_token_per_s" +NUM_COMPLETED_REQUESTS = "num_completed_requests" +COMPLETED_REQUESTS_PER_MIN = "num_completed_requests_per_min" +ERROR_RATE = "error_rate" +NUM_REQ_STARTED = "num_requests_started" + + +class RequestConfig: + """ + 请求配置类 — 表示一次 LLM 请求所需的参数。 + 属性: + model — 模型名称 + prompt — (文本, token 长度) 二元组 + sampling_params — 抽样参数字典(如 max_tokens 等) + llm_api — 使用的 API 名称(如 "openai") + metadata — 任意附加元数据字典 + openai_api_base — OpenAI 或兼容服务的基础 URL + """ + def __init__( + self, + model: str, + prompt: Tuple[str, int], + sampling_params: Optional[Dict[str, Any]] = None, + llm_api: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + openai_api_base: Optional[str] = "" + ): + self.model = model + self.prompt = prompt + self.sampling_params = sampling_params or {} + self.llm_api = llm_api + self.metadata = metadata or {} + self.openai_api_base = openai_api_base + +@ray.remote +class OpenAIChatCompletionsClient: + """ + LLM 客户端(远程 actor) — 用于调用 OpenAI Chat Completions 接口(流式)。 + 负责发送请求、接收 token 
流、统计延迟和吞吐率等指标。 + """ + def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]: + prompt = request_config.prompt + prompt, prompt_len = prompt + + message = [ + {"role": "system", "content": ""}, + {"role": "user", "content": prompt}, + ] + model = request_config.model + body = { + "model": model, + "messages": message, + "stream": True, + "ignore_eos": True, + } + sampling_params = request_config.sampling_params + body.update(sampling_params or {}) + time_to_next_token = [] + tokens_received = 0 + ttft = 0 + error_response_code = -1 + generated_text = "" + error_msg = "" + output_throughput = 0 + total_request_time = 0 + + metrics = {} + + metrics[ERROR_CODE] = None + metrics[ERROR_MSG] = "" + + start_time = time.monotonic() + most_recent_received_token_time = time.monotonic() + address = request_config.openai_api_base + if not address: + raise ValueError("the environment variable OPENAI_API_BASE must be set.") + key = os.environ.get("OPENAI_API_KEY", "secret_abcdefg") + if not key: + raise ValueError("the environment variable OPENAI_API_KEY must be set.") + headers = {"Authorization": f"Bearer {key}"} + if not address: + raise ValueError("No host provided.") + if not address.endswith("/"): + address = address + "/" + address += "chat/completions" + try: + with requests.post( + address, + json=body, + stream=True, + timeout=180, + headers=headers, + ) as response: + if response.status_code != 200: + error_msg = response.text + error_response_code = response.status_code + response.raise_for_status() + for chunk in response.iter_lines(chunk_size=None): + chunk = chunk.strip() + + if not chunk: + continue + stem = "data: " + chunk = chunk[len(stem):] + if chunk == b"[DONE]": + continue + tokens_received += 1 + data = json.loads(chunk) + + if "error" in data: + error_msg = data["error"]["message"] + error_response_code = data["error"]["code"] + raise RuntimeError(data["error"]["message"]) + + delta = data["choices"][0]["delta"] + if delta.get("content", None): + if not ttft: + ttft = time.monotonic() - start_time + # time_to_next_token.append(ttft) + else: + time_to_next_token.append( + time.monotonic() - most_recent_received_token_time + ) + most_recent_received_token_time = time.monotonic() + generated_text += delta.get("content", None) or delta.get("reasoning_content", "") + + total_request_time = time.monotonic() - start_time + output_throughput = tokens_received / total_request_time + + except Exception as e: + metrics[ERROR_MSG] = error_msg + metrics[ERROR_CODE] = error_response_code + print(f"[WARN] 请求发生异常:{e},返回码:{error_response_code}") + print(error_response_code) + + metrics[INTER_TOKEN_LAT] = sum( + time_to_next_token) # This should be same as metrics[common_metrics.E2E_LAT]. 
Leave it here for now + metrics[TTFT] = ttft + metrics[E2E_LAT] = total_request_time + metrics[REQ_OUTPUT_THROUGHPUT] = output_throughput + metrics[NUM_TOTAL_TOKENS] = tokens_received + prompt_len + metrics[NUM_OUTPUT_TOKENS] = tokens_received + metrics[NUM_INPUT_TOKENS] = prompt_len + + return metrics, generated_text, request_config + + +class RequestsLauncher: + """ + 请求启动器 — 管理多个 LLM 客户端 actor,并发提交请求。 + """ + def __init__(self, llm_clients: List[OpenAIChatCompletionsClient]): + self._llm_client_pool = ActorPool(llm_clients) + + def launch_requests(self, request_config: RequestConfig) -> None: + """ + 提交一个请求配置至客户端池。 + 参数: + request_config — RequestConfig 实例,包含请求参数 + """ + if self._llm_client_pool.has_free(): + self._llm_client_pool.submit( + lambda client, _request_config: client.llm_request.remote( + _request_config + ), + request_config, + ) + + def get_next_ready(self, block: bool = False) -> List[Any]: + """ + 获取所有已完成的请求结果。 + 参数: + block — 若为 True,则阻塞直到至少一个结果准备好。 + 返回: + 已完成请求的结果列表。 + """ + results = [] + if not block: + while self._llm_client_pool.has_next(): + results.append(self._llm_client_pool.get_next_unordered()) + else: + while not self._llm_client_pool.has_next(): + pass + while self._llm_client_pool.has_next(): + results.append(self._llm_client_pool.get_next_unordered()) + return results + + +class LLMPerfResults: + """ + 高层记录包装类,可用于最终输出 JSON、flatten 结构等。 + """ + def __init__(self, name: str, metadata: Dict[str, Any] = None): + self.name = name + self.metadata = metadata or {} + self.timestamp = int(time.time()) + self.metadata["timestamp"] = self.timestamp + self.version = "2025-10-17" + + def to_dict(self): + data = { + "version": self.version, + "name": self.name, + } + data.update(self.metadata) + return flatten_dict(data) + + def json(self): + data = self.to_dict() + return json.dumps(data) + + +def sample_random_positive_int(mean: int, stddev: int) -> int: + """ + 从高斯分布采样一个正整数 (>0)。 + 参数: + mean — 均值 + stddev — 标准差 + 返回: + 一个大于 0 的整数 + """ + while True: + v = int(random.gauss(mean, stddev)) + if v > 0: + return v + + +def randomly_sample_sonnet_lines_prompt( + prompt_tokens_mean: int = 550, + prompt_tokens_stddev: int = 250, + tokenizer = None, +) -> Tuple[str, int]: + """ + 随机从 Shakespeare 的 sonnet.txt 中抽取行并拼为 prompt,使其 token 长度接近指定值。 + 参数: + prompt_tokens_mean — 目标 token 均值 + prompt_tokens_stddev — token 长度标准差 + tokenizer — 分词器实例(若为 None 则默认加载 LlamaTokenizerFast) + 返回: + (prompt_str, prompt_token_length) + """ + if tokenizer is None: + tokenizer = LlamaTokenizerFast.from_pretrained("./llama-tokenizer") + + def token_len(text: str) -> int: + return len(tokenizer.encode(text)) + + # 基础开头 prompt + base = ("Randomly stream lines from the following text\n\n" + "Don't generate eos tokens:\n\n") + base_len = token_len(base) + + # 目标 prompt token 总数 + target = sample_random_positive_int(prompt_tokens_mean, prompt_tokens_stddev) + while target < base_len: + target = sample_random_positive_int(prompt_tokens_mean, prompt_tokens_stddev) + + remaining = target - base_len + + sonnet_path = pathlib.Path(__file__).parent / "sonnet.txt" + lines = sonnet_path.read_text(encoding="utf-8").splitlines() + random.shuffle(lines) + + prompt = base + for line in lines: + l = line + "\n" + l_len = token_len(l) + if l_len <= remaining: + prompt += l + remaining -= l_len + else: + # 裁剪 + # 可能截断单词,但 ok + cut = l[: max(1, int(remaining))] + prompt += cut + break + + # 打印 prompt 的 hash 供 debug + h = hashlib.sha256(prompt.encode("utf-8")).hexdigest() + print(f"Prompt hash: {h}") + + return prompt, 
token_len(prompt) + +def get_token_throughput_latencies( + model: str, + mean_input_tokens: int, + stddev_input_tokens: int, + mean_output_tokens: int, + stddev_output_tokens: int, + additional_sampling_params: Optional[Dict[str, Any]] = None, + num_concurrent_requests: int = 1, + max_num_completed_requests: int = 500, + test_timeout_s=90, + llm_api="openai", + random_seed: int = None, + openai_api_base: str = "", + tokenizer_path: str = None, +) -> Tuple[Dict[str, Any], List[Dict[str, Any]], float, float]: + """ + 获取给定模型的令牌吞吐量和延迟。 + + 参数: + model:要查询的模型的名称。 + mean_input_tokens:请求提示中发送的平均令牌数。 + stddev_input_tokens:请求提示中发送的令牌数的标准差。 + mean_output_tokens:每个请求生成的平均令牌数。 + stddev_output_tokens:每个请求生成令牌数的标准差。 + additional_sampling_params:随请求发送的附加采样参数。 + 有关更多信息,请参阅 LLM API 文档中的补全功能。 + num_concurrent_requests:要发出的并发请求数。增加此值可增加负载量 + test_timeout_s:报告结果之前运行测试的时间。 + llm_api:要使用的 llm api 的名称 + + 返回: + 所有已完成请求的性能指标摘要 + """ + random.seed(random_seed) + + if tokenizer_path: + print(f"Using tokenizer:{tokenizer_path}") + tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) + else: + print("Using default tokenizer") + tokenizer = LlamaTokenizerFast.from_pretrained( + "./llama-tokenizer" + ) + get_token_length = lambda text: len(tokenizer.encode(text)) + + if not additional_sampling_params: + additional_sampling_params = {} + + completed_requests_lock = threading.Lock() + completed_requests = [] + num_completed_requests = 0 + incremental_time_delay = 0 + # make up prompts outside of send loop for faster benchmarking loop + num_output_tokens_list = [] + prompts = [] + for i in range(max_num_completed_requests): + num_output_tokens = (sample_random_positive_int( + mean_output_tokens, stddev_output_tokens + )) + num_output_tokens_list.append(num_output_tokens) + + prompts.append(randomly_sample_sonnet_lines_prompt( + prompt_tokens_mean=mean_input_tokens, + prompt_tokens_stddev=stddev_input_tokens, + tokenizer=tokenizer + )) + end_time = 0 + start_time = time.monotonic() + pbar = tqdm(total=max_num_completed_requests) + + def launch_request(thread_index): + nonlocal num_completed_requests, end_time, incremental_time_delay + num_clients = 1 + clients = [OpenAIChatCompletionsClient.remote() for _ in range(num_clients)] + req_launcher = RequestsLauncher(clients) + request_index = thread_index % max_num_completed_requests + + while ( + time.monotonic() - start_time < test_timeout_s + and num_completed_requests < max_num_completed_requests + ): + default_sampling_params = {"max_tokens": num_output_tokens_list[request_index] } + default_sampling_params.update(additional_sampling_params) + request_config = RequestConfig( + model=model, + prompt=prompts[request_index], + sampling_params=default_sampling_params, + llm_api=llm_api, + openai_api_base=openai_api_base + ) + req_launcher.launch_requests(request_config) + + outs = req_launcher.get_next_ready() + all_metrics = [] + for out in outs: + request_metrics, gen_text, _ = out + num_output_tokens = get_token_length(gen_text) + incremental_time_delay += request_metrics[INTER_TOKEN_LAT] + with completed_requests_lock: + if num_completed_requests < max_num_completed_requests: + if num_output_tokens: + request_metrics[INTER_TOKEN_LAT] /= (request_metrics[NUM_OUTPUT_TOKENS] - 1) + else: + request_metrics[INTER_TOKEN_LAT] = 0 + request_metrics[NUM_OUTPUT_TOKENS] = num_output_tokens + request_metrics[NUM_TOTAL_TOKENS] = request_metrics[NUM_INPUT_TOKENS] + num_output_tokens + try: + request_metrics[REQ_OUTPUT_THROUGHPUT] = num_output_tokens / request_metrics[E2E_LAT] + 
except ZeroDivisionError: + logging.error( + "Division by zero in throughput calculation: E2E_LAT is 0. " + "This indicates the client received no valid response. " + "Possible server-side error occurred — please check server logs for details." + ) + return + + all_metrics.append(request_metrics) + completed_requests.extend(all_metrics) + pbar.update(len(all_metrics)) + num_completed_requests += len(all_metrics) + if num_completed_requests == max_num_completed_requests: + end_time = time.monotonic() + request_index = (request_index + num_concurrent_requests) % max_num_completed_requests + + threads = [] + for i in range(num_concurrent_requests): + thread = threading.Thread(target=launch_request, args=(i,)) + threads.append(thread) + thread.start() + + for thread in threads: + thread.join() + + pbar.close() + if end_time - start_time >= test_timeout_s: + print("Test timed out before all requests could be completed.") + + # check one last time that there are no remaining results to collect. + num_clients = 1 + clients = [OpenAIChatCompletionsClient.remote() for _ in range(num_clients)] + req_launcher = RequestsLauncher(clients) + outs = req_launcher.get_next_ready() + all_metrics = [] + for out in outs: + request_metrics, gen_text, _ = out + num_output_tokens = get_token_length(gen_text) + with completed_requests_lock: + if num_completed_requests < max_num_completed_requests: + if num_output_tokens: + request_metrics[INTER_TOKEN_LAT] /= num_output_tokens + else: + request_metrics[INTER_TOKEN_LAT] = 0 + request_metrics[NUM_OUTPUT_TOKENS] = num_output_tokens + request_metrics[NUM_TOTAL_TOKENS] = request_metrics[NUM_INPUT_TOKENS] + num_output_tokens + request_metrics[REQ_OUTPUT_THROUGHPUT] = num_output_tokens / request_metrics[E2E_LAT] + completed_requests.extend(request_metrics) + + print(f"\Results for token benchmark for {model} queried with the {llm_api} api.\n") + if mean_output_tokens == 2: + print(f"[INFO] 首次token发送预埋完成\n") + return {}, [], 0.0, 0.0 + + ret = metrics_summary(completed_requests, start_time, end_time) + + metadata = { + "model": model, + "mean_input_tokens": mean_input_tokens, + "stddev_input_tokens": stddev_input_tokens, + "mean_output_tokens": mean_output_tokens, + "stddev_output_tokens": stddev_output_tokens, + "num_concurrent_requests": num_concurrent_requests, + "additional_sampling_params": additional_sampling_params, + } + + metadata["results"] = ret + elapsed_time = end_time - start_time + return metadata, completed_requests, elapsed_time, incremental_time_delay + + +def metrics_summary( + metrics: List[Dict[str, Any]], start_time: int, end_time: int +) -> Dict[str, Any]: + """ + 汇总多个请求的性能指标,生成总体统计(吞吐率、延迟分位数、错误率等)。 + 参数: + metrics — 单个请求指标的字典列表 + start_time — 测试启动时间(monotonic) + end_time — 测试结束时间(monotonic) + 返回: + 一个字典,包含汇总后的指标 + """ + ret = {} + + def flatten(item): + for sub_item in item: + if isinstance(sub_item, Iterable) and not isinstance(sub_item, str): + yield from flatten(sub_item) + else: + yield sub_item + + df = pd.DataFrame(metrics) + df_without_errored_req = df[df[ERROR_CODE].isna()] + + for key in [ + INTER_TOKEN_LAT, + TTFT, + E2E_LAT, + REQ_OUTPUT_THROUGHPUT, + NUM_INPUT_TOKENS, + NUM_OUTPUT_TOKENS + ]: + print(key) + ret[key] = {} + series = pd.Series(list(flatten(df_without_errored_req[key]))).dropna() + quantiles = series.quantile([0.25, 0.5, 0.75, 0.9, 0.95, 0.99]).to_dict() + quantiles_reformatted_keys = {} + for quantile, value in quantiles.items(): + reformatted_key = f"p{int(quantile * 100)}" + print(f" {reformatted_key} = {value}") + 
quantiles_reformatted_keys[reformatted_key] = value + ret[key]["quantiles"] = quantiles_reformatted_keys + mean = series.mean() + print(f" mean = {mean}") + ret[key]["mean"] = mean + print(f" min = {series.min()}") + ret[key]["min"] = series.min() + print(f" max = {series.max()}") + ret[key]["max"] = series.max() + print(f" stddev = {series.std()}") + ret[key]["stddev"] = series.std() + + ret[NUM_REQ_STARTED] = len(metrics) + + error_codes = df[ERROR_CODE].dropna() + num_errors = len(error_codes) + ret[ERROR_RATE] = num_errors / len(metrics) if len(metrics) else 0 + ret[NUM_ERRORS] = num_errors + print(f"Number Of Errored Requests: {num_errors}") + error_code_frequency = dict(error_codes.value_counts()) + if num_errors: + error_code_frequency = dict(error_codes.value_counts()) + print("Error Code Frequency") + print(error_code_frequency) + ret[ERROR_CODE_FREQ] = str(error_code_frequency) + + overall_output_throughput = df_without_errored_req[ + NUM_OUTPUT_TOKENS + ].sum() / (end_time - start_time) + + print(f"Overall Output Throughput: {overall_output_throughput}") + ret[OUTPUT_THROUGHPUT] = overall_output_throughput + + num_completed_requests = len(df_without_errored_req) + num_completed_requests_per_min = ( + num_completed_requests / (end_time - start_time) * 60 + ) + print(f"Number Of Completed Requests: {num_completed_requests}") + print(f"Completed Requests Per Minute: {num_completed_requests_per_min}") + + ret[NUM_COMPLETED_REQUESTS] = num_completed_requests + ret[COMPLETED_REQUESTS_PER_MIN] = num_completed_requests_per_min + + return ret + +def run_token_benchmark( + llm_api: str, + model: str, + test_timeout_s: int, + max_num_completed_requests: int, + num_concurrent_requests: int, + mean_input_tokens: int, + stddev_input_tokens: int, + mean_output_tokens: int, + stddev_output_tokens: int, + additional_sampling_params: str, + results_dir: str, + random_seed: int, + openai_api_base: str, + tokenizer_path: str, + user_metadata: Dict[str, Any], + idx: int +): + """ + 执行一次 token 吞吐率 + 延迟基准测试。 + 参数: + llm_api — 调用的 API 名称 + model — 模型名称 + test_timeout_s — 测试超时时间(秒) + max_num_completed_requests — 最大完成请求数 + num_concurrent_requests — 并发请求数 + mean_input_tokens — 输入 token 平均值 + stddev_input_tokens — 输入 token 标准差 + mean_output_tokens — 输出 token 平均值 + stddev_output_tokens — 输出 token 标准差 + additional_sampling_params — 抽样参数 JSON 字符串 + results_dir — 结果保存目录 + random_seed — 随机种子 + openai_api_base — OpenAI 或兼容服务基础 URL + tokenizer_path — 分词器路径 + user_metadata — 用户指定的元数据字典 + idx — 用例索引或标识(可选) + 返回: + summary — 汇总指标字典 + individual_responses — 单个请求指标列表 + elapsed_time — 总耗时 + incremental_time_delay — 累计 decode 时延(inter-token 总延时) + """ + if mean_input_tokens < 40: + print("[WARN] 由于目前的提示逻辑,Input tokens的最小数量为41") + + summary, individual_responses, elapsed_time, incremental_time_delay = get_token_throughput_latencies( + model=model, + llm_api=llm_api, + test_timeout_s=test_timeout_s, + max_num_completed_requests=max_num_completed_requests, + mean_input_tokens=mean_input_tokens, + stddev_input_tokens=stddev_input_tokens, + mean_output_tokens=mean_output_tokens, + stddev_output_tokens=stddev_output_tokens, + num_concurrent_requests=num_concurrent_requests, + additional_sampling_params=json.loads(additional_sampling_params), + random_seed=random_seed, + openai_api_base=openai_api_base, + tokenizer_path=tokenizer_path, + ) + if mean_output_tokens == 2: + return summary, individual_responses, elapsed_time, incremental_time_delay + + if results_dir: + filename = 
f"{model}_{mean_input_tokens}_{mean_output_tokens}_{idx}" + filename = re.sub(r"[^\w\d-]+", "-", filename) + filename = re.sub(r"-{2,}", "-", filename) + summary_filename = f"{filename}_summary" + individual_responses_filename = f"{filename}_individual_responses" + + # Update to metadata. + summary.update(user_metadata) + summary["elapsed_time"] = elapsed_time # 新增运行时长 + summary["incremental_time_delay"] = incremental_time_delay # 新增增量时延 decode时延总和 + + results = LLMPerfResults(name=summary_filename, metadata=summary) + results_dir = Path(results_dir) + if not results_dir.exists(): + results_dir.mkdir(parents=True) + elif not results_dir.is_dir(): + raise ValueError(f"{results_dir} is not a directory") + + try: + with open(results_dir / f"{summary_filename}.json", "w") as f: + json.dump(results.to_dict(), f, indent=4, default=str) + except Exception as e: + print(results.to_dict()) + raise e + + try: + with open(results_dir / f"{individual_responses_filename}.json", "w") as f: + json.dump(individual_responses, f, indent=4) + except Exception as e: + print(individual_responses) + raise e + +def flatten_dict(d: Dict[str, Any], parent_key: str = "", sep: str = "_") -> Dict[str, Any]: + """将可能嵌套的 dict 扁平化为 key1_key2 形式的单层 dict。""" + res: Dict[str, Any] = {} + for k, v in d.items(): + new_key = parent_key + sep + k if parent_key else k + if isinstance(v, dict): + res.update(flatten_dict(v, new_key, sep=sep)) + else: + res[new_key] = v + return res + +def reset_prefill_cache(env, server_url): + """ + 重置前缀缓存(prefix cache / HBM)。 + 参数: + env — 环境变量字典 + server_url — 服务基础 URL + """ + reset_url = f"{server_url}/reset_prefix_cache" + print(f"[INFO] 正在重置 prefix cache: {reset_url}") + try: + result = subprocess.run( + ["curl", "-X", "POST", reset_url, "-s", "-f"], + env=env, + check=False, + capture_output=True, + text=True, + timeout=10 + ) + if result.returncode == 0: + print("[INFO] prefix cache 重置成功") + else: + print(f"[ERROR] 重置 prefix cache 失败,返回码: {result.returncode}") + except Exception as e: + print(f"[ERROR] 重置 prefix cache 异常: {e}") + +def run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path): + """ + 执行所有测试用例,并返回失败用例索引列表及每个用例的命中率映射。 + 参数: + test_cases — 配置文件中读取的测试用例列表 + timestamp_dir — 用于保存结果的目录 Path + model — 模型名称 + server_url — 服务基础 URL + tokenizer_path— 分词器路径 + 返回: + failed_cases — 失败用例索引列表 + case_hit_rate_map — {case_idx: hit_rate} 的映射 + """ + print(f"[INFO] 共计 {len(test_cases)} 个测试用例待执行") + failed_case = [] + + # 清除代理环境变量 + env = os.environ.copy() + env.pop('http_proxy', None) + env.pop('https_proxy', None) + + # 用于存储每个 case_idx 的 hit_rate(用于后续导出至excel表格) + case_hit_rate_map = {} + + for i, case in enumerate(test_cases): + print(f"\n>>> 执行第 {i + 1} 个测试用例 <<<") + reset_prefill_cache(env, server_url) + # 每次测试使用固定 random_seed 控制 PC 命中率 + random_seed = random.randint(1, 100000) + + # 从配置文件读取参数 + mean_input = case.get("mean_input_tokens", 5000) + stddev_input = case.get("stddev_input_tokens", 0) + mean_output = case.get("mean_output_tokens", 1000) + stddev_output = case.get("stddev_output_tokens", 0) + max_completed = case.get("max_num_completed_requests", 1) + concurrent = case.get("num_concurrent_requests", 1) + llm_api = case.get("llm_api", "openai") + additional_sampling_params = case.get("additional_sampling_params", "{}") + timeout = case.get("timeout", 60000) + hit_rate = case.get("hit_rate", 0) + + # 记录这个 case 的 hit_rate + case_hit_rate_map[i] = hit_rate + + # 判断是否需要执行两次(PC 命中率测试) + if hit_rate == 0: + run_token_benchmark( + llm_api=llm_api, + model=model, + 
test_timeout_s=timeout, + max_num_completed_requests=max_completed, + num_concurrent_requests=concurrent, + mean_input_tokens=mean_input, + stddev_input_tokens=stddev_input, + mean_output_tokens=mean_output, + stddev_output_tokens=stddev_output, + additional_sampling_params=additional_sampling_params, + results_dir=str(timestamp_dir), + random_seed=random_seed, + openai_api_base=server_url + "/v1", + tokenizer_path=tokenizer_path, + user_metadata={"case_idx": i}, + idx=i+1 + ) + else: + print("[INFO] 检测到 hit_rate > 0,进入预填充模式") + # hit_rate > 0: 先 prefill 模式 + prefill_mean_input = int(mean_input * hit_rate / 100) + print(f"[INFO] 预填充执行:mean_input_tokens={prefill_mean_input}") + run_token_benchmark( + llm_api=llm_api, + model=model, + test_timeout_s=timeout, + max_num_completed_requests=max_completed, + num_concurrent_requests=concurrent, + mean_input_tokens=prefill_mean_input, + stddev_input_tokens=stddev_input, + mean_output_tokens=2, + stddev_output_tokens=stddev_output, + additional_sampling_params=additional_sampling_params, + results_dir=str(timestamp_dir), + random_seed=random_seed, + openai_api_base=server_url + "/v1", + tokenizer_path=tokenizer_path, + user_metadata={"case_idx": i, "phase": "prefill"} + ) + # 然后正常模式 + print("[INFO] 预填充完成,切换至正常模式执行") + run_token_benchmark( + llm_api=llm_api, + model=model, + test_timeout_s=timeout, + max_num_completed_requests=max_completed, + num_concurrent_requests=concurrent, + mean_input_tokens=mean_input, + stddev_input_tokens=stddev_input, + mean_output_tokens=mean_output, + stddev_output_tokens=stddev_output, + additional_sampling_params=additional_sampling_params, + results_dir=str(timestamp_dir), + random_seed=random_seed, + openai_api_base=server_url + "/v1", + tokenizer_path=tokenizer_path, + user_metadata={"case_idx": i, "phase": "normal"} + ) + + return failed_case, case_hit_rate_map + +def collect_and_export_results(results_dir, model, case_hit_rate_map): + """ + 收集每个测试产生的 `_summary.json` 文件,并导出为 Excel 报告。 + 参数: + results_dir — 结果文件保存目录 + model — 模型名称 + case_hit_rate_map — {case_idx: hit_rate} 映射 + """ + print(f"\n[INFO] 开始收集 {results_dir} 下的 summary.json 文件") + + results_dir = Path(results_dir) + json_files = sorted(results_dir.glob("*_summary.json"), key=lambda f: f.stat().st_mtime) + print(f"[INFO] 找到 {len(json_files)} 个 summary 文件") + + if not json_files: + print("[WARN] 未找到 summary.json 文件,跳过导出") + return + + field_mapping = { + "mean_input_tokens": "input_tokens", + "mean_output_tokens": "output_tokens", + "results_inter_token_latency_s_quantiles_p50": "TBT_p50", + "results_inter_token_latency_s_quantiles_p90": "TBT_p90", + "results_inter_token_latency_s_quantiles_p99": "TBT_p99", + "results_inter_token_latency_s_mean": "TBT_mean", + "results_ttft_s_quantiles_p50": "TTFT_p50", + "results_ttft_s_quantiles_p90": "TTFT_p90", + "results_ttft_s_quantiles_p99": "TTFT_p99", + "results_ttft_s_mean": "TTFT_mean", + "results_end_to_end_latency_s_quantiles_p50": "E2E_p50", + "results_end_to_end_latency_s_quantiles_p90": "E2E_p90", + "results_end_to_end_latency_s_quantiles_p99": "E2E_p99", + "results_end_to_end_latency_s_mean": "E2E_mean", + } + + rows = [] + for i, json_file in enumerate(json_files): + try: + with open(json_file, 'r', encoding='utf-8') as f: + data = json.load(f) + + hit_rate = case_hit_rate_map.get(i, 0) + mean_output_tokens = data.get("results_number_output_tokens_mean", 0) + num_completed_requests = data.get("results_num_completed_requests", 0) + total_e2e_latency_s = data.get("elapsed_time", 0) + total_generation_time_s 
= data.get("incremental_time_delay", 0) + + total_throughput = (mean_output_tokens * num_completed_requests / total_e2e_latency_s + if total_e2e_latency_s > 0 else 0.0) + incremental_throughput = (mean_output_tokens * num_completed_requests / total_generation_time_s + if total_generation_time_s > 0 else 0.0) + + row = {new_name: data.get(orig_name) for orig_name, new_name in field_mapping.items()} + row["TPT"] = round(total_throughput, 4) + row["IPT"] = round(incremental_throughput, 4) + row["Hit_Rate"] = hit_rate if hit_rate > 0 else 0.0 + rows.append(row) + except Exception as e: + print(f"[ERROR] 读取 {json_file} 失败: {e}") + + if not rows: + print("[WARN] 无有效数据可导出") + return + + df = pd.DataFrame(rows) + excel_path = results_dir / f"{model}_benchmark.xlsx" + df.to_excel(excel_path, index=False, engine='openpyxl') + + workbook = load_workbook(excel_path) + worksheet = workbook.active + for col in worksheet.columns: + worksheet.column_dimensions[col[0].column_letter].width = 10 + workbook.save(excel_path) + + print(f"[INFO] 已导出汇总结果到: {excel_path},共 {len(rows)} 行数据") + + +def main(): + """ + 主流程入口:读取配置 → 创建结果目录 → 执行所有用例 → 导出报告 + """ + config_file = "uc_test/config.yaml" + print(f"[INFO] 开始读取配置文件: {config_file}") + + try: + with open(config_file, 'r', encoding='utf-8') as f: + config = yaml.safe_load(f) + model = config.get("server_config", {}).get("model", "") + server_url = config.get("server_config", {}).get("server_url", "") + tokenizer_path = config.get("server_config", {}).get("tokenizer_path", "") + test_cases = config.get("test_cases", []) + except Exception as e: + print(f"[ERROR] 解析 YAML 失败: {e}") + sys.exit(1) + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + timestamp_dir = Path("result_outputs") / timestamp + timestamp_dir.mkdir(parents=True, exist_ok=True) + print(f"[INFO] 创建结果目录: {timestamp_dir}") + + failed_cases, case_hit_rate_map = run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path) + total = len(test_cases) + print(f"\n[INFO] 所有测试完成!成功: {total - len(failed_cases)}/{total}") + if failed_cases: + print(f"[WARN] 失败用例索引: {failed_cases}") + + collect_and_export_results(timestamp_dir, "qwen3", case_hit_rate_map) + + +if __name__ == "__main__": + # 初始化 ray + env_vars = dict(os.environ) + ray.init(runtime_env={"env_vars": env_vars}) + print("[INFO] Ray 初始化完成,开始主流程") + + main() From dc454e0f98b4d53107b0de396365959d2059da9a Mon Sep 17 00:00:00 2001 From: NaganooMei <104300720+NaganooMei@users.noreply.github.com> Date: Wed, 29 Oct 2025 15:25:45 +0800 Subject: [PATCH 2/5] [BugFix]fix mtp in ucm (#321) * fix mtp in ucm --- ucm/integration/vllm/uc_connector.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ucm/integration/vllm/uc_connector.py b/ucm/integration/vllm/uc_connector.py index ddba78d6..dac3d8a9 100644 --- a/ucm/integration/vllm/uc_connector.py +++ b/ucm/integration/vllm/uc_connector.py @@ -334,9 +334,9 @@ def wait_for_layer_load(self, layer_name: str) -> None: if self.layerwise_load_tasks: logger.debug(f"Waiting for layer {self.current_layer} to be loaded") - assert ( - self.current_layer < self.num_layers - ), "The current layer should be less than total layers!" 
+ if self.current_layer >= self.num_layers: + return + for request_id, layer_to_task in self.layerwise_load_tasks.items(): if request_id in self._load_failed_reqs: continue @@ -384,6 +384,9 @@ def save_kv_layer( if not self.use_layerwise: return + if self.current_layer > self.num_layers: + return + metadata = self._get_connector_metadata() assert isinstance(metadata, UCConnectorV1Metadata) From 06442f04e13d02b89c7d57b8d5ed852c87cdb68b Mon Sep 17 00:00:00 2001 From: "Mag1c.H" Date: Wed, 29 Oct 2025 18:49:36 +0800 Subject: [PATCH 3/5] [bugfix] preserve DRAM buffer lifetime to restore inference accuracy (#322) * linear buffer for device * check data consistency after embedding --- ucm/store/device/ibuffered_device.h | 50 +++++++++++++++++++--------- ucm/store/test/e2e/nfsstore_embed.py | 36 ++++++++++++++++++++ 2 files changed, 71 insertions(+), 15 deletions(-) diff --git a/ucm/store/device/ibuffered_device.h b/ucm/store/device/ibuffered_device.h index 4c1ac2bb..a56ce67a 100644 --- a/ucm/store/device/ibuffered_device.h +++ b/ucm/store/device/ibuffered_device.h @@ -25,11 +25,37 @@ #define UNIFIEDCACHE_IBUFFERED_DEVICE_H #include "idevice.h" -#include "thread/index_pool.h" namespace UC { class IBufferedDevice : public IDevice { + class LinearBuffer { + std::shared_ptr addr_{nullptr}; + size_t index_{0}; + size_t number_{0}; + size_t size_{0}; + + public: + void Setup(std::shared_ptr addr, const size_t number, const size_t size) + { + this->addr_ = addr; + this->number_ = number; + this->size_ = size; + this->Reset(); + } + void Reset() noexcept { this->index_ = 0; } + bool Full() const noexcept { return this->index_ == this->number_; } + bool Available(const size_t size) const noexcept { return this->size_ >= size; } + std::shared_ptr Get() noexcept + { + auto addr = this->addr_.get(); + auto buffer = addr + this->size_ * this->index_; + ++this->index_; + return std::shared_ptr(buffer, [](auto) {}); + } + }; + LinearBuffer buffer_; + public: IBufferedDevice(const int32_t deviceId, const size_t bufferSize, const size_t bufferNumber) : IDevice{deviceId, bufferSize, bufferNumber} @@ -39,26 +65,20 @@ class IBufferedDevice : public IDevice { { auto totalSize = this->bufferSize * this->bufferNumber; if (totalSize == 0) { return Status::OK(); } - this->_addr = this->MakeBuffer(totalSize); - if (!this->_addr) { return Status::OutOfMemory(); } - this->_indexPool.Setup(this->bufferNumber); + auto addr = this->MakeBuffer(totalSize); + if (!addr) { return Status::OutOfMemory(); } + this->buffer_.Setup(addr, this->bufferNumber, this->bufferSize); return Status::OK(); } virtual std::shared_ptr GetBuffer(const size_t size) override { - if (!this->_addr || size > this->bufferSize) { return this->MakeBuffer(size); } - auto idx = this->_indexPool.Acquire(); - if (idx != IndexPool::npos) { - auto ptr = this->_addr.get() + this->bufferSize * idx; - return std::shared_ptr(ptr, - [this, idx](auto) { this->_indexPool.Release(idx); }); + if (this->buffer_.Full()) { + auto status = this->Synchronized(); + if (status.Failure()) { return nullptr; } + this->buffer_.Reset(); } - return this->MakeBuffer(size); + return this->buffer_.Available(size) ? 
this->buffer_.Get() : this->MakeBuffer(size); } - -private: - std::shared_ptr _addr{nullptr}; - IndexPool _indexPool; }; } // namespace UC diff --git a/ucm/store/test/e2e/nfsstore_embed.py b/ucm/store/test/e2e/nfsstore_embed.py index 8c76fcdb..0b6e2fc5 100644 --- a/ucm/store/test/e2e/nfsstore_embed.py +++ b/ucm/store/test/e2e/nfsstore_embed.py @@ -80,6 +80,39 @@ def embed(store: UcmKVStoreBase, hashes: List[str], tensors: List[List[torch.Ten store.commit(hashes, True) +def fetch(store: UcmKVStoreBase, hashes: List[str], tensors: List[List[torch.Tensor]]): + founds = store.lookup(hashes) + for found in founds: + assert found + block_ids = [] + offsets = [] + layers = [] + for hash_id, block in zip(hashes, tensors): + offset = 0 + for layer in block: + block_ids.append(hash_id) + offsets.append(offset) + layers.append(layer) + offset += layer.untyped_storage().size() + task = store.load(block_ids, offsets, layers) + assert task.task_id > 0 + ret = store.wait(task) + assert ret == 0 + + +def cmp_and_print_diff(a, b, rtol=0.0, atol=0.0): + for r, (row_a, row_b) in enumerate(zip(a, b)): + for c, (ta, tb) in enumerate(zip(row_a, row_b)): + if not torch.allclose(ta, tb, rtol=rtol, atol=atol): + mask = ~torch.isclose(ta, tb, rtol=rtol, atol=atol) + diff_a = ta[mask].cpu() + diff_b = tb[mask].cpu() + print(f"DIFF at [{r}][{c}] total {mask.sum().item()} element(s)") + print(" a val:", diff_a.flatten()) + print(" b val:", diff_b.flatten()) + assert False + + def store_all_hashes(hashes): kvcache_block_hashes_file = "kvcache_block_hashes.txt" current_directory = os.path.dirname(__file__) @@ -108,7 +141,10 @@ def main(): for batch in range(total_batches): start = batch_size * batch end = min(start + batch_size, block_number) + tensors2 = [[torch.empty_like(t) for t in row] for row in tensors] embed(store, hashes[start:end], tensors) + fetch(store, hashes[start:end], tensors2) + cmp_and_print_diff(tensors, tensors2) store_all_hashes(hashes) From 4b8b8deb14c7a78d7a33bbc8b45bac46d9ece713 Mon Sep 17 00:00:00 2001 From: paperTII <2293564561@qq.com> Date: Thu, 30 Oct 2025 10:23:01 +0800 Subject: [PATCH 4/5] New performance testing tools New performance testing tools New performance testing tools --- test/.gitignore | 9 + test/README.md | 219 ++++ test/README_zh.md | 227 +++++ test/common/__init__.py | 0 test/common/allure_utils.py | 196 ++++ test/common/config_utils.py | 80 ++ test/common/influxdb_utils.py | 58 ++ test/common/llmperf/__init__.py | 0 test/common/llmperf/run_inference.py | 169 ++++ test/common/llmperf/utils/__init__.py | 0 test/common/llmperf/utils/common_metrics.py | 17 + test/common/llmperf/utils/models.py | 22 + .../utils/openai_chat_completions_client.py | 122 +++ test/common/llmperf/utils/sonnet.txt | 84 ++ test/common/llmperf/utils/token_benchmark.py | 327 ++++++ test/common/llmperf/utils/utils.py | 168 ++++ test/config.yaml | 50 + test/config/uc_performance_config.yaml | 24 - test/conftest.py | 388 +++++++ test/pytest.ini | 26 + test/requirements.txt | 9 + test/suites/test_demo_function.py | 185 ++++ test/suites/test_uc_performance.py | 159 +++ test/test_uc_performance | 947 ------------------ 24 files changed, 2515 insertions(+), 971 deletions(-) create mode 100644 test/.gitignore create mode 100644 test/README.md create mode 100644 test/README_zh.md create mode 100644 test/common/__init__.py create mode 100644 test/common/allure_utils.py create mode 100644 test/common/config_utils.py create mode 100644 test/common/influxdb_utils.py create mode 100644 test/common/llmperf/__init__.py 
create mode 100644 test/common/llmperf/run_inference.py create mode 100644 test/common/llmperf/utils/__init__.py create mode 100644 test/common/llmperf/utils/common_metrics.py create mode 100644 test/common/llmperf/utils/models.py create mode 100644 test/common/llmperf/utils/openai_chat_completions_client.py create mode 100644 test/common/llmperf/utils/sonnet.txt create mode 100644 test/common/llmperf/utils/token_benchmark.py create mode 100644 test/common/llmperf/utils/utils.py create mode 100644 test/config.yaml delete mode 100644 test/config/uc_performance_config.yaml create mode 100644 test/conftest.py create mode 100644 test/pytest.ini create mode 100644 test/requirements.txt create mode 100644 test/suites/test_demo_function.py create mode 100644 test/suites/test_uc_performance.py delete mode 100644 test/test_uc_performance diff --git a/test/.gitignore b/test/.gitignore new file mode 100644 index 00000000..e6578117 --- /dev/null +++ b/test/.gitignore @@ -0,0 +1,9 @@ +reports/ +dataset/ +logs/ +$null +*__pycache__/ +.* +*.log +start.bat +!.gitignore \ No newline at end of file diff --git a/test/README.md b/test/README.md new file mode 100644 index 00000000..00aeb064 --- /dev/null +++ b/test/README.md @@ -0,0 +1,219 @@ +# UCM Pytest Testing Framework + +A unified cache management testing framework based on pytest, supporting multi-level testing, flexible marking, performance data collection, and beautiful Allure report generation. + +## Framework Features + +- [x] 🏗️ **Multi-level Testing**: UnitTest(0) → Smoke(1) → Feature(2) → E2E(3) +- [x] 🏷️ **Flexible Marking**: Support for feature tags, platform tags, and reliability tags +- [x] 📊 **Data Collection**: Integrated InfluxDB performance data pushing +- [x] 📋 **Beautiful Reports**: Allure test report integration, supporting both static HTML and dynamic server modes +- [x] 🔧 **Configuration Management**: Flexible YAML-based configuration system +- [x] 🚀 **Automation**: Support for parallel test execution and automatic cleanup + +## Test Level Definitions + +| Level | Name | Description | Execution Time | +|-----|------|------|----------| +| 0 | UnitTest | Unit Tests | Every code commit | +| 1 | Smoke | Smoke Tests | Build verification | +| 2 | Feature | Feature Tests | When features are completed | +| 3 | E2E | End-to-End Tests | Before version release | + +## Directory Structure + +``` +test/ +├── config.yaml # Test framework configuration file +├── conftest.py # pytest configuration and fixtures, main program entry +├── pytest.ini # pytest markers and basic configuration +├── requirements.txt # Dependency package list +├── common/ # Common utility library +│ ├── __init__.py +│ ├── config_utils.py # Configuration file reading tools +│ ├── influxdb_utils.py # InfluxDB writing tools +│ └── allure_utils.py # Allure reporting tools +├── suites/ # Test case directory +│ ├── UnitTest/ # Unit tests (stage 0) +│ ├── Smoke/ # Smoke tests (stage 1) +│ ├── Feature/ # Feature tests (stage 2) +│ ├── E2E/ # End-to-end tests (stage 3) +│ └── test_demo_function.py# Example test cases +├── reports/ # Test report directory +└── logs/ # Test log directory +``` + +## Quick Start + +### 1. Environment Setup +```bash +# Install dependencies +pip install -r requirements.txt + +# Ensure Allure CLI is installed (for report generation) +# Download from: https://github.com/allure-framework/allure2/releases +``` + +### 2. 
Configuration File +The main configuration file is `config.yaml`, containing the following configuration items: +- **reports**: Report generation configuration (HTML/Allure) +- **log**: Logging configuration +- **influxdb**: Performance data push configuration +- **llm_connection**: LLM connection configuration + +### 3. Running Tests +```bash +# Run all tests +pytest + +# Run specific level tests +pytest --stage=1 # Run smoke tests +pytest --stage=2+ # Run feature and end-to-end tests + +# Run specific tag tests +pytest --feature=performance # Run performance-related tests +pytest --platform=gpu # Run GPU platform tests +pytest --reliability=high # Run high reliability tests + +# Combined filtering +pytest --stage=1 --feature=performance,accuracy # Performance and accuracy tests in smoke tests +``` + +## Test Case Standards + +### Basic Structure +```python +import pytest +import allure +from common.config_utils import config_utils as config_instance + +class TestExample: + """Test example class""" + + @pytest.mark.stage(2) + @pytest.mark.feature("performance") + @pytest.mark.platform("gpu") + def test_gpu_performance(self): + """Test GPU performance""" + # Arrange + test_data = config_instance.get_config("test_data") + + # Act & Assert + with allure.step("Execute GPU computation"): + result = perform_gpu_calculation(test_data) + assert result.is_valid + + # Collect performance data + from common.influxdb_utils import push_to_influx + push_to_influx("gpu_compute_time", result.duration, { + "test_name": "test_gpu_performance", + "platform": "gpu" + }) +``` + +### Marking Usage Guidelines + +#### 1. Level Markers (Required) +```python +@pytest.mark.stage(0) # Unit tests +@pytest.mark.stage(1) # Smoke tests +@pytest.mark.stage(2) # Feature tests +@pytest.mark.stage(3) # End-to-end tests +``` + +#### 2. Feature Markers (Recommended) +```python +@pytest.mark.feature("performance") # Performance tests +@pytest.mark.feature("accuracy") # Accuracy tests +@pytest.mark.feature("memory") # Memory tests +``` + +#### 3. Platform Markers (Optional) +```python +@pytest.mark.platform("gpu") # GPU platform tests +@pytest.mark.platform("npu") # NPU platform tests +@pytest.mark.platform("cpu") # CPU platform tests +``` + +#### 4. Reliability Markers (Optional) +```python +@pytest.mark.reliability("high") # High reliability tests +@pytest.mark.reliability("medium") # Medium reliability tests +@pytest.mark.reliability("low") # Low reliability tests +``` + +## Allure Report Integration + +### 1. Basic Usage +```python +import allure + +@allure.feature('User Authentication') +@allure.story('Login Function') +def test_user_login(): + """Test user login functionality""" + with allure.step("Enter username and password"): + login_page.enter_credentials("user", "pass") + + with allure.step("Click login button"): + login_page.click_login() + + with allure.step("Verify successful login"): + assert dashboard_page.is_displayed() + + # Add attachment + allure.attach("Screenshot data", name="Login Screenshot", + attachment_type=allure.attachment_type.PNG) +``` + +### 2. Report Configuration +Configure Allure reports in `config.yaml`: +```yaml +reports: + allure: + enabled: true + html_enable: true + serve_mode: true # Use dynamic server mode + serve_host: "localhost" + serve_port: 8081 + directory: "allure-results" +``` + +### 3. 
Report Viewing +- **Static HTML Mode**: Automatically generates static HTML reports after test completion +- **Dynamic Server Mode**: Starts Allure server, providing interactive report interface + +## Performance Data Collection + +### InfluxDB Integration +```python +from common.influxdb_utils import push_to_influx + +# Collect performance data in tests +def test_performance_metrics(): + start_time = time.time() + + # Execute test logic + result = perform_operation() + + # Push performance data to InfluxDB + push_to_influx("operation_duration", time.time() - start_time, { + "test_name": "test_performance_metrics", + "operation_type": "calculation", + "success": str(result.success) + }) +``` + +## Extensions and Customization + +### Adding New Markers +1. Add new marker definitions in the `markers` section of `pytest.ini` +2. Keep the `markers =` and `# end of markers` lines unchanged +3. Re-run tests to use new markers + +### Custom Configuration +Customize through `config.yaml`: +- Report format and storage location +- Log level and output format +- InfluxDB connection parameters +- LLM service configuration diff --git a/test/README_zh.md b/test/README_zh.md new file mode 100644 index 00000000..56c68815 --- /dev/null +++ b/test/README_zh.md @@ -0,0 +1,227 @@ +# UCM Pytest 测试框架 + +基于pytest的统一缓存管理测试框架,支持多级别测试、灵活标记、性能数据收集和Allure精美报告生成。 + +## 框架特性 + +- [x] 🏗️ **多级别测试**: UnitTest(0) → Smoke(1) → Feature(2) → E2E(3) +- [x] 🏷️ **灵活标记**: 支持功能标签、平台标签和可靠性标签 +- [x] 📊 **数据收集**: 集成InfluxDB性能数据推送 +- [x] 📋 **精美报告**: Allure测试报告集成,支持静态HTML和动态服务模式 +- [x] 🔧 **配置管理**: 基于YAML的灵活配置系统 +- [x] 🚀 **自动化**: 支持并行测试执行和自动清理 + +## 测试级别定义 + +| 级别 | 名称 | 说明 | 执行时机 | +|-----|------|------|----------| +| 0 | UnitTest | 单元测试 | 每次代码提交 | +| 1 | Smoke | 冒烟测试 | 构建验证 | +| 2 | Feature | 功能测试 | 特性完成时 | +| 3 | E2E | 端到端测试 | 版本发布前 | + +## 目录结构 + +``` +test/ +├── config.yaml # 测试框架配置文件 +├── conftest.py # pytest配置和fixtures,程序主入口 +├── pytest.ini # pytest标记和基础配置 +├── requirements.txt # 依赖包列表 +├── common/ # 通用工具库 +│ ├── __init__.py +│ ├── config_utils.py # 配置文件读取工具 +│ ├── influxdb_utils.py # InfluxDB写入工具 +│ └── allure_utils.py # Allure报告工具 +├── suites/ # 测试用例目录 +│ ├── UnitTest/ # 单元测试 (stage 0) +│ ├── Smoke/ # 冒烟测试 (stage 1) +│ ├── Feature/ # 功能测试 (stage 2) +│ ├── E2E/ # 端到端测试 (stage 3) +│ └── test_demo_function.py# 示例测试用例 +├── reports/ # 测试报告目录 +└── logs/ # 日志目录 +``` + +## 快速开始 + +### 1. 环境准备 +```bash +# 安装依赖 +pip install -r requirements.txt + +# 确保Allure CLI已安装(用于生成报告) +# 下载地址: https://github.com/allure-framework/allure2/releases +``` + +### 2. 配置文件 +主要配置文件为 `config.yaml`,包含以下配置项: +- **reports**: 报告生成配置(HTML/Allure) +- **log**: 日志配置 +- **influxdb**: 性能数据推送配置 +- **llm_connection**: LLM连接配置 + +### 3. 
运行测试 +```bash +# 运行所有测试 +pytest + +# 运行特定级别的测试 +pytest --stage=1 # 运行冒烟测试 +pytest --stage=2+ # 运行功能测试和端到端测试 + +# 运行特定标签的测试 +pytest --feature=performance # 运行性能相关测试 +pytest --platform=gpu # 运行GPU平台测试 +pytest --reliability=high # 运行高可靠性测试 + +# 组合过滤 +pytest --stage=1 --feature=performance,accuracy # 冒烟测试中的性能和准确性测试 +``` + +## 测试用例标准 + +### 基本结构 +```python +import pytest +import allure +from common.config_utils import config_utils as config_instance + +class TestExample: + """测试示例类""" + + @pytest.mark.stage(2) + @pytest.mark.feature("performance") + @pytest.mark.platform("gpu") + def test_gpu_performance(self): + """测试GPU性能""" + # Arrange + test_data = config_instance.get_config("test_data") + + # Act & Assert + with allure.step("执行GPU计算"): + result = perform_gpu_calculation(test_data) + assert result.is_valid + + # 收集性能数据 + from common.influxdb_utils import push_to_influx + push_to_influx("gpu_compute_time", result.duration, { + "test_name": "test_gpu_performance", + "platform": "gpu" + }) +``` + +### 标记使用规范 + +#### 1. 级别标记 (必需) +```python +@pytest.mark.stage(0) # 单元测试 +@pytest.mark.stage(1) # 冒烟测试 +@pytest.mark.stage(2) # 功能测试 +@pytest.mark.stage(3) # 端到端测试 +``` + +#### 2. 功能标记 (推荐) +```python +@pytest.mark.feature("performance") # 性能测试 +@pytest.mark.feature("accuracy") # 准确性测试 +@pytest.mark.feature("memory") # 内存测试 +``` + +#### 3. 平台标记 (可选) +```python +@pytest.mark.platform("gpu") # GPU平台测试 +@pytest.mark.platform("npu") # NPU平台测试 +@pytest.mark.platform("cpu") # CPU平台测试 +``` + +#### 4. 可靠性标记 (可选) +```python +@pytest.mark.reliability("high") # 高可靠性测试 +@pytest.mark.reliability("medium") # 中等可靠性测试 +@pytest.mark.reliability("low") # 低可靠性测试 +``` + +## Allure 报告集成 + +### 1. 基本用法 +```python +import allure + +@allure.feature('用户认证') +@allure.story('登录功能') +def test_user_login(): + """测试用户登录功能""" + with allure.step("输入用户名和密码"): + login_page.enter_credentials("user", "pass") + + with allure.step("点击登录按钮"): + login_page.click_login() + + with allure.step("验证登录成功"): + assert dashboard_page.is_displayed() + + # 添加附件 + allure.attach("Screenshot data", name="登录截图", + attachment_type=allure.attachment_type.PNG) +``` + +### 2. 报告配置 +在 `config.yaml` 中配置Allure报告: +```yaml +reports: + allure: + enabled: true + html_enable: true + serve_mode: true # 使用动态服务模式 + serve_host: "localhost" + serve_port: 8081 + directory: "allure-results" +``` + +### 3. 报告查看 +- **静态HTML模式**: 测试完成后自动生成静态HTML报告 +- **动态服务模式**: 启动Allure服务器,提供交互式报告界面 + +## 性能数据收集 + +### InfluxDB 集成 +```python +from common.influxdb_utils import push_to_influx + +# 在测试中收集性能数据 +def test_performance_metrics(): + start_time = time.time() + + # 执行测试逻辑 + result = perform_operation() + + # 推送性能数据到InfluxDB + push_to_influx("operation_duration", time.time() - start_time, { + "test_name": "test_performance_metrics", + "operation_type": "calculation", + "success": str(result.success) + }) +``` + +## 扩展和自定义 + +### 添加新标记 +1. 在 `pytest.ini` 的 `markers` 部分添加新标记定义 +2. 保持 `markers =` 和 `# end of markers` 两行不变 +3. 重新运行测试即可使用新标记 + +### 自定义配置 +通过修改 `config.yaml` 可以自定义: +- 报告格式和存储位置 +- 日志级别和输出格式 +- InfluxDB连接参数 +- LLM服务配置 + +## 最佳实践 + +1. **测试命名**: 使用描述性的测试方法名 +2. **标记使用**: 为每个测试添加适当的级别和功能标记 +3. **步骤分解**: 使用Allure步骤将复杂测试分解为可读的步骤 +4. **数据驱动**: 使用参数化测试减少重复代码 +5. 
**环境隔离**: 使用fixtures确保测试环境的一致性 diff --git a/test/common/__init__.py b/test/common/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/common/allure_utils.py b/test/common/allure_utils.py new file mode 100644 index 00000000..80bbd1d2 --- /dev/null +++ b/test/common/allure_utils.py @@ -0,0 +1,196 @@ +""" +Allure Report Utility +Provides convenient Allure reporting functionality and decorators +""" + +import allure +import os +import pytest +import subprocess +import shutil +import time +import platform +import sys +from pathlib import Path +from typing import Dict, Any, ContextManager, Optional, Union, List + + + + +def setup_allure(config: Dict[str, Any]) -> Optional[Path]: + """Configure Allure results directory and write environment.properties.""" + allure_cfg = config.get("allure", {}) + if not allure_cfg.get("enabled", False): + return None + + # 1. 沿用你原来的目录逻辑 + base_dir = Path(config.get("base_dir", "reports")) + if config.get("use_timestamp", False) and base_dir.exists(): + timestamp_dirs = [ + d for d in base_dir.iterdir() + if d.is_dir() and d.name.startswith(config.get("directory_prefix", "pytest")) + ] + if timestamp_dirs: + timestamp_dirs.sort(key=lambda x: x.stat().st_mtime, reverse=True) + base_dir = timestamp_dirs[0] + + allure_dir = base_dir / allure_cfg.get("directory", "allure-results") + allure_dir.mkdir(parents=True, exist_ok=True) + os.environ["ALLURE_REPORT_DIR"] = str(allure_dir) + + # 2. 新增:写入环境信息 + env_info = _get_system_info() # 采集系统信息 + custom_env = allure_cfg.get("environment", {}) # 允许用户再追加/覆盖 + env_info.update(custom_env) + _create_environment_properties(allure_dir, env_info) + + return allure_dir + + +def check_allure_available() -> bool: + """Check if Allure CLI is installed and working.""" + try: + allure_path = shutil.which("allure") + if not allure_path: + return False + result = subprocess.run( + [allure_path, "--version"], + capture_output=True, + text=True, + timeout=10, + shell=True + ) + return result.returncode == 0 + except Exception: + return False + + +def serve_allure_report( + allure_results_dir: Union[str, Path], + host: str = "localhost", + port: int = 8080, + auto_open: bool = True +) -> Optional[subprocess.Popen]: + """Start Allure server and optionally open browser.""" + if not check_allure_available(): + print("Allure CLI not found. 
Install from https://github.com/allure-framework/allure2/releases") + return None + + allure_results_dir = Path(allure_results_dir) + if not allure_results_dir.exists() or not any(allure_results_dir.iterdir()): + print(f"Allure results directory missing or empty: {allure_results_dir}") + return None + + allure_path = shutil.which("allure") + cmd = [allure_path, "serve", str(allure_results_dir), "--host", host] + if port > 0: + cmd.extend(["--port", str(port)]) + + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True + ) + print(f"Allure server starting at http://{host}:{port} (PID: {process.pid})") + print("Please press Ctrl+C to stop the server") + time.sleep(3) + + if process.poll() is not None: + print("Allure server failed to start") + return None + + try: + while process.poll() is None: + time.sleep(0.5) + except KeyboardInterrupt: + print("\nStopping Allure server...") + process.terminate() + try: + process.wait(timeout=5) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + return process + + +def generate_allure_html( + allure_results_dir: Union[str, Path], + html_output_dir: Optional[Union[str, Path]] = None, + clean: bool = False, + auto_serve: bool = False +) -> Optional[Union[Path, subprocess.Popen]]: + """Generate static HTML report or serve dynamically.""" + if not check_allure_available(): + print("Allure CLI not found. Install from https://github.com/allure-framework/allure2/releases") + return None + + allure_results_dir = Path(allure_results_dir) + if not allure_results_dir.exists() or not any(allure_results_dir.iterdir()): + print(f"Allure results directory missing or empty: {allure_results_dir}") + return None + + if auto_serve: + return serve_allure_report(allure_results_dir) + + html_output_dir = Path(html_output_dir or allure_results_dir.parent / "allure-report") + if clean and html_output_dir.exists(): + shutil.rmtree(html_output_dir) + html_output_dir.mkdir(parents=True, exist_ok=True) + + allure_path = shutil.which("allure") + cmd = f'{allure_path} generate "{allure_results_dir}" -o "{html_output_dir}" --clean' + result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=60) + + if result.returncode == 0: + print(f"Allure HTML report generated: {html_output_dir}") + return html_output_dir + else: + print(f"HTML generation failed: {result.stderr}") + return None + + +def _create_environment_properties(allure_results_dir: Union[str, Path], + environment_info: Dict[str, str]) -> None: + allure_results_dir = Path(allure_results_dir) + allure_results_dir.mkdir(parents=True, exist_ok=True) + + env_file = allure_results_dir / "environment.properties" + + with open(env_file, 'w', encoding='utf-8') as f: + for key, value in environment_info.items(): + f.write(f"{key}={value}\n") + + print(f"Environment properties file created: {env_file}") + + +def _get_system_info() -> Dict[str, str]: + """Human-readable system information (English only).""" + info: Dict[str, str] = {} + + # ---------- OS ---------- + os_name = platform.system() + info["OS"] = os_name + + # ---------- Architecture ---------- + arch = platform.architecture()[0] # '64bit' / '32bit' + info["Architecture"] = "64-bit" if "64" in arch else "32-bit" + + # ---------- Python ---------- + # info["Python Implementation"] = platform.python_implementation() + info["Python"] = sys.version.split()[0].replace("Version=", "") + + # ---------- Hardware ---------- + machine = platform.machine() + 
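+        # platform.machine() typically returns 'AMD64' on 64-bit Windows and
+        # 'x86_64' / 'aarch64' on Linux, hence the normalization below.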
info["Machine"] = "x86-64" if machine == "AMD64" else machine + proc = platform.processor() + if "Intel" in proc: + info["Processor"] = "Intel" + elif "AMD" in proc: + info["Processor"] = "AMD" + else: + info["Processor"] = proc.split()[0] if proc else "Kunpeng" + + return info \ No newline at end of file diff --git a/test/common/config_utils.py b/test/common/config_utils.py new file mode 100644 index 00000000..3cdc427b --- /dev/null +++ b/test/common/config_utils.py @@ -0,0 +1,80 @@ +import yaml +import os +import threading +from typing import Dict, Any + + +class ConfigUtils: + """ + Singleton Configuration Utility + Provides methods to read and access YAML configuration files. + """ + + _instance = None + _lock = threading.Lock() # Ensure thread-safe singleton creation + + def __new__(cls, config_file: str = None): + # Double-checked locking + if cls._instance is None: + with cls._lock: + if cls._instance is None: + instance = super().__new__(cls) + instance._init_config(config_file) + cls._instance = instance + return cls._instance + + def _init_config(self, config_file: str = None): + """Initialize configuration file path and load config""" + if config_file is None: + current_dir = os.path.dirname(os.path.abspath(__file__)) + config_file = os.path.join(current_dir, "..", "config.yaml") + + self.config_file = os.path.abspath(config_file) + self._config = None # Lazy load + + def _load_config(self) -> Dict[str, Any]: + """Internal method to read configuration from file""" + try: + with open(self.config_file, "r", encoding="utf-8") as f: + return yaml.safe_load(f) or {} + except FileNotFoundError: + print(f"[WARN] Config file not found: {self.config_file}") + return {} + except yaml.YAMLError as e: + print(f"[ERROR] Failed to parse YAML config: {e}") + return {} + + def read_config(self) -> Dict[str, Any]: + """Read configuration file (lazy load)""" + if self._config is None: + self._config = self._load_config() + return self._config + + def reload_config(self): + """Force reload configuration file""" + self._config = self._load_config() + + def get_config(self, key: str, default: Any = None) -> Any: + """Get top-level configuration item""" + config = self.read_config() + return config.get(key, default) + + def get_nested_config(self, key_path: str, default: Any = None) -> Any: + """Get nested configuration, e.g., 'influxdb.host'""" + config = self.read_config() + keys = key_path.split(".") + value = config + try: + for k in keys: + value = value[k] + return value + except (KeyError, TypeError): + return default + + +# Global instance +config_utils = ConfigUtils() + +if __name__ == "__main__": + print("InfluxDB config:", config_utils.get_config("influxdb")) + print("InfluxDB host:", config_utils.get_nested_config("influxdb.host", "localhost")) diff --git a/test/common/influxdb_utils.py b/test/common/influxdb_utils.py new file mode 100644 index 00000000..5d564061 --- /dev/null +++ b/test/common/influxdb_utils.py @@ -0,0 +1,58 @@ +""" +InfluxDB Data Push Utility +Provides convenient InfluxDB data writing functionality +""" + +from datetime import datetime +from typing import Dict, Any, Optional, Union +from influxdb_client import InfluxDBClient, Point, WritePrecision +from influxdb_client.client.write_api import SYNCHRONOUS +from config_utils import config_utils as config_instance + +class InfluxDBUtils: + """InfluxDB Utility Class""" + + def __init__(self): + """Initialize InfluxDB connection""" + self.config = config_instance.get_config("influxdb") + + +# Global InfluxDB utility instance 
+influxdb_utils = InfluxDBUtils() + + +def push_to_influx(measurement: str, + value: Union[int, float, str], + tags: Optional[Dict[str, str]] = None, + fields: Optional[Dict[str, Union[int, float, str]]] = None, + timestamp: Optional[datetime] = None) -> bool: + + return None + + +def push_test_metric(test_name: str, + metric_name: str, + value: Union[int, float], + additional_tags: Optional[Dict[str, str]] = None) -> bool: + print("Push to InfluxDB, To be implemented.") + + +if __name__ == "__main__": + # Simple data push + push_to_influx("response_time", 0.123) + + # Data push with tags + push_to_influx("accuracy", 0.95, { + "model": "v1.0", + "platform": "gpu", + "test_case": "classification" + }) + + # Test metric push + push_test_metric("test_calculation_accuracy", "calculation_time", 0.001, { + "feature": "accuracy" + }) + + # Data push with timestamp + from datetime import datetime + push_to_influx("memory_usage", 1024, {"test": "memory"}, timestamp=datetime.now()) \ No newline at end of file diff --git a/test/common/llmperf/__init__.py b/test/common/llmperf/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/common/llmperf/run_inference.py b/test/common/llmperf/run_inference.py new file mode 100644 index 00000000..801163de --- /dev/null +++ b/test/common/llmperf/run_inference.py @@ -0,0 +1,169 @@ +import json +import os +import random +from pathlib import Path +from typing import List, Dict, Any + +import yaml + +from common.llmperf.utils.token_benchmark import run_token_benchmark +from common.llmperf.utils.utils import reset_prefill_cache + + +def run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path): + """ + Execute all test cases and return the list of failed case indices and hit_rate mapping for each case. 
+ Parameters: + test_cases — List of test cases read from the configuration file + timestamp_dir — Directory Path to save results + model — Model name + server_url — Base URL of the service + tokenizer_path— Path to the tokenizer + Returns: + failed_cases — List of failed case indices + case_hit_rate_map — Mapping of {case_idx: hit_rate} + """ + print(f"[INFO] Total {len(test_cases)} test cases to be executed") + failed_case = [] + + # Clear proxy environment variables + env = os.environ.copy() + env.pop('http_proxy', None) + env.pop('https_proxy', None) + + # Store hit_rate for each case_idx (to export to Excel later) + case_hit_rate_map = {} + + for i, case in enumerate(test_cases): + print(f"\n>>> Executing test case {i + 1} <<<") + reset_prefill_cache(env, server_url) + # Use a fixed random_seed for each test to control PC hit_rate + random_seed = random.randint(1, 100000) + + # Read parameters from configuration file + mean_input = case.get("mean_input_tokens", 5000) + stddev_input = case.get("stddev_input_tokens", 0) + mean_output = case.get("mean_output_tokens", 1000) + stddev_output = case.get("stddev_output_tokens", 0) + max_completed = case.get("max_num_completed_requests", 1) + concurrent = case.get("num_concurrent_requests", 1) + llm_api = case.get("llm_api", "openai") + additional_sampling_params = case.get("additional_sampling_params", "{}") + timeout = case.get("timeout", 60000) + hit_rate = case.get("hit_rate", 0) + + # Record hit_rate for this case + case_hit_rate_map[i] = hit_rate + try: + # Determine if two runs are needed (PC hit_rate test) + if hit_rate == 0: + run_token_benchmark( + llm_api=llm_api, + model=model, + test_timeout_s=timeout, + max_num_completed_requests=max_completed, + num_concurrent_requests=concurrent, + mean_input_tokens=mean_input, + stddev_input_tokens=stddev_input, + mean_output_tokens=mean_output, + stddev_output_tokens=stddev_output, + additional_sampling_params=additional_sampling_params, + results_dir=str(timestamp_dir), + random_seed=random_seed, + openai_api_base=server_url + "/v1", + tokenizer_path=tokenizer_path, + user_metadata={"case_idx": i} + ) + else: + print("[INFO] hit_rate > 0 detected, entering prefill mode") + # hit_rate > 0: first prefill mode + prefill_mean_input = int(mean_input * hit_rate / 100) + print(f"[INFO] Prefill execution: mean_input_tokens={prefill_mean_input}") + run_token_benchmark( + llm_api=llm_api, + model=model, + test_timeout_s=timeout, + max_num_completed_requests=max_completed, + num_concurrent_requests=concurrent, + mean_input_tokens=prefill_mean_input, + stddev_input_tokens=stddev_input, + mean_output_tokens=2, + stddev_output_tokens=stddev_output, + additional_sampling_params=additional_sampling_params, + results_dir=str(timestamp_dir), + random_seed=random_seed, + openai_api_base=server_url + "/v1", + tokenizer_path=tokenizer_path, + user_metadata={"case_idx": i, "phase": "prefill"} + ) + # Then run normal mode + print("[INFO] Prefill completed, switching to normal mode execution") + run_token_benchmark( + llm_api=llm_api, + model=model, + test_timeout_s=timeout, + max_num_completed_requests=max_completed, + num_concurrent_requests=concurrent, + mean_input_tokens=mean_input, + stddev_input_tokens=stddev_input, + mean_output_tokens=mean_output, + stddev_output_tokens=stddev_output, + additional_sampling_params=additional_sampling_params, + results_dir=str(timestamp_dir), + random_seed=random_seed, + openai_api_base=server_url + "/v1", + tokenizer_path=tokenizer_path, + user_metadata={"case_idx": i, 
"phase": "normal"} + ) + except Exception as e: + failed_case.append(i) + + return failed_case, case_hit_rate_map + +def getResult(performance_name: str): + results_dir = Path("result_outputs") + matched_values: List[Dict[str, Any]] = [] + for idx, fname in enumerate(os.listdir(results_dir)): + if not fname.lower().endswith(".json"): + continue + + file_path = os.path.join(results_dir, fname) + try: + with open(file_path, "r", encoding="utf-8") as f: + data = json.load(f) + except Exception as e: + print(f"[ERROR] Failed to read {file_path}: {e}") + continue + + # Iterate over each key in the dictionary + for key, value in data.items(): + if isinstance(key, str) and performance_name.lower() in key.lower(): + matched_values.append(value) + + print(f"[INFO] Found {len(matched_values)} matching values under {results_dir}, substring = '{performance_name}'") + return matched_values + +def inference_results(performance_name: str): + config_file = Path(__file__).parent.parent.parent / "config.yaml" + results_dir = Path("result_outputs") + if os.path.exists(results_dir) and len(os.listdir(results_dir)) != 0: + print("Test results already exist!!!!!!!!!!!!!!!") + else: + print("[INFO] Initialization complete, starting main process") + print(f"[INFO] Reading configuration file: {config_file}") + with open(config_file, 'r', encoding='utf-8') as f: + config = yaml.safe_load(f) + model = config.get("llm_connection", {}).get("model", "") + server_url = config.get("llm_connection", {}).get("server_url", "") + tokenizer_path = config.get("llm_connection", {}).get("tokenizer_path", "") + test_cases = config.get("llmperf_test_cases", []) + timestamp_dir = Path("result_outputs") + timestamp_dir.mkdir(parents=True, exist_ok=True) + print(f"[INFO] Created results directory: {timestamp_dir}") + + failed_cases, case_hit_rate_map = run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path) + total = len(test_cases) + print(f"\n[INFO] All tests completed! 
Success: {total - len(failed_cases)}/{total}") + if failed_cases: + print(f"[WARN] Failed case indices: {failed_cases}") + return getResult(performance_name) \ No newline at end of file diff --git a/test/common/llmperf/utils/__init__.py b/test/common/llmperf/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/common/llmperf/utils/common_metrics.py b/test/common/llmperf/utils/common_metrics.py new file mode 100644 index 00000000..3b05b437 --- /dev/null +++ b/test/common/llmperf/utils/common_metrics.py @@ -0,0 +1,17 @@ +# TODO (Avnishn): compute metrics in class +INTER_TOKEN_LAT = "inter_token_latency_s" +TTFT = "ttft_s" +E2E_LAT = "end_to_end_latency_s" +NUM_INPUT_TOKENS = "number_input_tokens" +NUM_OUTPUT_TOKENS = "number_output_tokens" +NUM_TOTAL_TOKENS = "number_total_tokens" +REQ_OUTPUT_THROUGHPUT = "request_output_throughput_token_per_s" +ERROR_MSG = "error_msg" +ERROR_CODE = "error_code" +ERROR_CODE_FREQ = "error_code_frequency" +NUM_ERRORS = "number_errors" +OUTPUT_THROUGHPUT = "mean_output_throughput_token_per_s" +NUM_COMPLETED_REQUESTS = "num_completed_requests" +COMPLETED_REQUESTS_PER_MIN = "num_completed_requests_per_min" +ERROR_RATE = "error_rate" +NUM_REQ_STARTED = "num_requests_started" \ No newline at end of file diff --git a/test/common/llmperf/utils/models.py b/test/common/llmperf/utils/models.py new file mode 100644 index 00000000..f70e8a7e --- /dev/null +++ b/test/common/llmperf/utils/models.py @@ -0,0 +1,22 @@ +from typing import Any, Dict, Optional, Tuple +from pydantic import BaseModel + + +class RequestConfig(BaseModel): + """The configuration for a request to the LLM API. + + Args: + model: The model to use. + prompt: The prompt to provide to the LLM API. + sampling_params: Additional sampling parameters to send with the request. + For more information see the Router app's documentation for the completions + llm_api: The name of the LLM API to send the request to. + metadata: Additional metadata to attach to the request for logging or validation purposes. + """ + + model: str + prompt: Tuple[str, int] + sampling_params: Optional[Dict[str, Any]] = None + llm_api: Optional[str] = None + metadata: Optional[Dict[str, Any]] = None + openai_api_base: Optional[str] = "" \ No newline at end of file diff --git a/test/common/llmperf/utils/openai_chat_completions_client.py b/test/common/llmperf/utils/openai_chat_completions_client.py new file mode 100644 index 00000000..b24320d0 --- /dev/null +++ b/test/common/llmperf/utils/openai_chat_completions_client.py @@ -0,0 +1,122 @@ +import json +import os +import time +from typing import Any, Dict, Tuple + +import requests + +from common.llmperf.utils.models import RequestConfig + +from common.llmperf.utils import common_metrics + + +class OpenAIChatCompletionsClient(): + """ + used for sending HTTP requests, receiving token streams, measuring latency, etc. 
+ """ + def llm_request(self, request_config: RequestConfig) -> Tuple[Dict[str, Any], str, RequestConfig]: + prompt, prompt_len = request_config.prompt + + message = [ + {"role": "system", "content": ""}, + {"role": "user", "content": prompt}, + ] + model = request_config.model + body = { + "model": model, + "messages": message, + "stream": True, + "ignore_eos": True, + } + sampling_params = request_config.sampling_params + body.update(sampling_params or {}) + + time_to_next_token = [] + tokens_received = 0 + ttft = 0.0 + error_response_code = None + generated_text = "" + error_msg = "" + output_throughput = 0.0 + total_request_time = 0.0 + flag = False + + metrics: Dict[str, Any] = {} + + metrics[common_metrics.ERROR_CODE] = None + metrics[common_metrics.ERROR_MSG] = "" + + start_time = time.monotonic() + most_recent_received_token_time = start_time + + address = request_config.openai_api_base + + if not address: + raise ValueError("the environment variable OPENAI_API_BASE must be set.") + key = os.environ.get("OPENAI_API_KEY", "secret_abcdefg") + if not key: + raise ValueError("the environment variable OPENAI_API_KEY must be set.") + headers = {"Authorization": f"Bearer {key}"} + if not address.endswith("/"): + address = address + "/" + address += "chat/completions" + try: + with requests.post( + address, + json=body, + stream=True, + timeout=180, + headers=headers, + ) as response: + if response.status_code != 200: + error_msg = response.text + error_response_code = response.status_code + response.raise_for_status() + + for chunk in response.iter_lines(chunk_size=None): + if not chunk: + continue + stem = b"data: " + if chunk.startswith(stem): + chunk = chunk[len(stem):] + # Data might already be bytes or str + if isinstance(chunk, bytes): + chunk = chunk.decode("utf-8", errors="ignore") + if chunk.strip() == "[DONE]": + continue + tokens_received += 1 + data = json.loads(chunk) + if "error" in data: + error_msg = data["error"]["message"] + error_response_code = data["error"]["code"] + raise RuntimeError(error_msg) + delta = data["choices"][0]["delta"] + content = delta.get("content", None) or delta.get("reasoning_content", "") + if content: + if tokens_received != 0 and flag == False: + ttft = time.monotonic() - start_time + flag = True + else: + time_to_next_token.append(time.monotonic() - most_recent_received_token_time) + most_recent_received_token_time = time.monotonic() + generated_text += content + + total_request_time = time.monotonic() - start_time + if total_request_time > 0: + output_throughput = tokens_received / total_request_time + + except Exception as e: + metrics[common_metrics.ERROR_MSG] = error_msg + metrics[common_metrics.ERROR_CODE] = error_response_code + print(f"Warning Or Error: {e}") + print(error_response_code) + + metrics[common_metrics.INTER_TOKEN_LAT] = sum(time_to_next_token) + metrics[common_metrics.TTFT] = ttft + metrics[common_metrics.E2E_LAT] = total_request_time + metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = output_throughput + metrics[common_metrics.NUM_TOTAL_TOKENS] = tokens_received + prompt_len + metrics[common_metrics.NUM_OUTPUT_TOKENS] = tokens_received + metrics[common_metrics.NUM_INPUT_TOKENS] = prompt_len + + return metrics, generated_text, request_config \ No newline at end of file diff --git a/test/common/llmperf/utils/sonnet.txt b/test/common/llmperf/utils/sonnet.txt new file mode 100644 index 00000000..9f13ead4 --- /dev/null +++ b/test/common/llmperf/utils/sonnet.txt @@ -0,0 +1,84 @@ +Shall I compare thee to a summer's day? 
+Thou art more lovely and more temperate: +Rough winds do shake the darling buds of May, +And summer's lease hath all too short a date: +Sometime too hot the eye of heaven shines, +And often is his gold complexion dimm'd; +And every fair from fair sometime declines, +By chance or nature's changing course untrimm'd; +But thy eternal summer shall not fade +Nor lose possession of that fair thou owest; +Nor shall Death brag thou wander'st in his shade, +When in eternal lines to time thou growest: +So long as men can breathe or eyes can see, +So long lives this and this gives life to thee. +Then let not winter's ragged hand deface +In thee thy summer, ere thou be distill'd: +Make sweet some vial; treasure thou some place +With beauty's treasure, ere it be self-kill'd. +That use is not forbidden usury, +Which happies those that pay the willing loan; +That's for thyself to breed another thee, +Or ten times happier, be it ten for one; +Ten times thyself were happier than thou art, +If ten of thine ten times refigured thee: +Then what could death do, if thou shouldst depart, +Leaving thee living in posterity? +Be not self-will'd, for thou art much too fair +To be death's conquest and make worms thine heir. +Where art thou, Muse, that thou forget'st so long +To speak of that which gives thee all thy might? +Spend'st thou thy fury on some worthless song, +Darkening thy power to lend base subjects light? +Return, forgetful Muse, and straight redeem +In gentle numbers time so idly spent; +Sing to the ear that doth thy lays esteem +And gives thy pen both skill and argument. +Rise, resty Muse, my love's sweet face survey, +If Time have any wrinkle graven there; +If any, be a satire to decay, +And make Time's spoils despised every where. +Give my love fame faster than Time wastes life; +So thou prevent'st his scythe and crooked knife. +My glass shall not persuade me I am old, +So long as youth and thou are of one date; +But when in thee time's furrows I behold, +Then look I death my days should expiate. +For all that beauty that doth cover thee +Is but the seemly raiment of my heart, +Which in thy breast doth live, as thine in me: +How can I then be elder than thou art? +O, therefore, love, be of thyself so wary +As I, not for myself, but for thee will; +Bearing thy heart, which I will keep so chary +As tender nurse her babe from faring ill. +Presume not on thy heart when mine is slain; +Thou gavest me thine, not to give back again. +So am I as the rich, whose blessed key +Can bring him to his sweet up-locked treasure, +The which he will not every hour survey, +For blunting the fine point of seldom pleasure. +Therefore are feasts so solemn and so rare, +Since, seldom coming, in the long year set, +Like stones of worth they thinly placed are, +Or captain jewels in the carcanet. +So is the time that keeps you as my chest, +Or as the wardrobe which the robe doth hide, +To make some special instant special blest, +By new unfolding his imprison'd pride. +Blessed are you, whose worthiness gives scope, +Being had, to triumph, being lack'd, to hope. +If there be nothing new, but that which is +Hath been before, how are our brains beguiled, +Which, labouring for invention, bear amiss +The second burden of a former child! +O, that record could with a backward look, +Even of five hundred courses of the sun, +Show me your image in some antique book, +Since mind at first in character was done! 
+That I might see what the old world could say +To this composed wonder of your frame; +Whether we are mended, or whether better they, +Or whether revolution be the same. +O, sure I am, the wits of former days +To subjects worse have given admiring praise. \ No newline at end of file diff --git a/test/common/llmperf/utils/token_benchmark.py b/test/common/llmperf/utils/token_benchmark.py new file mode 100644 index 00000000..5f514267 --- /dev/null +++ b/test/common/llmperf/utils/token_benchmark.py @@ -0,0 +1,327 @@ +import logging +from collections.abc import Iterable +import json +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path +import re +import time +import random +from typing import Any, Dict, List, Optional, Tuple + +import pandas as pd + + +from transformers import AutoTokenizer + +from common.llmperf.utils import common_metrics +from common.llmperf.utils.models import RequestConfig +from common.llmperf.utils.openai_chat_completions_client import OpenAIChatCompletionsClient +from common.llmperf.utils.utils import ( + randomly_sample_sonnet_lines_prompt, + LLMPerfResults, + sample_random_positive_int, ) + + +def get_token_throughput_latencies( + model: str, + mean_input_tokens: int, + stddev_input_tokens: int, + mean_output_tokens: int, + stddev_output_tokens: int, + additional_sampling_params: Optional[Dict[str, Any]] = None, + num_concurrent_requests: int = 1, + max_num_completed_requests: int = 500, + test_timeout_s=90, + llm_api="openai", + random_seed: int = None, + openai_api_base: str = "", + tokenizer_path: str = None, +) -> Tuple[Dict[str, Any], List[Dict[str, Any]], float, float]: + """Get the token throughput and latencies for the given model. + + Args: + model: The name of the model to query. + mean_input_tokens: The mean number of tokens to send in the prompt for the request. + stddev_input_tokens: The standard deviation of the number of tokens to send in the prompt for the request. + mean_output_tokens: The mean number of tokens to generate per request. + stddev_output_tokens: The standard deviation of the number of tokens to generate per request. + additional_sampling_params: Additional sampling parameters to send with the request. + For more information see the LLM APIs documentation for the completions + num_concurrent_requests: The number of concurrent requests to make. Increase + this to increase the amount of load and vice versa. + test_timeout_s: The amount of time to run the test for before reporting results. + llm_api: The name of the llm api to use. Either "openai" or "litellm". + + Returns: + A summary of the performance metrics collected across all completed requests + (e.g. throughput, latencies, etc.) + The individual metrics for each request. + """ + random.seed(random_seed) + + print(f"Using tokenizer:{tokenizer_path}") + tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) + get_token_length = lambda text: len(tokenizer.encode(text)) + + if not additional_sampling_params: + additional_sampling_params = {} + + # 1. 
create prompts + prompts: List[Tuple[str, int]] = [] + num_output_tokens_list: List[int] = [] + for i in range(max_num_completed_requests): + num_output = sample_random_positive_int(mean_output_tokens, stddev_output_tokens) + num_output_tokens_list.append(num_output) + prompts.append(randomly_sample_sonnet_lines_prompt( + prompt_tokens_mean=mean_input_tokens, + prompt_tokens_stddev=stddev_input_tokens, + tokenizer=tokenizer + )) + start_time = time.monotonic() + completed_requests: List[Dict[str, Any]] = [] + incremental_time_delay = 0.0 + client = OpenAIChatCompletionsClient() + futures = [] + + # 2. Submitting tasks using a thread pool + with ThreadPoolExecutor(max_workers=num_concurrent_requests) as executor: + for idx in range(max_num_completed_requests): + sampling = {"max_tokens": num_output_tokens_list[idx]} + sampling.update(additional_sampling_params) + cfg = RequestConfig( + model=model, + prompt=prompts[idx], + sampling_params=sampling, + llm_api=llm_api, + openai_api_base=openai_api_base + ) + futures.append(executor.submit(client.llm_request, cfg)) + # 3. Waiting for completion or timeout + for future in as_completed(futures, timeout=test_timeout_s): + try: + metrics, gen_text, req_cfg = future.result() + except Exception as e: + logging.warning(f"[WARN] Future raised exception: {e}") + continue + num_output_tokens = get_token_length(gen_text) + if num_output_tokens: + metrics[common_metrics.INTER_TOKEN_LAT] /= (metrics[common_metrics.NUM_OUTPUT_TOKENS] - 1) if ( + metrics[common_metrics.NUM_OUTPUT_TOKENS] - 1) else 1 + metrics[common_metrics.NUM_OUTPUT_TOKENS] = num_output_tokens + metrics[common_metrics.NUM_TOTAL_TOKENS] = metrics[ + common_metrics.NUM_INPUT_TOKENS] + num_output_tokens + try: + metrics[common_metrics.REQ_OUTPUT_THROUGHPUT] = num_output_tokens / metrics[ + common_metrics.E2E_LAT] + except ZeroDivisionError: + logging.error("Division by zero in throughput calculation.") + + completed_requests.append(metrics) + + incremental_time_delay += metrics.get(common_metrics.INTER_TOKEN_LAT, 0.0) + + end_time = time.monotonic() + + print(f"Results for token benchmark for {model} queried with the {llm_api} api.\n") + if mean_output_tokens == 2: + print(f"[INFO] First token sending pre-embedding completed\n") + return {}, [], 0.0, 0.0 + + ret = metrics_summary(completed_requests, start_time, end_time) + + metadata = { + "model": model, + "mean_input_tokens": mean_input_tokens, + "stddev_input_tokens": stddev_input_tokens, + "mean_output_tokens": mean_output_tokens, + "stddev_output_tokens": stddev_output_tokens, + "num_concurrent_requests": num_concurrent_requests, + "additional_sampling_params": additional_sampling_params, + } + + metadata["results"] = ret + elapsed_time = end_time - start_time + return metadata, completed_requests, elapsed_time, incremental_time_delay + + +def metrics_summary( + metrics: List[Dict[str, Any]], start_time: int, end_time: int +) -> Dict[str, Any]: + """Generate a summary over metrics generated from potentially multiple instances of this client. + + Args: + metrics: The metrics to summarize. + start_time: The time the test started. + end_time: The time the test ended. 
+ + Returns: + A summary with the following information: + - Overall throughput (generated tokens / total test time) + - Number of completed requests + - Error rate + - Error code frequency + - Quantiles (p25-p99) for the following metrics: + - Inter token latency + - Time to first token + - User total request time + - Number of tokens processed per request + - Number of tokens generated per request + - User throughput (tokens / s) + """ + ret = {} + + def flatten(item): + for sub_item in item: + if isinstance(sub_item, Iterable) and not isinstance(sub_item, str): + yield from flatten(sub_item) + else: + yield sub_item + + df = pd.DataFrame(metrics) + df_without_errored_req = df[df[common_metrics.ERROR_CODE].isna()] + + for key in [ + common_metrics.INTER_TOKEN_LAT, + common_metrics.TTFT, + common_metrics.E2E_LAT, + common_metrics.REQ_OUTPUT_THROUGHPUT, + common_metrics.NUM_INPUT_TOKENS, + common_metrics.NUM_OUTPUT_TOKENS + ]: + print(key) + ret[key] = {} + series = pd.Series(list(flatten(df_without_errored_req[key]))).dropna() + quantiles = series.quantile([0.25, 0.5, 0.75, 0.9, 0.95, 0.99]).to_dict() + quantiles_reformatted_keys = {} + for quantile, value in quantiles.items(): + reformatted_key = f"p{int(quantile * 100)}" + print(f" {reformatted_key} = {value}") + quantiles_reformatted_keys[reformatted_key] = value + ret[key]["quantiles"] = quantiles_reformatted_keys + mean = series.mean() + print(f" mean = {mean}") + ret[key]["mean"] = mean + print(f" min = {series.min()}") + ret[key]["min"] = series.min() + print(f" max = {series.max()}") + ret[key]["max"] = series.max() + print(f" stddev = {series.std()}") + ret[key]["stddev"] = series.std() + + ret[common_metrics.NUM_REQ_STARTED] = len(metrics) + + error_codes = df[common_metrics.ERROR_CODE].dropna() + num_errors = len(error_codes) + ret[common_metrics.ERROR_RATE] = num_errors / len(metrics) if len(metrics) else 0 + ret[common_metrics.NUM_ERRORS] = num_errors + print(f"Number Of Errored Requests: {num_errors}") + error_code_frequency = dict(error_codes.value_counts()) + if num_errors: + error_code_frequency = dict(error_codes.value_counts()) + print("Error Code Frequency") + print(error_code_frequency) + ret[common_metrics.ERROR_CODE_FREQ] = str(error_code_frequency) + + overall_output_throughput = df_without_errored_req[ + common_metrics.NUM_OUTPUT_TOKENS + ].sum() / (end_time - start_time) + + print(f"Overall Output Throughput: {overall_output_throughput}") + ret[common_metrics.OUTPUT_THROUGHPUT] = overall_output_throughput + + num_completed_requests = len(df_without_errored_req) + num_completed_requests_per_min = ( + num_completed_requests / (end_time - start_time) * 60 + ) + print(f"Number Of Completed Requests: {num_completed_requests}") + print(f"Completed Requests Per Minute: {num_completed_requests_per_min}") + + ret[common_metrics.NUM_COMPLETED_REQUESTS] = num_completed_requests + ret[common_metrics.COMPLETED_REQUESTS_PER_MIN] = num_completed_requests_per_min + + return ret + + +def run_token_benchmark( + llm_api: str, + model: str, + test_timeout_s: int, + max_num_completed_requests: int, + num_concurrent_requests: int, + mean_input_tokens: int, + stddev_input_tokens: int, + mean_output_tokens: int, + stddev_output_tokens: int, + additional_sampling_params: str, + results_dir: str, + random_seed: int, + openai_api_base: str, + tokenizer_path: str, + user_metadata: Dict[str, Any], +): + """ + Args: + llm_api: The name of the llm api to use. + model: The name of the model to query. 
+ max_num_completed_requests: The number of requests to complete before finishing the test. + test_timeout_s: The amount of time to run the test for before reporting results. + num_concurrent_requests: The number of concurrent requests to make. Increase + this to increase the amount of load and vice versa. + mean_input_tokens: The mean number of tokens to send in the prompt for the request. + stddev_input_tokens: The standard deviation of the number of tokens to send in the prompt for the request. + mean_output_tokens: The mean number of tokens to generate per request. + stddev_output_tokens: The standard deviation of the number of tokens to generate per request. + additional_sampling_params: Additional sampling parameters to send with the request. + For more information see the LLM APIs documentation for the completions. + results_dir: The directory to save the results to. + user_metadata: Additional metadata to include in the results. + """ + if mean_input_tokens < 40: + print( + "the minimum number of input tokens that will be sent is 41" + " because of the prompting logic right now" + ) + + summary, individual_responses, elapsed_time, incremental_time_delay = get_token_throughput_latencies( + model=model, + llm_api=llm_api, + test_timeout_s=test_timeout_s, + max_num_completed_requests=max_num_completed_requests, + mean_input_tokens=mean_input_tokens, + stddev_input_tokens=stddev_input_tokens, + mean_output_tokens=mean_output_tokens, + stddev_output_tokens=stddev_output_tokens, + num_concurrent_requests=num_concurrent_requests, + additional_sampling_params=json.loads(additional_sampling_params), + random_seed=random_seed, + openai_api_base=openai_api_base, + tokenizer_path=tokenizer_path, + ) + if mean_output_tokens == 2: + return summary, individual_responses, elapsed_time, incremental_time_delay + + timestamp = int(time.time() * 1000) + if results_dir: + filename = f"{model}_{mean_input_tokens}_{mean_output_tokens}_{timestamp}" + filename = re.sub(r"[^\w\d-]+", "-", filename) + filename = re.sub(r"-{2,}", "-", filename) + summary_filename = f"{filename}_summary" + + # Update to metadata. 
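+    # Caller-supplied metadata (e.g. case_idx / phase from run_test_cases) is merged
+    # into the summary below, so it lands in the per-case JSON next to elapsed_time
+    # and incremental_time_delay.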
+ summary.update(user_metadata) + summary["elapsed_time"] = elapsed_time + summary["incremental_time_delay"] = incremental_time_delay + + results = LLMPerfResults(name=summary_filename, metadata=summary) + results_dir = Path(results_dir) + if not results_dir.exists(): + results_dir.mkdir(parents=True) + elif not results_dir.is_dir(): + raise ValueError(f"{results_dir} is not a directory") + + try: + with open(results_dir / f"{summary_filename}.json", "w") as f: + json.dump(results.to_dict(), f, indent=4, default=str) + except Exception as e: + print(results.to_dict()) + raise e \ No newline at end of file diff --git a/test/common/llmperf/utils/utils.py b/test/common/llmperf/utils/utils.py new file mode 100644 index 00000000..e68078b4 --- /dev/null +++ b/test/common/llmperf/utils/utils.py @@ -0,0 +1,168 @@ +import json +import math +import os +import hashlib +import pathlib +import random +import subprocess +import time +from typing import Any, Dict, Tuple + +from transformers import LlamaTokenizerFast + + +RESULTS_VERSION = "2025-10-30" + + +class LLMPerfResults: + def __init__( + self, + name: str, + metadata: Dict[str, Any] = None, + ): + self.name = name + self.metadata = metadata or {} + self.timestamp = int(time.time()) + self.metadata["timestamp"] = self.timestamp + self.version = RESULTS_VERSION + + def to_dict(self): + data = { + "version": self.version, + "name": self.name, + } + data.update(self.metadata) + data = flatten_dict(data) + return data + + def json(self): + data = self.to_dict() + return json.dumps(data) + + +def upload_to_s3(results_path: str, s3_path: str) -> None: + """Upload the results to s3. + + Args: + results_path: The path to the results file. + s3_path: The s3 path to upload the results to. + + """ + + command = ["aws", "s3", "sync", results_path, f"{s3_path}/"] + result = subprocess.run(command) + if result.returncode == 0: + print("Files uploaded successfully!") + else: + print("An error occurred:") + print(result.stderr) + +def randomly_sample_sonnet_lines_prompt( + prompt_tokens_mean: int = 550, + prompt_tokens_stddev: int = 250, + tokenizer: LlamaTokenizerFast = None, +) -> Tuple[str, int]: + """Generate a prompt that randomly samples lines from a the shakespeare sonnet at sonnet.txt. + + Args: + prompt_length_mean: The mean length of the prompt to generate. + prompt_len_stddev: The standard deviation of the length of the prompt to generate. + expect_output_tokens: The number of tokens to expect in the output. This is used to + determine the length of the prompt. The prompt will be generated such that the output + will be approximately this many tokens. + + Note: + tokens will be counted from the sonnet using the Llama tokenizer. Using one tokenizer + ensures a fairer comparison across different LLMs. For example, if gpt 3.5 tokenizes + a prompt in less tokens than Llama2, then this will be reflected in the results since + they will be fed identical prompts. + + Returns: + A tuple of the prompt and the length of the prompt. 
+ """ + get_token_length = lambda text: len(tokenizer.encode(text)) + + prompt = ( + "Randomly stream lines from the following text " + "Don't generate eos tokens:\n\n" + ) + # get a prompt length that is at least as long as the base + num_prompt_tokens = sample_random_positive_int( + prompt_tokens_mean, prompt_tokens_stddev + ) + while num_prompt_tokens < get_token_length(prompt): + num_prompt_tokens = sample_random_positive_int( + prompt_tokens_mean, prompt_tokens_stddev + ) + remaining_prompt_tokens = num_prompt_tokens - get_token_length(prompt) + sonnet_path = pathlib.Path(__file__).parent.resolve() / "sonnet.txt" + with open(sonnet_path, "r") as f: + sonnet_lines = f.readlines() + random.shuffle(sonnet_lines) + sampling_lines = True + while sampling_lines: + for line in sonnet_lines: + line_to_add = line + if remaining_prompt_tokens - get_token_length(line_to_add) < 0: + # This will cut off a line in the middle of a word, but that's ok since an + # llm should be able to handle that. + line_to_add = line_to_add[: int(math.ceil(remaining_prompt_tokens))] + sampling_lines = False + prompt += line_to_add + break + prompt += line_to_add + remaining_prompt_tokens -= get_token_length(line_to_add) + print(hashlib.sha256(prompt.encode("utf-8")).hexdigest()) + return (prompt, num_prompt_tokens) + + +def sample_random_positive_int(mean: int, stddev: int) -> int: + """Sample random numbers from a gaussian distribution until a positive number is sampled. + + Args: + mean: The mean of the gaussian distribution to sample from. + stddev: The standard deviation of the gaussian distribution to sample from. + + Returns: + A random positive integer sampled from the gaussian distribution. + """ + ret = -1 + while ret <= 0: + ret = int(random.gauss(mean, stddev)) + return ret + + +def flatten_dict(d, parent_key="", sep="_"): + items = [] + for k, v in d.items(): + new_key = f"{parent_key}{sep}{k}" if parent_key else k + if isinstance(v, dict): + items.extend(flatten_dict(v, new_key, sep=sep).items()) + else: + items.append((new_key, v)) + return dict(items) + +def reset_prefill_cache(env, server_url): + """ + prefix cache / HBM + Param: + env + server_url + """ + reset_url = f"{server_url}/reset_prefix_cache" + print(f"[INFO] Resetting prefix cache: {reset_url}") + try: + result = subprocess.run( + ["curl", "-X", "POST", reset_url, "-s", "-f"], + env=env, + check=False, + capture_output=True, + text=True, + timeout=10 + ) + if result.returncode == 0: + print("[INFO] Prefix cache successfully reset") + else: + print(f"[ERROR] Unsuccessfully reset prefix cache,error code: {result.returncode}") + except Exception as e: + print(f"[ERROR] Exception in resetting prefix cache: {e}") \ No newline at end of file diff --git a/test/config.yaml b/test/config.yaml new file mode 100644 index 00000000..df1bb6a7 --- /dev/null +++ b/test/config.yaml @@ -0,0 +1,50 @@ +reports: + base_dir: "reports" + use_timestamp: true + directory_prefix: "pytest" + html: # pytest-html + enabled: false + filename: "report.html" + title: "UCM Pytest Test Report" + allure: + enabled: true + html_enable: true + serve_mode: true # 使用allure serve mode + serve_host: "localhost" + serve_port: 8081 + directory: "allure-results" + +log: + enabled: true + path: "logs" + filename: "pytest.log" + use_timestamp: false + +# InfluxDB Configuration +influxdb: + host: localhost + port: 8086 + token: your-influxdb-token-here + org: your-organization + bucket: test-metrics + timeout: 10 + +# LLM Connection Configuration +llm_connection: + model: "qwen3" + 
server_url: "http://141.111.32.70:9382" + tokenizer_path: "/home/models/QwQ-32B" +# Performance Test Configuration +llmperf_test_cases: + - mean_input_tokens: 600 + mean_output_tokens: 300 + max_num_completed_requests: 1 + num_concurrent_requests: 1 + additional_sampling_params: "{}" + hit_rate: 0 + - mean_input_tokens: 600 + mean_output_tokens: 200 + max_num_completed_requests: 3 + num_concurrent_requests: 1 + additional_sampling_params: "{}" + hit_rate: 0 diff --git a/test/config/uc_performance_config.yaml b/test/config/uc_performance_config.yaml deleted file mode 100644 index f1c4c5f1..00000000 --- a/test/config/uc_performance_config.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# 测试用例列表 -server_config: - model: "qwen3" - server_url: "http://141.111.32.70:9382" - tokenizer_path: "/home/models/QwQ-32B" - -test_cases: - - mean_input_tokens: 600 - stddev_input_tokens: 0 - mean_output_tokens: 300 - stddev_output_tokens: 0 - max_num_completed_requests: 1 - num_concurrent_requests: 1 - additional_sampling_params: "{}" - hit_rate: 0 - - - mean_input_tokens: 600 - stddev_input_tokens: 0 - mean_output_tokens: 300 - stddev_output_tokens: 0 - max_num_completed_requests: 1 - num_concurrent_requests: 1 - additional_sampling_params: "{}" - hit_rate: 0 diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 00000000..65ace924 --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,388 @@ +from __future__ import annotations +import logging +from math import log +import shutil +import sys +import re +import pytest +import tempfile +import datetime as dt +import platform as pf +from pathlib import Path +from typing import Dict, Any, List +from common.config_utils import config_utils as config_instance +from common.allure_utils import setup_allure, generate_allure_html, serve_allure_report + + +# ---------------- Constants ---------------- +PRJ_ROOT = Path(__file__).resolve().parent +REPORT_DIR = PRJ_ROOT / "reports" +sys.path.insert(0, str(PRJ_ROOT)) + +# Global variables for Allure configuration +ALLURE_DIR = None +ALLURE_CONFIG = None + + +# ---------------- Logging ---------------- +# TODO:Unified log +def _init_logger() -> logging.Logger: + """Initialize and configure test logger.""" + log_config = config_instance.get_config("log", {}) + if not log_config.get("enabled", True): + return logging.getLogger("UCM_TEST") + + log = logging.getLogger("UCM_TEST") + log.setLevel(logging.DEBUG) + log.handlers.clear() + + log_path = Path(log_config.get("path", "logs")) + log_path.mkdir(parents=True, exist_ok=True) + + filename = config_instance.get_nested_config("log.filename", "pytest.log") + use_timestamp = config_instance.get_nested_config("log.use_timestamp", True) + if use_timestamp: + ts = dt.datetime.now().strftime("%Y%m%d-%H%M%S") + stem, ext = Path(filename).stem, Path(filename).suffix + filename = f"{stem}_{ts}{ext}" + + log_file = log_path / filename + + # Common formatter + console_fmt = logging.Formatter("[%(levelname)s] %(name)s: %(message)s") + + # File handler + fh = logging.FileHandler(log_file, encoding="utf-8") + fh.setLevel(logging.INFO) + fh.setFormatter(console_fmt) + log.addHandler(fh) + + # Console handler + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + ch.setFormatter(console_fmt) + log.addHandler(ch) + + log.propagate = False + return log + + +logger = _init_logger() +reports_config = config_instance.get_config("reports") + + +# ---------------- pytest Hooks ---------------- +def _prepare_report_dir(config: pytest.Config) -> Path: + """Prepare report directory based on 
config.yaml.""" + cfg = config_instance.get_config("reports", {}) + base_dir = Path(cfg.get("base_dir", "reports")) + prefix = cfg.get("directory_prefix", "pytest") + if cfg.get("use_timestamp", False): + ts = dt.datetime.now().strftime("%Y%m%d_%H%M%S") + report_dir = base_dir / f"{prefix}_{ts}" + else: + report_dir = base_dir + report_dir.mkdir(parents=True, exist_ok=True) + return report_dir + + +def _setup_html_report(config: pytest.Config, report_dir: Path) -> None: + """Configure pytest-html if enabled.""" + html_cfg = reports_config.get("html", {}) + if not html_cfg.get("enabled", True): + if hasattr(config.option, "htmlpath"): + config.option.htmlpath = None + logger.info("HTML report disabled according to config.yaml") + return + + html_filename = html_cfg.get("filename", "report.html") + html_path = report_dir / html_filename + config.option.htmlpath = str(html_path) + config.option.self_contained_html = True + logger.info(f"HTML report enabled → {html_path}") + + +def pytest_configure(config: pytest.Config) -> None: + """Pytest entry hook: configure logging and reports.""" + logger.info(f"Starting Test Session: {dt.datetime.now():%Y-%m-%d %H:%M:%S}") + global REPORT_DIR, ALLURE_DIR, ALLURE_CONFIG + REPORT_DIR = _prepare_report_dir(config) + _setup_html_report(config, REPORT_DIR) + reports_cfg = config_instance.get_config("reports", {}) + + # Save Allure configuration globally + ALLURE_CONFIG = reports_cfg + allure_dir = setup_allure(reports_cfg) + ALLURE_DIR = allure_dir + + # Configure allure-pytest plugin if enabled + if allure_dir: + # Set allure results directory for pytest-allure plugin + if hasattr(config.option, 'allure_report_dir'): + config.option.allure_report_dir = str(allure_dir) + # Also set as environment variable + import os + os.environ["ALLURE_REPORT_DIR"] = str(allure_dir) + logger.info(f"Allure results will be stored at {allure_dir}") + else: + logger.info("Allure report disabled according to config.yaml") + + +# ---------------- Marker & Filter Logic ---------------- +def _load_markers_from_ini() -> Dict[str, Dict[str, Any]]: + """Parse pytest.ini markers section.""" + ini_path = Path(__file__).with_name("pytest.ini") + if not ini_path.exists(): + return {} + + markers: Dict[str, Dict[str, Any]] = {} + in_markers = False + + for raw in ini_path.read_text(encoding="utf-8").splitlines(): + line = raw.strip() + if line.startswith("markers"): + in_markers = True + continue + if not in_markers or not line or line.startswith("#"): + continue + if line == "# end of markers": + break + + m = re.match(r"(\w+)(?:\((\w+)\))?\s*:\s*(.+)", line) + if m: + name, arg, help_txt = m.groups() + markers[name] = {"name": name, "arg": arg, "help": help_txt.strip()} + return markers + + +_MARKER_DEFS = _load_markers_from_ini() + + +def pytest_addoption(parser: pytest.Parser) -> None: + """Add CLI options dynamically from marker definitions.""" + for info in _MARKER_DEFS.values(): + parser.addoption( + f"--{info['name']}", + action="store", + default="", + help=( + f"Filter by {info['name']} marker. " + "Syntax: val1,val2,... | all | empty(no filter). 
" + f"({info['help']})" + ), + ) + + +def _get_marker_values(item: pytest.Item, name: str) -> List[str]: + """Extract marker values from test item.""" + vals: List[str] = [] + mark_infos = [] + + for mark in item.iter_markers(name=name): + mark_val_list = [str(a) for a in mark.args] + + if name in mark.kwargs: + mark_val_list.append(str(mark.kwargs[name])) + + vals.extend(mark_val_list) + mark_infos.append(f"{name}: {', '.join(mark_val_list) if mark_val_list else 'None'}") + + return vals + + +@pytest.hookimpl(hookwrapper=True, tryfirst=True) +def pytest_runtest_makereport(item: pytest.Item, call: pytest.CallInfo): + """Attach test reports to item for access in fixtures.""" + outcome = yield + rep = outcome.get_result() + setattr(item, f"rep_{rep.when}", rep) + + +def pytest_collection_modifyitems(config: pytest.Config, items: List[pytest.Item]) -> None: + """Filter test collection based on CLI options.""" + # Store marker information for later use in test execution + for item in items: + markers_info = [] + for mark in item.iter_markers(): + # Skip pytest's built-in markers + if mark.name in ['parametrize', 'usefixtures', 'skip', 'skipif', 'xfail']: + continue + markers_info.append({ + 'name': mark.name, + 'args': mark.args + }) + # Store marker info in the item for later use + item._pytest_markers_info = markers_info + + # Original filtering logic + kept = items[:] + + for name, info in _MARKER_DEFS.items(): + opt = config.getoption(f"--{name}", "").strip() + if not opt: + continue + + # all means any marker value with the marker + if opt == "all": + kept = [it for it in kept if _get_marker_values(it, name)] + continue + + # 特殊处理 stage + if name == "stage": + if opt.endswith("+"): + min_stage = int(opt[:-1]) + kept = [ + it for it in kept + if any(int(v) >= min_stage for v in _get_marker_values(it, "stage")) + ] + else: + wanted = {x.strip() for x in opt.split(",") if x.strip()} + kept = [ + it for it in kept + if any(v in wanted for v in _get_marker_values(it, "stage")) + ] + else: + wanted = {x.strip() for x in opt.split(",") if x.strip()} + kept = [ + it for it in kept + if any(v in wanted for v in _get_marker_values(it, name)) + ] + + if not kept: + logger.warning( + "No tests matched filter conditions: %s", + {m: config.getoption(f"--{m}") for m in _MARKER_DEFS}, + ) + else: + logger.info( + "Filter %d / %d tests after applying markers %s", + len(kept), len(items), + {m: config.getoption(f'--{m}') for m in _MARKER_DEFS if config.getoption(f'--{m}')} + ) + + items[:] = kept + + +@pytest.hookimpl(tryfirst=True) +def pytest_runtest_setup(item): + """Add pytest markers as Allure labels during test setup.""" + # Add pytest markers as Allure labels + if hasattr(item, '_pytest_markers_info'): + import allure + for marker_info in item._pytest_markers_info: + marker_name = marker_info['name'] + marker_args = marker_info['args'] + + # Add marker as Allure label + label_name = f"pytest_{marker_name}" + if marker_args: + # If marker has arguments, add each as a separate label + for arg in marker_args: + allure.dynamic.label(label_name, str(arg)) + else: + # If marker has no arguments, just add the marker name + allure.dynamic.label(label_name, marker_name) + + +# ---------------- Fixtures ---------------- +@pytest.fixture(scope="session", autouse=True) +def session_logger() -> None: + """Session-level setup and teardown with system info logging.""" + logger.info("-" * 60) + logger.info(f"{'Python':<10} │ {pf.python_version()}") + logger.info(f"{'Platform':<10} │ {pf.system()} {pf.release()}") + 
logger.info("-" * 60) + yield + logger.info("-" * 60) + logger.info(f"{'Reports at':<10} │ {REPORT_DIR}") + logger.info("Test session ended") + logger.info("-" * 60) + + +@pytest.fixture(scope="function", autouse=True) +def test_logger(request): + """Function-level logging before and after each test.""" + node = request.node + klass = f"{node.cls.__name__}::" if node.cls else "" + identifier = f"{node.path.relative_to(Path.cwd())}::{klass}{node.name}" + print() + logger.info("-" * 60) + logger.info(f"[TEST_CLASS] {identifier}") + logger.info(f"[START] {node.name}") + yield + + result = getattr(node, "rep_call", None) + status = "PASSED" if result and result.outcome == "passed" else "FAILED" + logger.info(f"[ END ] {node.name} - {status}") + if result and getattr(result, "longrepr", None): + logger.error(f"Error details: {result.longrepr}") + + +@pytest.hookimpl(hookwrapper=True, tryfirst=True) +def pytest_runtest_makereport(item: pytest.Item, call: pytest.CallInfo): + """Attach test reports to item for access in fixtures.""" + outcome = yield + rep = outcome.get_result() + setattr(item, f"rep_{rep.when}", rep) + + +@pytest.fixture(scope="session", autouse=True) +def cleanup() -> None: + """Cleanup temporary pytest directories after test session.""" + yield + tmp_root = Path(tempfile.gettempdir()) + for d in tmp_root.iterdir(): + if d.is_dir() and d.name.startswith(("pytest_", "test_")): + shutil.rmtree(d, ignore_errors=True) + + +def pytest_unconfigure(config: pytest.Config) -> None: + """Pytest cleanup hook: generate Allure HTML report or start server if configured.""" + global ALLURE_DIR, ALLURE_CONFIG + + if ALLURE_DIR and ALLURE_CONFIG: + allure_cfg = ALLURE_CONFIG.get("allure", {}) + + # Check if HTML generation is enabled + if allure_cfg.get("html_enable", False): + serve_mode = allure_cfg.get("serve_mode", False) + + if serve_mode: + # Start Allure server + serve_host = allure_cfg.get("serve_host", "localhost") + serve_port = allure_cfg.get("serve_port", 8080) + + logger.info("Starting Allure server...") + logger.info(f"Server will be available at http://{serve_host}:{serve_port}") + + server_process = serve_allure_report( + ALLURE_DIR, + host=serve_host, + port=serve_port, + + ) + + if server_process: + logger.info("Allure server started successfully") + else: + logger.warning("Failed to start Allure server, falling back to static HTML generation...") + # Fallback to static HTML + html_dir = generate_allure_html(ALLURE_DIR, clean=True) + if html_dir: + logger.info(f"Static HTML report generated: {html_dir}") + else: + logger.warning("Failed to generate static HTML report") + else: + # Generate static HTML report + logger.info("Generating Allure HTML report...") + html_dir = generate_allure_html(ALLURE_DIR, clean=True) + + if html_dir: + logger.info(f"Allure HTML report generated: {html_dir}") + logger.info("Tip: If the report doesn't load properly, enable serve_mode in config.yaml") + else: + logger.warning("Failed to generate Allure HTML report") + else: + logger.info("Allure HTML generation disabled in configuration") + else: + logger.info("Allure not configured, skipping HTML generation") diff --git a/test/pytest.ini b/test/pytest.ini new file mode 100644 index 00000000..d5ff2635 --- /dev/null +++ b/test/pytest.ini @@ -0,0 +1,26 @@ +[pytest] +# 0. 
Test Discovery Rules +testpaths = suites +python_files = test_*.py +python_classes = Test* +python_functions = test_* + + +addopts = + -ra + --strict-markers + --capture=no + +log_cli = 1 +log_cli_level = INFO +log_cli_format = [%(levelname)s] %(name)s: %(message)s +norecursedirs = .git venv env __pycache__ *.egg + +markers = + # -------- Levels (Required) -------- + stage(n): Unit/Smoke/Regression/Release (0=Unit 1=Smoke 2=Regression 3=Release) + # -------- Features (Recommended) -------- + feature: Feature tag + platform(name): Platform tag(gpu/npu) + reliability: Reliability tag +# end of markers diff --git a/test/requirements.txt b/test/requirements.txt new file mode 100644 index 00000000..2d2f2d19 --- /dev/null +++ b/test/requirements.txt @@ -0,0 +1,9 @@ +pytest>=7.0.0 +pytest-xdist>=3.0.0 +pytest-html>=3.1.1 +pytest-json-report>=1.5.0 +allure-pytest>=2.12.0 +influxdb-client>=1.36.0 +PyYAML>=6.0 +python-dotenv>=1.0.0 +requests>=2.28.0 \ No newline at end of file diff --git a/test/suites/test_demo_function.py b/test/suites/test_demo_function.py new file mode 100644 index 00000000..67433ebb --- /dev/null +++ b/test/suites/test_demo_function.py @@ -0,0 +1,185 @@ +# tests/test_demo.py +import pytest +import allure + +@pytest.mark.stage(1) +@pytest.mark.feature("mark") +@pytest.mark.platform("gpu") +def test_gpu_smoke(): + assert 1 == 1 + +@pytest.mark.stage(1) +@pytest.mark.feature("mark") +def test_regress_accuracy(): + assert 2 + 2 <= 5 + +@pytest.mark.stage(1) +@pytest.mark.feature("mark") +@pytest.mark.platform("npu") +def test_performance_accuracy(): + assert 2 + 2 <= 5 + +# Example of new mark +@pytest.mark.feature("mark") +@pytest.mark.reliability("high") +def test_llm_reliability(): + assert True + + +# Example of importing configuration file parameters +from common.config_utils import config_utils as config_instance +@pytest.mark.feature("config") +def test_llm_config(): + llm_config = config_instance.get_config("llm_connection") + assert llm_config["type"] == "openai" + assert config_instance.get_nested_config("llm_connection.model") == "gpt-3.5-turbo" + assert config_instance.get_nested_config("llm_connection.models", "gpt-3.5-turbo") == "gpt-3.5-turbo" + + + +# Example of using allure +@pytest.mark.feature("allure1") +@allure.feature('test_success') +def test_success(): + """this test succeeds""" + assert True + +@allure.feature('test_failure') +@pytest.mark.feature("allure1") +def test_failure(): + """this test fails""" + assert False + +@allure.feature('test_skip') +@pytest.mark.feature("allure1") +def test_skip(): + """this test is skipped""" + pytest.skip('for a reason!') + +@allure.feature('test_broken') +@pytest.mark.feature("allure1") +def test_broken(): + raise Exception('oops') + +@pytest.mark.feature("allure2") +@pytest.mark.parametrize('param1', ["Hello", "World"]) +@pytest.mark.parametrize('param2', ['Hello', "Hello"]) +def test_parametrize_with_two_parameters(param1, param2): + assert param1 == param2 + +@pytest.mark.feature("allure3") +@allure.description_html(""" +

+<h1>This is HTML description</h1>
+<table>
+  <tr>
+    <th>Firstname</th>
+    <th>Lastname</th>
+    <th>Age</th>
+  </tr>
+  <tr>
+    <td>jade</td>
+    <td>mr</td>
+    <td>18</td>
+  </tr>
+  <tr>
+    <td>road</td>
+    <td>Tester</td>
+    <td>18</td>
+  </tr>
+</table>
+""") +def test_html_description(): + assert True + +@pytest.mark.feature("allure3") +@allure.description("""Multi-line description""") +def test_description_from_decorator(): + assert 42 == int(6 * 7) + +@pytest.mark.feature("allure3") +def test_unicode_in_docstring_description(): + """Description can also be below the function""" + assert 42 == int(6 * 7) + +@pytest.mark.feature("allure4") +@allure.title("Assert that 2+2=4") +def test_with_a_title(): + assert 2 + 2 == 4 + +@pytest.mark.feature("allure4") +@allure.title("Dynamic title: {param1} + {param2} = {expected}") +@pytest.mark.parametrize('param1,param2,expected', [(2, 2, 4),(1, 2, 5)]) +def test_with_parameterized_title(param1, param2, expected): + assert param1 + param2 == expected + +@pytest.mark.feature("allure4") +@allure.title("This is a dynamic title that will be replaced") +def test_with_dynamic_title(): + assert 2 + 2 == 4 + allure.dynamic.title('Test completed, used as title') + + +@pytest.mark.feature("allure5") +def test_with_steps(): + """Example test case with steps""" + with allure.step("Step 1: Initialize variables"): + a = 2 + b = 3 + + with allure.step("Step 2: Perform addition"): + result = a + b + + with allure.step("Step 3: Verify result"): + assert result == 5 + +import tempfile +import os +@pytest.mark.feature("allure6") +def test_with_attachment(): + """Example test case with attachment""" + # Create some data to attach + data = "This is sample data for attachment\nLine 2\nLine 3" + + # Attach text data + allure.attach(data, name="Sample Data", attachment_type=allure.attachment_type.TEXT) + + # Create and attach a simple file + with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f: + f.write("Sample file content\nFor testing attachment feature") + temp_file_path = f.name + + # Attach the file + allure.attach.file(temp_file_path, name="Attached File", + attachment_type=allure.attachment_type.TEXT) + + # Clean up temporary file + os.unlink(temp_file_path) + + assert True + +@pytest.mark.feature("allure7") +def test_mixed_steps_and_attachments(): + """Example test case combining steps and attachments""" + with allure.step("Initialize test data"): + test_data = {"name": "John", "age": 30, "city": "New York"} + + with allure.step("Convert data to JSON string"): + import json + json_data = json.dumps(test_data, indent=2) + allure.attach(json_data, name="JSON Data", attachment_type=allure.attachment_type.JSON) + + with allure.step("Validate data"): + assert test_data["name"] == "John" + assert test_data["age"] == 30 + + with allure.step("Create and attach report"): + report_content = f""" + Test Report + =========== + Name: {test_data['name']} + Age: {test_data['age']} + City: {test_data['city']} + Status: PASSED + """ + allure.attach(report_content, name="Test Report", + attachment_type=allure.attachment_type.TEXT) \ No newline at end of file diff --git a/test/suites/test_uc_performance.py b/test/suites/test_uc_performance.py new file mode 100644 index 00000000..7fe425c7 --- /dev/null +++ b/test/suites/test_uc_performance.py @@ -0,0 +1,159 @@ +import pytest + +from common.llmperf.run_inference import inference_results + +mean_output_tokens = [] +num_completed_requests = [] +total_e2e_latency_s = [] +total_generation_time_s = [] + +@pytest.mark.feature("mean_input_tokens") +def test_mean_input_tokens(): + result = inference_results("mean_input_tokens") + assert len(result) > 0, "result list is empty! Please check data source or inference process." 
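+    # Gather the offending samples first so the assertion message can list the actual
+    # non-positive values.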
+ non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("mean_output_tokens") +def test_mean_output_tokens(): + global mean_output_tokens + result = inference_results("mean_output_tokens") + mean_output_tokens = result[:] + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("results_inter_token_latency_s_quantiles_p50") +def test_inter_token_latency_s_quantiles_p50(): + result = inference_results("results_inter_token_latency_s_quantiles_p50") + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("results_inter_token_latency_s_quantiles_p90") +def test_inter_token_latency_s_quantiles_p90(): + result = inference_results("results_inter_token_latency_s_quantiles_p90") + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("results_inter_token_latency_s_quantiles_p99") +def test_inter_token_latency_s_quantiles_p99(): + result = inference_results("results_inter_token_latency_s_quantiles_p99") + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("results_inter_token_latency_s_mean") +def test_inter_token_latency_s_mean(): + result = inference_results("results_inter_token_latency_s_mean") + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("results_ttft_s_quantiles_p50") +def test_ttft_s_quantiles_p50(): + result = inference_results("results_ttft_s_quantiles_p50") + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("results_ttft_s_quantiles_p90") +def test_ttft_s_quantiles_p90(): + result = inference_results("results_ttft_s_quantiles_p90") + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("results_ttft_s_quantiles_p99") +def test_ttft_s_quantiles_p99(): + result = inference_results("results_ttft_s_quantiles_p99") + assert len(result) > 0, "result list is empty! Please check data source or inference process." 
+ non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("results_ttft_s_mean") +def test_ttft_s_mean(): + result = inference_results("results_ttft_s_mean") + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("results_end_to_end_latency_s_quantiles_p50") +def test_end_to_end_latency_s_quantiles_p50(): + result = inference_results("results_end_to_end_latency_s_quantiles_p50") + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("results_end_to_end_latency_s_quantiles_p90") +def test_end_to_end_latency_s_quantiles_p90(): + result = inference_results("results_end_to_end_latency_s_quantiles_p90") + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("results_end_to_end_latency_s_quantiles_p99") +def test_end_to_end_latency_s_quantiles_p99(): + result = inference_results("results_end_to_end_latency_s_quantiles_p99") + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("results_end_to_end_latency_s_mean") +def test_end_to_end_latency_s_mean(): + result = inference_results("results_end_to_end_latency_s_mean") + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("results_num_completed_requests") +def test_num_completed_requests(): + global num_completed_requests + result = inference_results("results_num_completed_requests") + num_completed_requests = result[:] + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("elapsed_time") +def test_elapsed_time(): + global total_e2e_latency_s + result = inference_results("elapsed_time") + total_e2e_latency_s = result[:] + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("incremental_time_delay") +def test_incremental_time_delay(): + global total_generation_time_s + result = inference_results("incremental_time_delay") + total_generation_time_s = result[:] + assert len(result) > 0, "result list is empty! Please check data source or inference process." 
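+    # NOTE: the module-level lists filled in above (mean_output_tokens, num_completed_requests,
+    # total_e2e_latency_s, total_generation_time_s) feed test_total_throughput and
+    # test_incremental_throughput further down, so the derived throughput checks rely on file execution order.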
+ non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("total_throughput") +def test_total_throughput(): + result = [] + n = len(mean_output_tokens) + for i in range(n): + total_throughput = (mean_output_tokens[i] * num_completed_requests[i] / total_e2e_latency_s[i] + if total_e2e_latency_s[i] > 0 else 0.0) + result.append(total_throughput) + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" + +@pytest.mark.feature("incremental_throughput") +def test_incremental_throughput(): + result = [] + n = len(mean_output_tokens) + for i in range(n): + incremental_throughput = (mean_output_tokens[i] * num_completed_requests[i] / total_generation_time_s[i] + if total_generation_time_s[i] > 0 else 0.0) + result.append(incremental_throughput) + assert len(result) > 0, "result list is empty! Please check data source or inference process." + non_positive = [x for x in result if x <= 0] + assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}" \ No newline at end of file diff --git a/test/test_uc_performance b/test/test_uc_performance deleted file mode 100644 index c38c2c7b..00000000 --- a/test/test_uc_performance +++ /dev/null @@ -1,947 +0,0 @@ -import hashlib -import pathlib -import subprocess -import sys -import threading -import logging -from collections.abc import Iterable -import json -import os -from datetime import datetime -from pathlib import Path -import re -import time -import random -from typing import Any, Dict, List, Optional, Tuple - -import pandas as pd -import ray -import yaml -from openpyxl.reader.excel import load_workbook -from ray.util import ActorPool -import requests -from tqdm import tqdm - -from transformers import LlamaTokenizerFast, AutoTokenizer - -# —————————————————————— -# 常量定义(用于性能指标键名) -# —————————————————————— -SUPPORTED_APIS = ["openai", "anthropic", "litellm"] - -INTER_TOKEN_LAT = "inter_token_latency_s" -TTFT = "ttft_s" -E2E_LAT = "end_to_end_latency_s" -NUM_INPUT_TOKENS = "number_input_tokens" -NUM_OUTPUT_TOKENS = "number_output_tokens" -NUM_TOTAL_TOKENS = "number_total_tokens" -REQ_OUTPUT_THROUGHPUT = "request_output_throughput_token_per_s" -ERROR_MSG = "error_msg" -ERROR_CODE = "error_code" -ERROR_CODE_FREQ = "error_code_frequency" -NUM_ERRORS = "number_errors" -OUTPUT_THROUGHPUT = "mean_output_throughput_token_per_s" -NUM_COMPLETED_REQUESTS = "num_completed_requests" -COMPLETED_REQUESTS_PER_MIN = "num_completed_requests_per_min" -ERROR_RATE = "error_rate" -NUM_REQ_STARTED = "num_requests_started" - - -class RequestConfig: - """ - 请求配置类 — 表示一次 LLM 请求所需的参数。 - 属性: - model — 模型名称 - prompt — (文本, token 长度) 二元组 - sampling_params — 抽样参数字典(如 max_tokens 等) - llm_api — 使用的 API 名称(如 "openai") - metadata — 任意附加元数据字典 - openai_api_base — OpenAI 或兼容服务的基础 URL - """ - def __init__( - self, - model: str, - prompt: Tuple[str, int], - sampling_params: Optional[Dict[str, Any]] = None, - llm_api: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, - openai_api_base: Optional[str] = "" - ): - self.model = model - self.prompt = prompt - self.sampling_params = sampling_params or {} - self.llm_api = llm_api - self.metadata = metadata or {} - self.openai_api_base = openai_api_base - -@ray.remote -class OpenAIChatCompletionsClient: - """ - LLM 客户端(远程 actor) — 用于调用 
OpenAI Chat Completions 接口(流式)。 - 负责发送请求、接收 token 流、统计延迟和吞吐率等指标。 - """ - def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]: - prompt = request_config.prompt - prompt, prompt_len = prompt - - message = [ - {"role": "system", "content": ""}, - {"role": "user", "content": prompt}, - ] - model = request_config.model - body = { - "model": model, - "messages": message, - "stream": True, - "ignore_eos": True, - } - sampling_params = request_config.sampling_params - body.update(sampling_params or {}) - time_to_next_token = [] - tokens_received = 0 - ttft = 0 - error_response_code = -1 - generated_text = "" - error_msg = "" - output_throughput = 0 - total_request_time = 0 - - metrics = {} - - metrics[ERROR_CODE] = None - metrics[ERROR_MSG] = "" - - start_time = time.monotonic() - most_recent_received_token_time = time.monotonic() - address = request_config.openai_api_base - if not address: - raise ValueError("the environment variable OPENAI_API_BASE must be set.") - key = os.environ.get("OPENAI_API_KEY", "secret_abcdefg") - if not key: - raise ValueError("the environment variable OPENAI_API_KEY must be set.") - headers = {"Authorization": f"Bearer {key}"} - if not address: - raise ValueError("No host provided.") - if not address.endswith("/"): - address = address + "/" - address += "chat/completions" - try: - with requests.post( - address, - json=body, - stream=True, - timeout=180, - headers=headers, - ) as response: - if response.status_code != 200: - error_msg = response.text - error_response_code = response.status_code - response.raise_for_status() - for chunk in response.iter_lines(chunk_size=None): - chunk = chunk.strip() - - if not chunk: - continue - stem = "data: " - chunk = chunk[len(stem):] - if chunk == b"[DONE]": - continue - tokens_received += 1 - data = json.loads(chunk) - - if "error" in data: - error_msg = data["error"]["message"] - error_response_code = data["error"]["code"] - raise RuntimeError(data["error"]["message"]) - - delta = data["choices"][0]["delta"] - if delta.get("content", None): - if not ttft: - ttft = time.monotonic() - start_time - # time_to_next_token.append(ttft) - else: - time_to_next_token.append( - time.monotonic() - most_recent_received_token_time - ) - most_recent_received_token_time = time.monotonic() - generated_text += delta.get("content", None) or delta.get("reasoning_content", "") - - total_request_time = time.monotonic() - start_time - output_throughput = tokens_received / total_request_time - - except Exception as e: - metrics[ERROR_MSG] = error_msg - metrics[ERROR_CODE] = error_response_code - print(f"[WARN] 请求发生异常:{e},返回码:{error_response_code}") - print(error_response_code) - - metrics[INTER_TOKEN_LAT] = sum( - time_to_next_token) # This should be same as metrics[common_metrics.E2E_LAT]. 
Leave it here for now - metrics[TTFT] = ttft - metrics[E2E_LAT] = total_request_time - metrics[REQ_OUTPUT_THROUGHPUT] = output_throughput - metrics[NUM_TOTAL_TOKENS] = tokens_received + prompt_len - metrics[NUM_OUTPUT_TOKENS] = tokens_received - metrics[NUM_INPUT_TOKENS] = prompt_len - - return metrics, generated_text, request_config - - -class RequestsLauncher: - """ - 请求启动器 — 管理多个 LLM 客户端 actor,并发提交请求。 - """ - def __init__(self, llm_clients: List[OpenAIChatCompletionsClient]): - self._llm_client_pool = ActorPool(llm_clients) - - def launch_requests(self, request_config: RequestConfig) -> None: - """ - 提交一个请求配置至客户端池。 - 参数: - request_config — RequestConfig 实例,包含请求参数 - """ - if self._llm_client_pool.has_free(): - self._llm_client_pool.submit( - lambda client, _request_config: client.llm_request.remote( - _request_config - ), - request_config, - ) - - def get_next_ready(self, block: bool = False) -> List[Any]: - """ - 获取所有已完成的请求结果。 - 参数: - block — 若为 True,则阻塞直到至少一个结果准备好。 - 返回: - 已完成请求的结果列表。 - """ - results = [] - if not block: - while self._llm_client_pool.has_next(): - results.append(self._llm_client_pool.get_next_unordered()) - else: - while not self._llm_client_pool.has_next(): - pass - while self._llm_client_pool.has_next(): - results.append(self._llm_client_pool.get_next_unordered()) - return results - - -class LLMPerfResults: - """ - 高层记录包装类,可用于最终输出 JSON、flatten 结构等。 - """ - def __init__(self, name: str, metadata: Dict[str, Any] = None): - self.name = name - self.metadata = metadata or {} - self.timestamp = int(time.time()) - self.metadata["timestamp"] = self.timestamp - self.version = "2025-10-17" - - def to_dict(self): - data = { - "version": self.version, - "name": self.name, - } - data.update(self.metadata) - return flatten_dict(data) - - def json(self): - data = self.to_dict() - return json.dumps(data) - - -def sample_random_positive_int(mean: int, stddev: int) -> int: - """ - 从高斯分布采样一个正整数 (>0)。 - 参数: - mean — 均值 - stddev — 标准差 - 返回: - 一个大于 0 的整数 - """ - while True: - v = int(random.gauss(mean, stddev)) - if v > 0: - return v - - -def randomly_sample_sonnet_lines_prompt( - prompt_tokens_mean: int = 550, - prompt_tokens_stddev: int = 250, - tokenizer = None, -) -> Tuple[str, int]: - """ - 随机从 Shakespeare 的 sonnet.txt 中抽取行并拼为 prompt,使其 token 长度接近指定值。 - 参数: - prompt_tokens_mean — 目标 token 均值 - prompt_tokens_stddev — token 长度标准差 - tokenizer — 分词器实例(若为 None 则默认加载 LlamaTokenizerFast) - 返回: - (prompt_str, prompt_token_length) - """ - if tokenizer is None: - tokenizer = LlamaTokenizerFast.from_pretrained("./llama-tokenizer") - - def token_len(text: str) -> int: - return len(tokenizer.encode(text)) - - # 基础开头 prompt - base = ("Randomly stream lines from the following text\n\n" - "Don't generate eos tokens:\n\n") - base_len = token_len(base) - - # 目标 prompt token 总数 - target = sample_random_positive_int(prompt_tokens_mean, prompt_tokens_stddev) - while target < base_len: - target = sample_random_positive_int(prompt_tokens_mean, prompt_tokens_stddev) - - remaining = target - base_len - - sonnet_path = pathlib.Path(__file__).parent / "sonnet.txt" - lines = sonnet_path.read_text(encoding="utf-8").splitlines() - random.shuffle(lines) - - prompt = base - for line in lines: - l = line + "\n" - l_len = token_len(l) - if l_len <= remaining: - prompt += l - remaining -= l_len - else: - # 裁剪 - # 可能截断单词,但 ok - cut = l[: max(1, int(remaining))] - prompt += cut - break - - # 打印 prompt 的 hash 供 debug - h = hashlib.sha256(prompt.encode("utf-8")).hexdigest() - print(f"Prompt hash: {h}") - - return prompt, 
token_len(prompt) - -def get_token_throughput_latencies( - model: str, - mean_input_tokens: int, - stddev_input_tokens: int, - mean_output_tokens: int, - stddev_output_tokens: int, - additional_sampling_params: Optional[Dict[str, Any]] = None, - num_concurrent_requests: int = 1, - max_num_completed_requests: int = 500, - test_timeout_s=90, - llm_api="openai", - random_seed: int = None, - openai_api_base: str = "", - tokenizer_path: str = None, -) -> Tuple[Dict[str, Any], List[Dict[str, Any]], float, float]: - """ - 获取给定模型的令牌吞吐量和延迟。 - - 参数: - model:要查询的模型的名称。 - mean_input_tokens:请求提示中发送的平均令牌数。 - stddev_input_tokens:请求提示中发送的令牌数的标准差。 - mean_output_tokens:每个请求生成的平均令牌数。 - stddev_output_tokens:每个请求生成令牌数的标准差。 - additional_sampling_params:随请求发送的附加采样参数。 - 有关更多信息,请参阅 LLM API 文档中的补全功能。 - num_concurrent_requests:要发出的并发请求数。增加此值可增加负载量 - test_timeout_s:报告结果之前运行测试的时间。 - llm_api:要使用的 llm api 的名称 - - 返回: - 所有已完成请求的性能指标摘要 - """ - random.seed(random_seed) - - if tokenizer_path: - print(f"Using tokenizer:{tokenizer_path}") - tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) - else: - print("Using default tokenizer") - tokenizer = LlamaTokenizerFast.from_pretrained( - "./llama-tokenizer" - ) - get_token_length = lambda text: len(tokenizer.encode(text)) - - if not additional_sampling_params: - additional_sampling_params = {} - - completed_requests_lock = threading.Lock() - completed_requests = [] - num_completed_requests = 0 - incremental_time_delay = 0 - # make up prompts outside of send loop for faster benchmarking loop - num_output_tokens_list = [] - prompts = [] - for i in range(max_num_completed_requests): - num_output_tokens = (sample_random_positive_int( - mean_output_tokens, stddev_output_tokens - )) - num_output_tokens_list.append(num_output_tokens) - - prompts.append(randomly_sample_sonnet_lines_prompt( - prompt_tokens_mean=mean_input_tokens, - prompt_tokens_stddev=stddev_input_tokens, - tokenizer=tokenizer - )) - end_time = 0 - start_time = time.monotonic() - pbar = tqdm(total=max_num_completed_requests) - - def launch_request(thread_index): - nonlocal num_completed_requests, end_time, incremental_time_delay - num_clients = 1 - clients = [OpenAIChatCompletionsClient.remote() for _ in range(num_clients)] - req_launcher = RequestsLauncher(clients) - request_index = thread_index % max_num_completed_requests - - while ( - time.monotonic() - start_time < test_timeout_s - and num_completed_requests < max_num_completed_requests - ): - default_sampling_params = {"max_tokens": num_output_tokens_list[request_index] } - default_sampling_params.update(additional_sampling_params) - request_config = RequestConfig( - model=model, - prompt=prompts[request_index], - sampling_params=default_sampling_params, - llm_api=llm_api, - openai_api_base=openai_api_base - ) - req_launcher.launch_requests(request_config) - - outs = req_launcher.get_next_ready() - all_metrics = [] - for out in outs: - request_metrics, gen_text, _ = out - num_output_tokens = get_token_length(gen_text) - incremental_time_delay += request_metrics[INTER_TOKEN_LAT] - with completed_requests_lock: - if num_completed_requests < max_num_completed_requests: - if num_output_tokens: - request_metrics[INTER_TOKEN_LAT] /= (request_metrics[NUM_OUTPUT_TOKENS] - 1) - else: - request_metrics[INTER_TOKEN_LAT] = 0 - request_metrics[NUM_OUTPUT_TOKENS] = num_output_tokens - request_metrics[NUM_TOTAL_TOKENS] = request_metrics[NUM_INPUT_TOKENS] + num_output_tokens - try: - request_metrics[REQ_OUTPUT_THROUGHPUT] = num_output_tokens / request_metrics[E2E_LAT] - 
except ZeroDivisionError: - logging.error( - "Division by zero in throughput calculation: E2E_LAT is 0. " - "This indicates the client received no valid response. " - "Possible server-side error occurred — please check server logs for details." - ) - return - - all_metrics.append(request_metrics) - completed_requests.extend(all_metrics) - pbar.update(len(all_metrics)) - num_completed_requests += len(all_metrics) - if num_completed_requests == max_num_completed_requests: - end_time = time.monotonic() - request_index = (request_index + num_concurrent_requests) % max_num_completed_requests - - threads = [] - for i in range(num_concurrent_requests): - thread = threading.Thread(target=launch_request, args=(i,)) - threads.append(thread) - thread.start() - - for thread in threads: - thread.join() - - pbar.close() - if end_time - start_time >= test_timeout_s: - print("Test timed out before all requests could be completed.") - - # check one last time that there are no remaining results to collect. - num_clients = 1 - clients = [OpenAIChatCompletionsClient.remote() for _ in range(num_clients)] - req_launcher = RequestsLauncher(clients) - outs = req_launcher.get_next_ready() - all_metrics = [] - for out in outs: - request_metrics, gen_text, _ = out - num_output_tokens = get_token_length(gen_text) - with completed_requests_lock: - if num_completed_requests < max_num_completed_requests: - if num_output_tokens: - request_metrics[INTER_TOKEN_LAT] /= num_output_tokens - else: - request_metrics[INTER_TOKEN_LAT] = 0 - request_metrics[NUM_OUTPUT_TOKENS] = num_output_tokens - request_metrics[NUM_TOTAL_TOKENS] = request_metrics[NUM_INPUT_TOKENS] + num_output_tokens - request_metrics[REQ_OUTPUT_THROUGHPUT] = num_output_tokens / request_metrics[E2E_LAT] - completed_requests.extend(request_metrics) - - print(f"\Results for token benchmark for {model} queried with the {llm_api} api.\n") - if mean_output_tokens == 2: - print(f"[INFO] 首次token发送预埋完成\n") - return {}, [], 0.0, 0.0 - - ret = metrics_summary(completed_requests, start_time, end_time) - - metadata = { - "model": model, - "mean_input_tokens": mean_input_tokens, - "stddev_input_tokens": stddev_input_tokens, - "mean_output_tokens": mean_output_tokens, - "stddev_output_tokens": stddev_output_tokens, - "num_concurrent_requests": num_concurrent_requests, - "additional_sampling_params": additional_sampling_params, - } - - metadata["results"] = ret - elapsed_time = end_time - start_time - return metadata, completed_requests, elapsed_time, incremental_time_delay - - -def metrics_summary( - metrics: List[Dict[str, Any]], start_time: int, end_time: int -) -> Dict[str, Any]: - """ - 汇总多个请求的性能指标,生成总体统计(吞吐率、延迟分位数、错误率等)。 - 参数: - metrics — 单个请求指标的字典列表 - start_time — 测试启动时间(monotonic) - end_time — 测试结束时间(monotonic) - 返回: - 一个字典,包含汇总后的指标 - """ - ret = {} - - def flatten(item): - for sub_item in item: - if isinstance(sub_item, Iterable) and not isinstance(sub_item, str): - yield from flatten(sub_item) - else: - yield sub_item - - df = pd.DataFrame(metrics) - df_without_errored_req = df[df[ERROR_CODE].isna()] - - for key in [ - INTER_TOKEN_LAT, - TTFT, - E2E_LAT, - REQ_OUTPUT_THROUGHPUT, - NUM_INPUT_TOKENS, - NUM_OUTPUT_TOKENS - ]: - print(key) - ret[key] = {} - series = pd.Series(list(flatten(df_without_errored_req[key]))).dropna() - quantiles = series.quantile([0.25, 0.5, 0.75, 0.9, 0.95, 0.99]).to_dict() - quantiles_reformatted_keys = {} - for quantile, value in quantiles.items(): - reformatted_key = f"p{int(quantile * 100)}" - print(f" {reformatted_key} = {value}") - 
quantiles_reformatted_keys[reformatted_key] = value - ret[key]["quantiles"] = quantiles_reformatted_keys - mean = series.mean() - print(f" mean = {mean}") - ret[key]["mean"] = mean - print(f" min = {series.min()}") - ret[key]["min"] = series.min() - print(f" max = {series.max()}") - ret[key]["max"] = series.max() - print(f" stddev = {series.std()}") - ret[key]["stddev"] = series.std() - - ret[NUM_REQ_STARTED] = len(metrics) - - error_codes = df[ERROR_CODE].dropna() - num_errors = len(error_codes) - ret[ERROR_RATE] = num_errors / len(metrics) if len(metrics) else 0 - ret[NUM_ERRORS] = num_errors - print(f"Number Of Errored Requests: {num_errors}") - error_code_frequency = dict(error_codes.value_counts()) - if num_errors: - error_code_frequency = dict(error_codes.value_counts()) - print("Error Code Frequency") - print(error_code_frequency) - ret[ERROR_CODE_FREQ] = str(error_code_frequency) - - overall_output_throughput = df_without_errored_req[ - NUM_OUTPUT_TOKENS - ].sum() / (end_time - start_time) - - print(f"Overall Output Throughput: {overall_output_throughput}") - ret[OUTPUT_THROUGHPUT] = overall_output_throughput - - num_completed_requests = len(df_without_errored_req) - num_completed_requests_per_min = ( - num_completed_requests / (end_time - start_time) * 60 - ) - print(f"Number Of Completed Requests: {num_completed_requests}") - print(f"Completed Requests Per Minute: {num_completed_requests_per_min}") - - ret[NUM_COMPLETED_REQUESTS] = num_completed_requests - ret[COMPLETED_REQUESTS_PER_MIN] = num_completed_requests_per_min - - return ret - -def run_token_benchmark( - llm_api: str, - model: str, - test_timeout_s: int, - max_num_completed_requests: int, - num_concurrent_requests: int, - mean_input_tokens: int, - stddev_input_tokens: int, - mean_output_tokens: int, - stddev_output_tokens: int, - additional_sampling_params: str, - results_dir: str, - random_seed: int, - openai_api_base: str, - tokenizer_path: str, - user_metadata: Dict[str, Any], - idx: int -): - """ - 执行一次 token 吞吐率 + 延迟基准测试。 - 参数: - llm_api — 调用的 API 名称 - model — 模型名称 - test_timeout_s — 测试超时时间(秒) - max_num_completed_requests — 最大完成请求数 - num_concurrent_requests — 并发请求数 - mean_input_tokens — 输入 token 平均值 - stddev_input_tokens — 输入 token 标准差 - mean_output_tokens — 输出 token 平均值 - stddev_output_tokens — 输出 token 标准差 - additional_sampling_params — 抽样参数 JSON 字符串 - results_dir — 结果保存目录 - random_seed — 随机种子 - openai_api_base — OpenAI 或兼容服务基础 URL - tokenizer_path — 分词器路径 - user_metadata — 用户指定的元数据字典 - idx — 用例索引或标识(可选) - 返回: - summary — 汇总指标字典 - individual_responses — 单个请求指标列表 - elapsed_time — 总耗时 - incremental_time_delay — 累计 decode 时延(inter-token 总延时) - """ - if mean_input_tokens < 40: - print("[WARN] 由于目前的提示逻辑,Input tokens的最小数量为41") - - summary, individual_responses, elapsed_time, incremental_time_delay = get_token_throughput_latencies( - model=model, - llm_api=llm_api, - test_timeout_s=test_timeout_s, - max_num_completed_requests=max_num_completed_requests, - mean_input_tokens=mean_input_tokens, - stddev_input_tokens=stddev_input_tokens, - mean_output_tokens=mean_output_tokens, - stddev_output_tokens=stddev_output_tokens, - num_concurrent_requests=num_concurrent_requests, - additional_sampling_params=json.loads(additional_sampling_params), - random_seed=random_seed, - openai_api_base=openai_api_base, - tokenizer_path=tokenizer_path, - ) - if mean_output_tokens == 2: - return summary, individual_responses, elapsed_time, incremental_time_delay - - if results_dir: - filename = 
f"{model}_{mean_input_tokens}_{mean_output_tokens}_{idx}" - filename = re.sub(r"[^\w\d-]+", "-", filename) - filename = re.sub(r"-{2,}", "-", filename) - summary_filename = f"{filename}_summary" - individual_responses_filename = f"{filename}_individual_responses" - - # Update to metadata. - summary.update(user_metadata) - summary["elapsed_time"] = elapsed_time # 新增运行时长 - summary["incremental_time_delay"] = incremental_time_delay # 新增增量时延 decode时延总和 - - results = LLMPerfResults(name=summary_filename, metadata=summary) - results_dir = Path(results_dir) - if not results_dir.exists(): - results_dir.mkdir(parents=True) - elif not results_dir.is_dir(): - raise ValueError(f"{results_dir} is not a directory") - - try: - with open(results_dir / f"{summary_filename}.json", "w") as f: - json.dump(results.to_dict(), f, indent=4, default=str) - except Exception as e: - print(results.to_dict()) - raise e - - try: - with open(results_dir / f"{individual_responses_filename}.json", "w") as f: - json.dump(individual_responses, f, indent=4) - except Exception as e: - print(individual_responses) - raise e - -def flatten_dict(d: Dict[str, Any], parent_key: str = "", sep: str = "_") -> Dict[str, Any]: - """将可能嵌套的 dict 扁平化为 key1_key2 形式的单层 dict。""" - res: Dict[str, Any] = {} - for k, v in d.items(): - new_key = parent_key + sep + k if parent_key else k - if isinstance(v, dict): - res.update(flatten_dict(v, new_key, sep=sep)) - else: - res[new_key] = v - return res - -def reset_prefill_cache(env, server_url): - """ - 重置前缀缓存(prefix cache / HBM)。 - 参数: - env — 环境变量字典 - server_url — 服务基础 URL - """ - reset_url = f"{server_url}/reset_prefix_cache" - print(f"[INFO] 正在重置 prefix cache: {reset_url}") - try: - result = subprocess.run( - ["curl", "-X", "POST", reset_url, "-s", "-f"], - env=env, - check=False, - capture_output=True, - text=True, - timeout=10 - ) - if result.returncode == 0: - print("[INFO] prefix cache 重置成功") - else: - print(f"[ERROR] 重置 prefix cache 失败,返回码: {result.returncode}") - except Exception as e: - print(f"[ERROR] 重置 prefix cache 异常: {e}") - -def run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path): - """ - 执行所有测试用例,并返回失败用例索引列表及每个用例的命中率映射。 - 参数: - test_cases — 配置文件中读取的测试用例列表 - timestamp_dir — 用于保存结果的目录 Path - model — 模型名称 - server_url — 服务基础 URL - tokenizer_path— 分词器路径 - 返回: - failed_cases — 失败用例索引列表 - case_hit_rate_map — {case_idx: hit_rate} 的映射 - """ - print(f"[INFO] 共计 {len(test_cases)} 个测试用例待执行") - failed_case = [] - - # 清除代理环境变量 - env = os.environ.copy() - env.pop('http_proxy', None) - env.pop('https_proxy', None) - - # 用于存储每个 case_idx 的 hit_rate(用于后续导出至excel表格) - case_hit_rate_map = {} - - for i, case in enumerate(test_cases): - print(f"\n>>> 执行第 {i + 1} 个测试用例 <<<") - reset_prefill_cache(env, server_url) - # 每次测试使用固定 random_seed 控制 PC 命中率 - random_seed = random.randint(1, 100000) - - # 从配置文件读取参数 - mean_input = case.get("mean_input_tokens", 5000) - stddev_input = case.get("stddev_input_tokens", 0) - mean_output = case.get("mean_output_tokens", 1000) - stddev_output = case.get("stddev_output_tokens", 0) - max_completed = case.get("max_num_completed_requests", 1) - concurrent = case.get("num_concurrent_requests", 1) - llm_api = case.get("llm_api", "openai") - additional_sampling_params = case.get("additional_sampling_params", "{}") - timeout = case.get("timeout", 60000) - hit_rate = case.get("hit_rate", 0) - - # 记录这个 case 的 hit_rate - case_hit_rate_map[i] = hit_rate - - # 判断是否需要执行两次(PC 命中率测试) - if hit_rate == 0: - run_token_benchmark( - llm_api=llm_api, - model=model, - 
test_timeout_s=timeout, - max_num_completed_requests=max_completed, - num_concurrent_requests=concurrent, - mean_input_tokens=mean_input, - stddev_input_tokens=stddev_input, - mean_output_tokens=mean_output, - stddev_output_tokens=stddev_output, - additional_sampling_params=additional_sampling_params, - results_dir=str(timestamp_dir), - random_seed=random_seed, - openai_api_base=server_url + "/v1", - tokenizer_path=tokenizer_path, - user_metadata={"case_idx": i}, - idx=i+1 - ) - else: - print("[INFO] 检测到 hit_rate > 0,进入预填充模式") - # hit_rate > 0: 先 prefill 模式 - prefill_mean_input = int(mean_input * hit_rate / 100) - print(f"[INFO] 预填充执行:mean_input_tokens={prefill_mean_input}") - run_token_benchmark( - llm_api=llm_api, - model=model, - test_timeout_s=timeout, - max_num_completed_requests=max_completed, - num_concurrent_requests=concurrent, - mean_input_tokens=prefill_mean_input, - stddev_input_tokens=stddev_input, - mean_output_tokens=2, - stddev_output_tokens=stddev_output, - additional_sampling_params=additional_sampling_params, - results_dir=str(timestamp_dir), - random_seed=random_seed, - openai_api_base=server_url + "/v1", - tokenizer_path=tokenizer_path, - user_metadata={"case_idx": i, "phase": "prefill"} - ) - # 然后正常模式 - print("[INFO] 预填充完成,切换至正常模式执行") - run_token_benchmark( - llm_api=llm_api, - model=model, - test_timeout_s=timeout, - max_num_completed_requests=max_completed, - num_concurrent_requests=concurrent, - mean_input_tokens=mean_input, - stddev_input_tokens=stddev_input, - mean_output_tokens=mean_output, - stddev_output_tokens=stddev_output, - additional_sampling_params=additional_sampling_params, - results_dir=str(timestamp_dir), - random_seed=random_seed, - openai_api_base=server_url + "/v1", - tokenizer_path=tokenizer_path, - user_metadata={"case_idx": i, "phase": "normal"} - ) - - return failed_case, case_hit_rate_map - -def collect_and_export_results(results_dir, model, case_hit_rate_map): - """ - 收集每个测试产生的 `_summary.json` 文件,并导出为 Excel 报告。 - 参数: - results_dir — 结果文件保存目录 - model — 模型名称 - case_hit_rate_map — {case_idx: hit_rate} 映射 - """ - print(f"\n[INFO] 开始收集 {results_dir} 下的 summary.json 文件") - - results_dir = Path(results_dir) - json_files = sorted(results_dir.glob("*_summary.json"), key=lambda f: f.stat().st_mtime) - print(f"[INFO] 找到 {len(json_files)} 个 summary 文件") - - if not json_files: - print("[WARN] 未找到 summary.json 文件,跳过导出") - return - - field_mapping = { - "mean_input_tokens": "input_tokens", - "mean_output_tokens": "output_tokens", - "results_inter_token_latency_s_quantiles_p50": "TBT_p50", - "results_inter_token_latency_s_quantiles_p90": "TBT_p90", - "results_inter_token_latency_s_quantiles_p99": "TBT_p99", - "results_inter_token_latency_s_mean": "TBT_mean", - "results_ttft_s_quantiles_p50": "TTFT_p50", - "results_ttft_s_quantiles_p90": "TTFT_p90", - "results_ttft_s_quantiles_p99": "TTFT_p99", - "results_ttft_s_mean": "TTFT_mean", - "results_end_to_end_latency_s_quantiles_p50": "E2E_p50", - "results_end_to_end_latency_s_quantiles_p90": "E2E_p90", - "results_end_to_end_latency_s_quantiles_p99": "E2E_p99", - "results_end_to_end_latency_s_mean": "E2E_mean", - } - - rows = [] - for i, json_file in enumerate(json_files): - try: - with open(json_file, 'r', encoding='utf-8') as f: - data = json.load(f) - - hit_rate = case_hit_rate_map.get(i, 0) - mean_output_tokens = data.get("results_number_output_tokens_mean", 0) - num_completed_requests = data.get("results_num_completed_requests", 0) - total_e2e_latency_s = data.get("elapsed_time", 0) - total_generation_time_s 
= data.get("incremental_time_delay", 0) - - total_throughput = (mean_output_tokens * num_completed_requests / total_e2e_latency_s - if total_e2e_latency_s > 0 else 0.0) - incremental_throughput = (mean_output_tokens * num_completed_requests / total_generation_time_s - if total_generation_time_s > 0 else 0.0) - - row = {new_name: data.get(orig_name) for orig_name, new_name in field_mapping.items()} - row["TPT"] = round(total_throughput, 4) - row["IPT"] = round(incremental_throughput, 4) - row["Hit_Rate"] = hit_rate if hit_rate > 0 else 0.0 - rows.append(row) - except Exception as e: - print(f"[ERROR] 读取 {json_file} 失败: {e}") - - if not rows: - print("[WARN] 无有效数据可导出") - return - - df = pd.DataFrame(rows) - excel_path = results_dir / f"{model}_benchmark.xlsx" - df.to_excel(excel_path, index=False, engine='openpyxl') - - workbook = load_workbook(excel_path) - worksheet = workbook.active - for col in worksheet.columns: - worksheet.column_dimensions[col[0].column_letter].width = 10 - workbook.save(excel_path) - - print(f"[INFO] 已导出汇总结果到: {excel_path},共 {len(rows)} 行数据") - - -def main(): - """ - 主流程入口:读取配置 → 创建结果目录 → 执行所有用例 → 导出报告 - """ - config_file = "uc_test/config.yaml" - print(f"[INFO] 开始读取配置文件: {config_file}") - - try: - with open(config_file, 'r', encoding='utf-8') as f: - config = yaml.safe_load(f) - model = config.get("server_config", {}).get("model", "") - server_url = config.get("server_config", {}).get("server_url", "") - tokenizer_path = config.get("server_config", {}).get("tokenizer_path", "") - test_cases = config.get("test_cases", []) - except Exception as e: - print(f"[ERROR] 解析 YAML 失败: {e}") - sys.exit(1) - - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - timestamp_dir = Path("result_outputs") / timestamp - timestamp_dir.mkdir(parents=True, exist_ok=True) - print(f"[INFO] 创建结果目录: {timestamp_dir}") - - failed_cases, case_hit_rate_map = run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path) - total = len(test_cases) - print(f"\n[INFO] 所有测试完成!成功: {total - len(failed_cases)}/{total}") - if failed_cases: - print(f"[WARN] 失败用例索引: {failed_cases}") - - collect_and_export_results(timestamp_dir, "qwen3", case_hit_rate_map) - - -if __name__ == "__main__": - # 初始化 ray - env_vars = dict(os.environ) - ray.init(runtime_env={"env_vars": env_vars}) - print("[INFO] Ray 初始化完成,开始主流程") - - main() From e858ba19ce6b903dcaa20ddf520ec45c43f3e95c Mon Sep 17 00:00:00 2001 From: paperTII <2293564561@qq.com> Date: Wed, 12 Nov 2025 09:47:59 +0800 Subject: [PATCH 5/5] Adapted to pytest framework Adapted to pytest framework --- test/.gitignore | 4 + test/README.md | 324 ++++++-------- test/README_zh.md | 327 ++++++-------- test/common/allure_utils.py | 196 --------- test/common/capture_utils.py | 95 ++++ test/common/config_utils.py | 14 +- test/common/db_utils.py | 183 ++++++++ test/common/influxdb_utils.py | 58 --- test/common/llmperf/run_inference.py | 91 ++-- test/common/llmperf/utils/token_benchmark.py | 65 ++- test/config.yaml | 49 +-- test/conftest.py | 433 +++++-------------- test/pytest.ini | 7 +- test/requirements.txt | 11 +- test/suites/E2E/test_demo_function.py | 66 +++ test/suites/E2E/test_uc_performance.py | 121 ++++++ test/suites/test_demo_function.py | 185 -------- test/suites/test_uc_performance.py | 159 ------- test/test_uc_connector.py | 14 +- test/test_ucm_dram.py | 250 +++++++++++ 20 files changed, 1226 insertions(+), 1426 deletions(-) delete mode 100644 test/common/allure_utils.py create mode 100644 test/common/capture_utils.py create mode 100644 
test/common/db_utils.py delete mode 100644 test/common/influxdb_utils.py create mode 100644 test/suites/E2E/test_demo_function.py create mode 100644 test/suites/E2E/test_uc_performance.py delete mode 100644 test/suites/test_demo_function.py delete mode 100644 test/suites/test_uc_performance.py create mode 100644 test/test_ucm_dram.py diff --git a/test/.gitignore b/test/.gitignore index e6578117..220d21ac 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -1,6 +1,10 @@ reports/ dataset/ logs/ +result_outputs/ +results/ +.cache/ +backup/ $null *__pycache__/ .* diff --git a/test/README.md b/test/README.md index 00aeb064..1e11da7e 100644 --- a/test/README.md +++ b/test/README.md @@ -1,219 +1,179 @@ -# UCM Pytest Testing Framework +# Pytest +[简体中文](README_zh.md) +A comprehensive Pytest testing framework featuring configuration management, database integration, performance testing, and HTML report generation. -A unified cache management testing framework based on pytest, supporting multi-level testing, flexible marking, performance data collection, and beautiful Allure report generation. +## 📋 Features -## Framework Features +- **Modern Testing Framework**: Complete test solution built on Pytest 7.0+ +- **Configuration Management**: YAML-based config with thread-safe singleton pattern +- **Database Integration**: Built-in MySQL support with automatic result storage +- **HTML Reports**: Auto-generated pytest HTML test reports +- **Tagging System**: Multi-dimensional test tags (stage, feature, platform, etc.) -- [x] 🏗️ **Multi-level Testing**: UnitTest(0) → Smoke(1) → Feature(2) → E2E(3) -- [x] 🏷️ **Flexible Marking**: Support for feature tags, platform tags, and reliability tags -- [x] 📊 **Data Collection**: Integrated InfluxDB performance data pushing -- [x] 📋 **Beautiful Reports**: Allure test report integration, supporting both static HTML and dynamic server modes -- [x] 🔧 **Configuration Management**: Flexible YAML-based configuration system -- [x] 🚀 **Automation**: Support for parallel test execution and automatic cleanup - -## Test Level Definitions - -| Level | Name | Description | Execution Time | -|-----|------|------|----------| -| 0 | UnitTest | Unit Tests | Every code commit | -| 1 | Smoke | Smoke Tests | Build verification | -| 2 | Feature | Feature Tests | When features are completed | -| 3 | E2E | End-to-End Tests | Before version release | - -## Directory Structure +## 🗂️ Project Structure ``` -test/ -├── config.yaml # Test framework configuration file -├── conftest.py # pytest configuration and fixtures, main program entry -├── pytest.ini # pytest markers and basic configuration -├── requirements.txt # Dependency package list -├── common/ # Common utility library +pytest_demo/ +├── common/ # Common modules │ ├── __init__.py -│ ├── config_utils.py # Configuration file reading tools -│ ├── influxdb_utils.py # InfluxDB writing tools -│ └── allure_utils.py # Allure reporting tools -├── suites/ # Test case directory -│ ├── UnitTest/ # Unit tests (stage 0) -│ ├── Smoke/ # Smoke tests (stage 1) -│ ├── Feature/ # Feature tests (stage 2) -│ ├── E2E/ # End-to-end tests (stage 3) -│ └── test_demo_function.py# Example test cases -├── reports/ # Test report directory -└── logs/ # Test log directory +│ ├── config_utils.py # Configuration utilities +│ ├── db_utils.py # Database utilities +│ └── capture_utils # Return-value capture utilities +├── results/ # Result storage folder +├── suites/ # Test suites +│ ├── UnitTest # Unit tests +│ ├── Feature # Feature tests +│ └── E2E/ # End-to-end 
tests +│ └── test_demo_performance.py # Sample test file +├── config.yaml # Main config file +├── conftest.py # Pytest config +├── pytest.ini # Pytest settings +├── requirements.txt # Dependencies +└── README.md # This doc (CN) ``` -## Quick Start +## 🚀 Quick Start -### 1. Environment Setup -```bash -# Install dependencies -pip install -r requirements.txt +### Prerequisites -# Ensure Allure CLI is installed (for report generation) -# Download from: https://github.com/allure-framework/allure2/releases -``` +- Python 3.8+ +- MySQL 5.7+ (optional, for DB features) +- Git -### 2. Configuration File -The main configuration file is `config.yaml`, containing the following configuration items: -- **reports**: Report generation configuration (HTML/Allure) -- **log**: Logging configuration -- **influxdb**: Performance data push configuration -- **llm_connection**: LLM connection configuration +### Installation -### 3. Running Tests -```bash -# Run all tests -pytest +1. **Install dependencies** + ```bash + pip install -r requirements.txt + ``` -# Run specific level tests -pytest --stage=1 # Run smoke tests -pytest --stage=2+ # Run feature and end-to-end tests +2. **Configure database** (optional) -# Run specific tag tests -pytest --feature=performance # Run performance-related tests -pytest --platform=gpu # Run GPU platform tests -pytest --reliability=high # Run high reliability tests + Edit `config.yaml`: + ```yaml + database: + backup: "results/" + host: "127.0.0.1" + port: 3306 + name: "ucm_pytest" + user: "root" + password: "123456" + charset: "utf8mb4" + ``` -# Combined filtering -pytest --stage=1 --feature=performance,accuracy # Performance and accuracy tests in smoke tests -``` +3. **Run tests** + ```bash + # Run all tests + pytest + + # Run tests by tag + pytest --stage=1 + pytest --feature=performance + ``` + +## ⚙️ Configuration -## Test Case Standards +### config.yaml + +Full YAML-based config. Key sections: + +- **reports**: Report settings (HTML, timestamp, etc.) +- **database**: MySQL connection details + +## 🧪 Test Examples + +### Basic functional test -### Basic Structure ```python +# suites/E2E/test_demo_performance.py import pytest -import allure -from common.config_utils import config_utils as config_instance - -class TestExample: - """Test example class""" - - @pytest.mark.stage(2) - @pytest.mark.feature("performance") - @pytest.mark.platform("gpu") - def test_gpu_performance(self): - """Test GPU performance""" - # Arrange - test_data = config_instance.get_config("test_data") - - # Act & Assert - with allure.step("Execute GPU computation"): - result = perform_gpu_calculation(test_data) - assert result.is_valid - - # Collect performance data - from common.influxdb_utils import push_to_influx - push_to_influx("gpu_compute_time", result.duration, { - "test_name": "test_gpu_performance", - "platform": "gpu" - }) -``` -### Marking Usage Guidelines +@pytest.fixture(scope="module", name="calc") +def calculator(): + return Calculator() -#### 1. Level Markers (Required) -```python -@pytest.mark.stage(0) # Unit tests -@pytest.mark.stage(1) # Smoke tests -@pytest.mark.stage(2) # Feature tests -@pytest.mark.stage(3) # End-to-end tests -``` +@pytest.mark.feature("mark") +class TestCalculator: + def test_add(self, calc): + assert calc.add(1, 2) == 3 -#### 2. 
Feature Markers (Recommended) -```python -@pytest.mark.feature("performance") # Performance tests -@pytest.mark.feature("accuracy") # Accuracy tests -@pytest.mark.feature("memory") # Memory tests + def test_divide_by_zero(self, calc): + with pytest.raises(ZeroDivisionError): + calc.divide(6, 0) ``` -#### 3. Platform Markers (Optional) -```python -@pytest.mark.platform("gpu") # GPU platform tests -@pytest.mark.platform("npu") # NPU platform tests -@pytest.mark.platform("cpu") # CPU platform tests -``` +## 🏷️ Tagging System -#### 4. Reliability Markers (Optional) -```python -@pytest.mark.reliability("high") # High reliability tests -@pytest.mark.reliability("medium") # Medium reliability tests -@pytest.mark.reliability("low") # Low reliability tests -``` +Multi-dimensional tags supported: -## Allure Report Integration +### Stage tags +- `stage(0)`: Unit tests +- `stage(1)`: Smoke tests +- `stage(2)`: Regression tests +- `stage(3)`: Release tests -### 1. Basic Usage -```python -import allure - -@allure.feature('User Authentication') -@allure.story('Login Function') -def test_user_login(): - """Test user login functionality""" - with allure.step("Enter username and password"): - login_page.enter_credentials("user", "pass") - - with allure.step("Click login button"): - login_page.click_login() - - with allure.step("Verify successful login"): - assert dashboard_page.is_displayed() - - # Add attachment - allure.attach("Screenshot data", name="Login Screenshot", - attachment_type=allure.attachment_type.PNG) -``` +### Functional tags +- `feature`: Module tag +- `platform`: Platform tag (GPU/NPU) + +### Usage + +```bash +# Run smoke tests and above +pytest --stage=1+ + +# Run by feature +pytest --feature=performance +pytest --feature=performance,reliability -### 2. Report Configuration -Configure Allure reports in `config.yaml`: -```yaml -reports: - allure: - enabled: true - html_enable: true - serve_mode: true # Use dynamic server mode - serve_host: "localhost" - serve_port: 8081 - directory: "allure-results" +# Run by platform +pytest --platform=gpu ``` -### 3. Report Viewing -- **Static HTML Mode**: Automatically generates static HTML reports after test completion -- **Dynamic Server Mode**: Starts Allure server, providing interactive report interface +### HTML Reports + +Auto-generated timestamped HTML reports: +- Location: `reports/pytest_YYYYMMDD_HHMMSS/report.html` +- Detailed results, errors, timing +- Customizable title & style + +### Database Storage + +If enabled, results are auto-saved to MySQL. +To add new record types, ask DB admin to create tables; otherwise only local files are used. 
+ +Example: +```python +@pytest.mark.feature("capture") # Must be top decorator +@export_vars +def test_capture_mix(): + assert 1 == 1 + return { + '_name': 'demo', + '_data': { + 'length': 10086, # single value + 'accuracy': [0.1, 0.2, 0.3], # list + 'loss': [0.1, 0.2, 0.3], # list + } + } +``` -## Performance Data Collection +### Config Access -### InfluxDB Integration +Read settings easily: ```python -from common.influxdb_utils import push_to_influx - -# Collect performance data in tests -def test_performance_metrics(): - start_time = time.time() - - # Execute test logic - result = perform_operation() - - # Push performance data to InfluxDB - push_to_influx("operation_duration", time.time() - start_time, { - "test_name": "test_performance_metrics", - "operation_type": "calculation", - "success": str(result.success) - }) +from common.config_utils import config_utils +# Get config +db_config = config_utils.get_config("database") +api_config = config_utils.get_nested_config("easyPerf.api") ``` -## Extensions and Customization +## 🛠️ Development Guide -### Adding New Markers -1. Add new marker definitions in the `markers` section of `pytest.ini` -2. Keep the `markers =` and `# end of markers` lines unchanged -3. Re-run tests to use new markers +### Adding New Tests -### Custom Configuration -Customize through `config.yaml`: -- Report format and storage location -- Log level and output format -- InfluxDB connection parameters -- LLM service configuration +1. Create test files under `suites/` categories +2. Apply appropriate tags +3. Naming: `test_*.py` +4. Use fixtures & marks for data management +5. Keep custom marks concise and aligned with overall goals \ No newline at end of file diff --git a/test/README_zh.md b/test/README_zh.md index 56c68815..26b0f393 100644 --- a/test/README_zh.md +++ b/test/README_zh.md @@ -1,227 +1,182 @@ -# UCM Pytest 测试框架 +# Pytest 项目 + Pytest 测试框架,包括配置管理、数据库集成、性能测试和 HTML 报告生成。 -基于pytest的统一缓存管理测试框架,支持多级别测试、灵活标记、性能数据收集和Allure精美报告生成。 +## 📋 项目特性 -## 框架特性 +- **现代化测试框架**: 基于 Pytest 7.0+ 的完整测试解决方案 +- **配置管理**: 支持 YAML 配置文件,线程安全的单例模式配置管理 +- **数据库集成**: 内置 MySQL 数据库支持,自动结果存储 +- **HTML 报告**: 自动生成pytest HTML 测试报告 +- **标记系统**: 支持多维度测试标记(阶段、功能、平台等) -- [x] 🏗️ **多级别测试**: UnitTest(0) → Smoke(1) → Feature(2) → E2E(3) -- [x] 🏷️ **灵活标记**: 支持功能标签、平台标签和可靠性标签 -- [x] 📊 **数据收集**: 集成InfluxDB性能数据推送 -- [x] 📋 **精美报告**: Allure测试报告集成,支持静态HTML和动态服务模式 -- [x] 🔧 **配置管理**: 基于YAML的灵活配置系统 -- [x] 🚀 **自动化**: 支持并行测试执行和自动清理 - -## 测试级别定义 - -| 级别 | 名称 | 说明 | 执行时机 | -|-----|------|------|----------| -| 0 | UnitTest | 单元测试 | 每次代码提交 | -| 1 | Smoke | 冒烟测试 | 构建验证 | -| 2 | Feature | 功能测试 | 特性完成时 | -| 3 | E2E | 端到端测试 | 版本发布前 | - -## 目录结构 +## 🗂️ 项目结构 ``` -test/ -├── config.yaml # 测试框架配置文件 -├── conftest.py # pytest配置和fixtures,程序主入口 -├── pytest.ini # pytest标记和基础配置 -├── requirements.txt # 依赖包列表 -├── common/ # 通用工具库 +pytest_demo/ +├── common/ # 公共模块 │ ├── __init__.py -│ ├── config_utils.py # 配置文件读取工具 -│ ├── influxdb_utils.py # InfluxDB写入工具 -│ └── allure_utils.py # Allure报告工具 -├── suites/ # 测试用例目录 -│ ├── UnitTest/ # 单元测试 (stage 0) -│ ├── Smoke/ # 冒烟测试 (stage 1) -│ ├── Feature/ # 功能测试 (stage 2) -│ ├── E2E/ # 端到端测试 (stage 3) -│ └── test_demo_function.py# 示例测试用例 -├── reports/ # 测试报告目录 -└── logs/ # 日志目录 +│ ├── config_utils.py # 配置管理工具 +│ ├── db_utils.py # 数据库工具 +│ └── capture_utils # 返回值捕获工具 +├── results/ # 结果存储目录 +├── suites/ # 测试套件 +│ ├── UnitTest # 单元测试 +│ ├── Feature # 功能测试 +│ └── E2E/ # 端到端测试 +│ └── test_demo_performance.py # 示例测试文件 +├── config.yaml # 主配置文件 +├── conftest.py # Pytest 配置文件 +├── pytest.ini # Pytest 配置 
+├── requirements.txt # 项目依赖 +└── README.md # 本文档 ``` -## 快速开始 +## 🚀 快速开始 -### 1. 环境准备 -```bash -# 安装依赖 -pip install -r requirements.txt +### 环境要求 -# 确保Allure CLI已安装(用于生成报告) -# 下载地址: https://github.com/allure-framework/allure2/releases -``` +- Python 3.8+ +- MySQL 5.7+ (可选,用于数据库功能) +- Git -### 2. 配置文件 -主要配置文件为 `config.yaml`,包含以下配置项: -- **reports**: 报告生成配置(HTML/Allure) -- **log**: 日志配置 -- **influxdb**: 性能数据推送配置 -- **llm_connection**: LLM连接配置 +### 安装步骤 -### 3. 运行测试 -```bash -# 运行所有测试 -pytest +1. **安装依赖** + ```bash + pip install -r requirements.txt + ``` -# 运行特定级别的测试 -pytest --stage=1 # 运行冒烟测试 -pytest --stage=2+ # 运行功能测试和端到端测试 +2. **配置数据库**(可选) -# 运行特定标签的测试 -pytest --feature=performance # 运行性能相关测试 -pytest --platform=gpu # 运行GPU平台测试 -pytest --reliability=high # 运行高可靠性测试 + 编辑 `config.yaml` 文件中的数据库配置: + ```yaml + database: + backup: "results/" + host: "127.0.0.1" + port: 3306 + name: "ucm_pytest" + user: "root" + password: "123456" + charset: "utf8mb4" + ``` -# 组合过滤 -pytest --stage=1 --feature=performance,accuracy # 冒烟测试中的性能和准确性测试 -``` +3. **运行测试** + ```bash + # 运行所有测试 + pytest + + # 运行特定标记的测试 + pytest --stage=1 + pytest --feature=performance + ``` -## 测试用例标准 +## ⚙️ 配置说明 + + +### config.yaml 配置 + +项目支持完整的 YAML 配置管理,主要配置项包括: + +- **reports**: 报告配置(HTML 报告、时间戳等) +- **database**: 数据库连接配置 + +## 🧪 测试示例 + +### 基础功能测试 -### 基本结构 ```python +# suites/E2E/test_demo_performance.py import pytest -import allure -from common.config_utils import config_utils as config_instance - -class TestExample: - """测试示例类""" - - @pytest.mark.stage(2) - @pytest.mark.feature("performance") - @pytest.mark.platform("gpu") - def test_gpu_performance(self): - """测试GPU性能""" - # Arrange - test_data = config_instance.get_config("test_data") - - # Act & Assert - with allure.step("执行GPU计算"): - result = perform_gpu_calculation(test_data) - assert result.is_valid - - # 收集性能数据 - from common.influxdb_utils import push_to_influx - push_to_influx("gpu_compute_time", result.duration, { - "test_name": "test_gpu_performance", - "platform": "gpu" - }) -``` -### 标记使用规范 +@pytest.fixture(scope="module", name="calc") +def calculator(): + return Calculator() -#### 1. 级别标记 (必需) -```python -@pytest.mark.stage(0) # 单元测试 -@pytest.mark.stage(1) # 冒烟测试 -@pytest.mark.stage(2) # 功能测试 -@pytest.mark.stage(3) # 端到端测试 -``` +@pytest.mark.feature("mark") +class TestCalculator: + def test_add(self, calc): + assert calc.add(1, 2) == 3 -#### 2. 功能标记 (推荐) -```python -@pytest.mark.feature("performance") # 性能测试 -@pytest.mark.feature("accuracy") # 准确性测试 -@pytest.mark.feature("memory") # 内存测试 + def test_divide_by_zero(self, calc): + with pytest.raises(ZeroDivisionError): + calc.divide(6, 0) ``` -#### 3. 平台标记 (可选) -```python -@pytest.mark.platform("gpu") # GPU平台测试 -@pytest.mark.platform("npu") # NPU平台测试 -@pytest.mark.platform("cpu") # CPU平台测试 -``` +## 🏷️ 测试标记系统 -#### 4. 可靠性标记 (可选) -```python -@pytest.mark.reliability("high") # 高可靠性测试 -@pytest.mark.reliability("medium") # 中等可靠性测试 -@pytest.mark.reliability("low") # 低可靠性测试 +项目支持多维度的测试标记: + +### 测试阶段标记 +- `stage(0)`: 单元测试 +- `stage(1)`: 冒烟测试 +- `stage(2)`: 回归测试 +- `stage(3)`: 发布测试 + +### 功能标记 +- `feature`: 功能模块标记 +- `platform`: 平台标记(GPU/NPU) + +### 使用示例 + +```bash +# 运行冒烟测试及以上的所有测试 +pytest --stage=1+ + +# 运行特定功能的测试 +pytest --feature=performance +pytest --feature=performance, reliability +# 运行特定平台的测试 +pytest --platform=gpu ``` -## Allure 报告集成 -### 1. 
基本用法 +### HTML 报告 + +项目自动生成带时间戳的 HTML 测试报告: +- 报告位置:`reports/pytest_YYYYMMDD_HHMMSS/report.html` +- 包含详细的测试结果、错误信息和执行时间 +- 支持自定义报告标题和样式 + +### 数据库存储 + +如果启用数据库功能,测试结果会自动存储到 MySQL 数据库。 +若需要新增记录,请联系管理人员在数据库新增对应表;否则只能保存至本地文件。 +使用方式示例: ```python -import allure - -@allure.feature('用户认证') -@allure.story('登录功能') -def test_user_login(): - """测试用户登录功能""" - with allure.step("输入用户名和密码"): - login_page.enter_credentials("user", "pass") - - with allure.step("点击登录按钮"): - login_page.click_login() - - with allure.step("验证登录成功"): - assert dashboard_page.is_displayed() - - # 添加附件 - allure.attach("Screenshot data", name="登录截图", - attachment_type=allure.attachment_type.PNG) -``` +@pytest.mark.feature("capture") # pytest 的标签必须在上面,否则无法正常使用标记功能 +@export_vars +def test_capture_mix(): + assert 1 == 1 + return { + '_name': 'demo', + '_data': { + 'length': 10086, # single value + 'accuracy': [0.1, 0.2, 0.3], # list + 'loss': [0.1, 0.2, 0.3], # list + } + } -### 2. 报告配置 -在 `config.yaml` 中配置Allure报告: -```yaml -reports: - allure: - enabled: true - html_enable: true - serve_mode: true # 使用动态服务模式 - serve_host: "localhost" - serve_port: 8081 - directory: "allure-results" ``` -### 3. 报告查看 -- **静态HTML模式**: 测试完成后自动生成静态HTML报告 -- **动态服务模式**: 启动Allure服务器,提供交互式报告界面 -## 性能数据收集 +### 配置管理 -### InfluxDB 集成 +可以通过配置工具便捷读取参数: ```python -from common.influxdb_utils import push_to_influx - -# 在测试中收集性能数据 -def test_performance_metrics(): - start_time = time.time() - - # 执行测试逻辑 - result = perform_operation() - - # 推送性能数据到InfluxDB - push_to_influx("operation_duration", time.time() - start_time, { - "test_name": "test_performance_metrics", - "operation_type": "calculation", - "success": str(result.success) - }) +from common.config_utils import config_utils +# 获取配置 +db_config = config_utils.get_config("database") +api_config = config_utils.get_nested_config("easyPerf.api") ``` -## 扩展和自定义 -### 添加新标记 -1. 在 `pytest.ini` 的 `markers` 部分添加新标记定义 -2. 保持 `markers =` 和 `# end of markers` 两行不变 -3. 重新运行测试即可使用新标记 -### 自定义配置 -通过修改 `config.yaml` 可以自定义: -- 报告格式和存储位置 -- 日志级别和输出格式 -- InfluxDB连接参数 -- LLM服务配置 +## 🛠️ 开发指南 -## 最佳实践 +### 添加新测试 -1. **测试命名**: 使用描述性的测试方法名 -2. **标记使用**: 为每个测试添加适当的级别和功能标记 -3. **步骤分解**: 使用Allure步骤将复杂测试分解为可读的步骤 -4. **数据驱动**: 使用参数化测试减少重复代码 -5. **环境隔离**: 使用fixtures确保测试环境的一致性 +1. 在 `suites/` 目录下的各个分类下创建新的测试文件 +2. 使用适当的测试标记 +3. 遵循命名规范:`test_*.py` +4. 使用 fixture 及mark 进行测试数据管理 +5. 自定义 mark 标签不易过细,应当与整体功能目标相符合 \ No newline at end of file diff --git a/test/common/allure_utils.py b/test/common/allure_utils.py deleted file mode 100644 index 80bbd1d2..00000000 --- a/test/common/allure_utils.py +++ /dev/null @@ -1,196 +0,0 @@ -""" -Allure Report Utility -Provides convenient Allure reporting functionality and decorators -""" - -import allure -import os -import pytest -import subprocess -import shutil -import time -import platform -import sys -from pathlib import Path -from typing import Dict, Any, ContextManager, Optional, Union, List - - - - -def setup_allure(config: Dict[str, Any]) -> Optional[Path]: - """Configure Allure results directory and write environment.properties.""" - allure_cfg = config.get("allure", {}) - if not allure_cfg.get("enabled", False): - return None - - # 1. 
沿用你原来的目录逻辑 - base_dir = Path(config.get("base_dir", "reports")) - if config.get("use_timestamp", False) and base_dir.exists(): - timestamp_dirs = [ - d for d in base_dir.iterdir() - if d.is_dir() and d.name.startswith(config.get("directory_prefix", "pytest")) - ] - if timestamp_dirs: - timestamp_dirs.sort(key=lambda x: x.stat().st_mtime, reverse=True) - base_dir = timestamp_dirs[0] - - allure_dir = base_dir / allure_cfg.get("directory", "allure-results") - allure_dir.mkdir(parents=True, exist_ok=True) - os.environ["ALLURE_REPORT_DIR"] = str(allure_dir) - - # 2. 新增:写入环境信息 - env_info = _get_system_info() # 采集系统信息 - custom_env = allure_cfg.get("environment", {}) # 允许用户再追加/覆盖 - env_info.update(custom_env) - _create_environment_properties(allure_dir, env_info) - - return allure_dir - - -def check_allure_available() -> bool: - """Check if Allure CLI is installed and working.""" - try: - allure_path = shutil.which("allure") - if not allure_path: - return False - result = subprocess.run( - [allure_path, "--version"], - capture_output=True, - text=True, - timeout=10, - shell=True - ) - return result.returncode == 0 - except Exception: - return False - - -def serve_allure_report( - allure_results_dir: Union[str, Path], - host: str = "localhost", - port: int = 8080, - auto_open: bool = True -) -> Optional[subprocess.Popen]: - """Start Allure server and optionally open browser.""" - if not check_allure_available(): - print("Allure CLI not found. Install from https://github.com/allure-framework/allure2/releases") - return None - - allure_results_dir = Path(allure_results_dir) - if not allure_results_dir.exists() or not any(allure_results_dir.iterdir()): - print(f"Allure results directory missing or empty: {allure_results_dir}") - return None - - allure_path = shutil.which("allure") - cmd = [allure_path, "serve", str(allure_results_dir), "--host", host] - if port > 0: - cmd.extend(["--port", str(port)]) - - process = subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - bufsize=1, - universal_newlines=True - ) - print(f"Allure server starting at http://{host}:{port} (PID: {process.pid})") - print("Please press Ctrl+C to stop the server") - time.sleep(3) - - if process.poll() is not None: - print("Allure server failed to start") - return None - - try: - while process.poll() is None: - time.sleep(0.5) - except KeyboardInterrupt: - print("\nStopping Allure server...") - process.terminate() - try: - process.wait(timeout=5) - except subprocess.TimeoutExpired: - process.kill() - process.wait() - return process - - -def generate_allure_html( - allure_results_dir: Union[str, Path], - html_output_dir: Optional[Union[str, Path]] = None, - clean: bool = False, - auto_serve: bool = False -) -> Optional[Union[Path, subprocess.Popen]]: - """Generate static HTML report or serve dynamically.""" - if not check_allure_available(): - print("Allure CLI not found. 
Install from https://github.com/allure-framework/allure2/releases") - return None - - allure_results_dir = Path(allure_results_dir) - if not allure_results_dir.exists() or not any(allure_results_dir.iterdir()): - print(f"Allure results directory missing or empty: {allure_results_dir}") - return None - - if auto_serve: - return serve_allure_report(allure_results_dir) - - html_output_dir = Path(html_output_dir or allure_results_dir.parent / "allure-report") - if clean and html_output_dir.exists(): - shutil.rmtree(html_output_dir) - html_output_dir.mkdir(parents=True, exist_ok=True) - - allure_path = shutil.which("allure") - cmd = f'{allure_path} generate "{allure_results_dir}" -o "{html_output_dir}" --clean' - result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=60) - - if result.returncode == 0: - print(f"Allure HTML report generated: {html_output_dir}") - return html_output_dir - else: - print(f"HTML generation failed: {result.stderr}") - return None - - -def _create_environment_properties(allure_results_dir: Union[str, Path], - environment_info: Dict[str, str]) -> None: - allure_results_dir = Path(allure_results_dir) - allure_results_dir.mkdir(parents=True, exist_ok=True) - - env_file = allure_results_dir / "environment.properties" - - with open(env_file, 'w', encoding='utf-8') as f: - for key, value in environment_info.items(): - f.write(f"{key}={value}\n") - - print(f"Environment properties file created: {env_file}") - - -def _get_system_info() -> Dict[str, str]: - """Human-readable system information (English only).""" - info: Dict[str, str] = {} - - # ---------- OS ---------- - os_name = platform.system() - info["OS"] = os_name - - # ---------- Architecture ---------- - arch = platform.architecture()[0] # '64bit' / '32bit' - info["Architecture"] = "64-bit" if "64" in arch else "32-bit" - - # ---------- Python ---------- - # info["Python Implementation"] = platform.python_implementation() - info["Python"] = sys.version.split()[0].replace("Version=", "") - - # ---------- Hardware ---------- - machine = platform.machine() - info["Machine"] = "x86-64" if machine == "AMD64" else machine - proc = platform.processor() - if "Intel" in proc: - info["Processor"] = "Intel" - elif "AMD" in proc: - info["Processor"] = "AMD" - else: - info["Processor"] = proc.split()[0] if proc else "Kunpeng" - - return info \ No newline at end of file diff --git a/test/common/capture_utils.py b/test/common/capture_utils.py new file mode 100644 index 00000000..ee12ed2a --- /dev/null +++ b/test/common/capture_utils.py @@ -0,0 +1,95 @@ +from typing import Any, Dict, List + +from common.db_utils import write_to_db + + +def _align_and_split(name: str, data: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Align a mixed data package (single values and/or lists) and split it into + """ + if not data: + return [] + + aligned: Dict[str, List[Any]] = {} + lengths: Dict[str, int] = {} + for k, v in data.items(): + if isinstance(v, (list, tuple)): + aligned[k] = list(v) + else: + aligned[k] = [v] + lengths[k] = len(aligned[k]) + + max_len = max(lengths.values()) + + for k, lst in aligned.items(): + if len(lst) < max_len: + lst.extend([lst[-1]] * (max_len - len(lst))) + + return [{k: aligned[k][i] for k in aligned} for i in range(max_len)] + + +def post_process(table_name: str, **kwargs) -> List[Dict[str, Any]]: + """ + Unified post-processing entry point. 
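+    The '_data' payload may mix scalars and lists: scalars are treated as
+    one-element lists, every column is padded to the longest list by repeating
+    its last value, and one row per index is written via write_to_db under
+    '_name' (falling back to the calling function's name).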
Supports two calling styles: + """ + results = [] + if "_data" in kwargs: + name = kwargs.get("_name", table_name) + results = _align_and_split(name, kwargs["_data"]) + for result in results: + write_to_db(name, result) + return results + return [] + + +# ---------------- decorator ---------------- +def export_vars(func): + def wrapper(*args, **kwargs): + result = func(*args, **kwargs) + # If the function returns a dict containing '_data' or 'data', post-process it + if isinstance(result, dict): + if "_data" in result or "data" in result: + return post_process(func.__name__, **result) + # Otherwise return unchanged + return result + + return wrapper + + +# ---------------- usage examples ---------------- +@export_vars +def capture(): + """All single values via 'name' + 'data'""" + return {"name": "demo", "_data": {"accuracy": 0.1, "loss": 0.3}} + + +@export_vars +def capture_list(): + """All lists via '_name' + '_data'""" + return { + "_name": "demo", + "_data": { + "accuracy": [0.1, 0.2, 0.3], + "loss": [0.1, 0.2, 0.3], + }, + } + + +@export_vars +def capture_mix(): + """Mixed single + lists via '_name' + '_data'""" + return { + "_name": "demo", + "_data": { + "length": 10086, # single value + "accuracy": [0.1, 0.2, 0.3], # list + "loss": [0.1, 0.2, 0.3], # list + }, + } + + +# quick test +if __name__ == "__main__": + print("capture(): ", capture()) + print("capture_list(): ", capture_list()) + print("capture_mix(): ", capture_mix()) diff --git a/test/common/config_utils.py b/test/common/config_utils.py index 3cdc427b..106f783e 100644 --- a/test/common/config_utils.py +++ b/test/common/config_utils.py @@ -1,7 +1,8 @@ -import yaml import os import threading -from typing import Dict, Any +from typing import Any, Dict + +import yaml class ConfigUtils: @@ -13,6 +14,9 @@ class ConfigUtils: _instance = None _lock = threading.Lock() # Ensure thread-safe singleton creation + def __init__(self): + self._config = None + def __new__(cls, config_file: str = None): # Double-checked locking if cls._instance is None: @@ -76,5 +80,7 @@ def get_nested_config(self, key_path: str, default: Any = None) -> Any: config_utils = ConfigUtils() if __name__ == "__main__": - print("InfluxDB config:", config_utils.get_config("influxdb")) - print("InfluxDB host:", config_utils.get_nested_config("influxdb.host", "localhost")) + print("DataBase config:", config_utils.get_config("database")) + print( + "DataBase host:", config_utils.get_nested_config("database.host", "localhost") + ) diff --git a/test/common/db_utils.py b/test/common/db_utils.py new file mode 100644 index 00000000..089af43b --- /dev/null +++ b/test/common/db_utils.py @@ -0,0 +1,183 @@ +import json +import logging +import threading +from pathlib import Path +from typing import Any, Dict, Optional + +import peewee +from common.config_utils import config_utils as config_instance +from peewee import AutoField, Model, MySQLDatabase, TextField + +logger = logging.getLogger("db_handler") +logger.setLevel(logging.DEBUG) + +# Avoid adding handlers multiple times +if not logger.handlers: + logger.setLevel(logging.DEBUG) + +# Global DB instance and lock for thread-safe singleton +_db_instance: Optional[MySQLDatabase] = None +_db_lock = threading.Lock() +_test_build_id: Optional[str] = None +_backup_path: Optional[Path] = None +_db_enabled: bool = False # from config + + +def _get_db() -> Optional[MySQLDatabase]: + """Return a singleton MySQLDatabase instance based on YAML configuration.""" + global _db_instance, _backup_path, _db_enabled + + if _db_instance is 
None: + with _db_lock: + if _db_instance is None: + db_config = config_instance.get_config("database", {}) + _db_enabled = db_config.get("enabled", False) + + backup_str = db_config.get("backup", "results/") + _backup_path = Path(backup_str).resolve() + _backup_path.mkdir(parents=True, exist_ok=True) + logger.info(f"Backup directory set to: {_backup_path}") + + if not _db_enabled: + return None + + try: + _db_instance = MySQLDatabase( + db_config.get("name", "test_db"), + user=db_config.get("user", "root"), + password=db_config.get("password", ""), + host=db_config.get("host", "localhost"), + port=db_config.get("port", 3306), + charset=db_config.get("charset", "utf8mb4"), + ) + logger.info( + f"Database instance created for: {_db_instance.database}" + ) + except Exception as e: + logger.error(f"Failed to create database instance: {e}") + _db_instance = None + + return _db_instance + + +def _set_test_build_id(build_id: Optional[str] = None) -> None: + """Set or generate a unique test build ID.""" + global _test_build_id + _test_build_id = build_id or "default_build_id" + logger.debug(f"Test build ID set to: {_test_build_id}") + + +def _get_test_build_id() -> str: + """Return the current test build ID, generating one if necessary.""" + global _test_build_id + if _test_build_id is None: + _set_test_build_id() + return _test_build_id + + +class BaseEntity(Model): + """Base PeeWee model class using the singleton database.""" + + class Meta: + database = _get_db() + + +def _backup_to_file(table_name: str, data: Dict[str, Any]) -> None: + """Write data to a JSON Lines (.jsonl) file in the backup directory.""" + if not _backup_path: + logger.warning("Backup path is not set. Skipping backup.") + return + + file_path = _backup_path / f"{table_name}.jsonl" + try: + file_path.parent.mkdir(parents=True, exist_ok=True) + with file_path.open("a", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False) + f.write("\n") + logger.info(f"Data backed up to {file_path}") + except Exception as e: + logger.error(f"Failed to write backup file {file_path}: {e}") + + +def write_to_db(table_name: str, data: Dict[str, Any]) -> bool: + """ + Attempt to insert data into the specified database table. + If the table doesn't exist or an error occurs, back up to a JSONL file. + """ + db = _get_db() + data["test_build_id"] = _get_test_build_id() + + # Skip DB entirely if disabled + if not _db_enabled or db is None: + _backup_to_file(table_name, data) + return False + + try: + if not db.table_exists(table_name): + logger.warning(f"Table '{table_name}' does not exist. 
Writing to backup.") + _backup_to_file(table_name, data) + return False + + # Get existing columns and filter data + columns = db.get_columns(table_name) + col_names = {col.name for col in columns} + filtered_data = {k: v for k, v in data.items() if k in col_names} + + # Build dynamic model for insertion + fields = {"id": AutoField()} + for col in columns: + if col.name != "id": + fields[col.name] = TextField(null=True) + + DynamicEntity = type( + f"{table_name.capitalize()}DynamicModel", + (BaseEntity,), + { + "Meta": type("Meta", (), {"database": db, "table_name": table_name}), + **fields, + }, + ) + + with db.atomic(): + DynamicEntity.insert(filtered_data).execute() + logger.info(f"Successfully inserted data into table '{table_name}'.") + return True + + except peewee.PeeweeException as e: + logger.error( + f"Database write error for table '{table_name}': {e}", exc_info=True + ) + except Exception as e: + logger.critical( + f"Unexpected error during DB write for '{table_name}': {e}", exc_info=True + ) + + # Fallback to backup on any failure + _backup_to_file(table_name, data) + return False + + +def database_connection(build_id: str) -> None: + """Test database connection and set the build ID.""" + logger.info(f"Setting test build ID: {build_id}") + _set_test_build_id(build_id) + + db = _get_db() + if not _db_enabled: + logger.info("Database connection skipped because enabled=false.") + return + + if db is None: + logger.error("No database instance available.") + return + + logger.info(f"Attempting connection to database: {db.database}") + try: + db.connect(reuse_if_open=True) + logger.info("Database connection successful.") + except Exception as e: + logger.error(f"Database connection failed: {e}", exc_info=True) + finally: + if not db.is_closed(): + db.close() + logger.debug("Database connection closed.") diff --git a/test/common/influxdb_utils.py b/test/common/influxdb_utils.py deleted file mode 100644 index 5d564061..00000000 --- a/test/common/influxdb_utils.py +++ /dev/null @@ -1,58 +0,0 @@ -""" -InfluxDB Data Push Utility -Provides convenient InfluxDB data writing functionality -""" - -from datetime import datetime -from typing import Dict, Any, Optional, Union -from influxdb_client import InfluxDBClient, Point, WritePrecision -from influxdb_client.client.write_api import SYNCHRONOUS -from config_utils import config_utils as config_instance - -class InfluxDBUtils: - """InfluxDB Utility Class""" - - def __init__(self): - """Initialize InfluxDB connection""" - self.config = config_instance.get_config("influxdb") - - -# Global InfluxDB utility instance -influxdb_utils = InfluxDBUtils() - - -def push_to_influx(measurement: str, - value: Union[int, float, str], - tags: Optional[Dict[str, str]] = None, - fields: Optional[Dict[str, Union[int, float, str]]] = None, - timestamp: Optional[datetime] = None) -> bool: - - return None - - -def push_test_metric(test_name: str, - metric_name: str, - value: Union[int, float], - additional_tags: Optional[Dict[str, str]] = None) -> bool: - print("Push to InfluxDB, To be implemented.") - - -if __name__ == "__main__": - # Simple data push - push_to_influx("response_time", 0.123) - - # Data push with tags - push_to_influx("accuracy", 0.95, { - "model": "v1.0", - "platform": "gpu", - "test_case": "classification" - }) - - # Test metric push - push_test_metric("test_calculation_accuracy", "calculation_time", 0.001, { - "feature": "accuracy" - }) - - # Data push with timestamp - from datetime import datetime - push_to_influx("memory_usage", 1024, {"test": 
"memory"}, timestamp=datetime.now()) \ No newline at end of file diff --git a/test/common/llmperf/run_inference.py b/test/common/llmperf/run_inference.py index 801163de..661f74b1 100644 --- a/test/common/llmperf/run_inference.py +++ b/test/common/llmperf/run_inference.py @@ -21,9 +21,9 @@ def run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path) tokenizer_path— Path to the tokenizer Returns: failed_cases — List of failed case indices - case_hit_rate_map — Mapping of {case_idx: hit_rate} """ print(f"[INFO] Total {len(test_cases)} test cases to be executed") + all_summaries = [] failed_case = [] # Clear proxy environment variables @@ -31,14 +31,12 @@ def run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path) env.pop('http_proxy', None) env.pop('https_proxy', None) - # Store hit_rate for each case_idx (to export to Excel later) - case_hit_rate_map = {} - for i, case in enumerate(test_cases): print(f"\n>>> Executing test case {i + 1} <<<") reset_prefill_cache(env, server_url) # Use a fixed random_seed for each test to control PC hit_rate random_seed = random.randint(1, 100000) + summary = {} # Read parameters from configuration file mean_input = case.get("mean_input_tokens", 5000) @@ -46,23 +44,21 @@ def run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path) mean_output = case.get("mean_output_tokens", 1000) stddev_output = case.get("stddev_output_tokens", 0) max_completed = case.get("max_num_completed_requests", 1) - concurrent = case.get("num_concurrent_requests", 1) + concurrent = case.get("concurrent_requests", 1) llm_api = case.get("llm_api", "openai") additional_sampling_params = case.get("additional_sampling_params", "{}") timeout = case.get("timeout", 60000) hit_rate = case.get("hit_rate", 0) - # Record hit_rate for this case - case_hit_rate_map[i] = hit_rate try: # Determine if two runs are needed (PC hit_rate test) if hit_rate == 0: - run_token_benchmark( + summary = run_token_benchmark( llm_api=llm_api, model=model, test_timeout_s=timeout, max_num_completed_requests=max_completed, - num_concurrent_requests=concurrent, + concurrent_requests=concurrent, mean_input_tokens=mean_input, stddev_input_tokens=stddev_input, mean_output_tokens=mean_output, @@ -75,7 +71,7 @@ def run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path) user_metadata={"case_idx": i} ) else: - print("[INFO] hit_rate > 0 detected, entering prefill mode") + print(f"[INFO] hit_rate > 0 detected, entering prefill mode, PC hit rate: {hit_rate} %") # hit_rate > 0: first prefill mode prefill_mean_input = int(mean_input * hit_rate / 100) print(f"[INFO] Prefill execution: mean_input_tokens={prefill_mean_input}") @@ -84,7 +80,7 @@ def run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path) model=model, test_timeout_s=timeout, max_num_completed_requests=max_completed, - num_concurrent_requests=concurrent, + concurrent_requests=concurrent, mean_input_tokens=prefill_mean_input, stddev_input_tokens=stddev_input, mean_output_tokens=2, @@ -98,12 +94,12 @@ def run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path) ) # Then run normal mode print("[INFO] Prefill completed, switching to normal mode execution") - run_token_benchmark( + summary = run_token_benchmark( llm_api=llm_api, model=model, test_timeout_s=timeout, max_num_completed_requests=max_completed, - num_concurrent_requests=concurrent, + concurrent_requests=concurrent, mean_input_tokens=mean_input, stddev_input_tokens=stddev_input, 
mean_output_tokens=mean_output, @@ -115,55 +111,30 @@ def run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path) tokenizer_path=tokenizer_path, user_metadata={"case_idx": i, "phase": "normal"} ) + all_summaries.append(summary) except Exception as e: failed_case.append(i) - return failed_case, case_hit_rate_map - -def getResult(performance_name: str): - results_dir = Path("result_outputs") - matched_values: List[Dict[str, Any]] = [] - for idx, fname in enumerate(os.listdir(results_dir)): - if not fname.lower().endswith(".json"): - continue - - file_path = os.path.join(results_dir, fname) - try: - with open(file_path, "r", encoding="utf-8") as f: - data = json.load(f) - except Exception as e: - print(f"[ERROR] Failed to read {file_path}: {e}") - continue - - # Iterate over each key in the dictionary - for key, value in data.items(): - if isinstance(key, str) and performance_name.lower() in key.lower(): - matched_values.append(value) + return all_summaries, failed_case - print(f"[INFO] Found {len(matched_values)} matching values under {results_dir}, substring = '{performance_name}'") - return matched_values - -def inference_results(performance_name: str): +def inference_results(): config_file = Path(__file__).parent.parent.parent / "config.yaml" - results_dir = Path("result_outputs") - if os.path.exists(results_dir) and len(os.listdir(results_dir)) != 0: - print("Test results already exist!!!!!!!!!!!!!!!") - else: - print("[INFO] Initialization complete, starting main process") - print(f"[INFO] Reading configuration file: {config_file}") - with open(config_file, 'r', encoding='utf-8') as f: - config = yaml.safe_load(f) - model = config.get("llm_connection", {}).get("model", "") - server_url = config.get("llm_connection", {}).get("server_url", "") - tokenizer_path = config.get("llm_connection", {}).get("tokenizer_path", "") - test_cases = config.get("llmperf_test_cases", []) - timestamp_dir = Path("result_outputs") - timestamp_dir.mkdir(parents=True, exist_ok=True) - print(f"[INFO] Created results directory: {timestamp_dir}") - - failed_cases, case_hit_rate_map = run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path) - total = len(test_cases) - print(f"\n[INFO] All tests completed! Success: {total - len(failed_cases)}/{total}") - if failed_cases: - print(f"[WARN] Failed case indices: {failed_cases}") - return getResult(performance_name) \ No newline at end of file + all_smmaries = {} + print("[INFO] Initialization complete, starting main process") + print(f"[INFO] Reading configuration file: {config_file}") + with open(config_file, 'r', encoding='utf-8') as f: + config = yaml.safe_load(f) + model = config.get("llm_connection", {}).get("model", "") + server_url = config.get("llm_connection", {}).get("server_url", "") + tokenizer_path = config.get("llm_connection", {}).get("tokenizer_path", "") + test_cases = config.get("llmperf_test_cases", []) + timestamp_dir = Path("results") + timestamp_dir.mkdir(parents=True, exist_ok=True) + print(f"[INFO] Created results directory: {timestamp_dir}") + + all_summaries, failed_cases = run_test_cases(test_cases, timestamp_dir, model, server_url, tokenizer_path) + total = len(test_cases) + print(f"\n[INFO] All tests completed! 
Success: {total - len(failed_cases)}/{total}") + if failed_cases: + print(f"[WARN] Failed case indices: {failed_cases}") + return all_summaries \ No newline at end of file diff --git a/test/common/llmperf/utils/token_benchmark.py b/test/common/llmperf/utils/token_benchmark.py index 5f514267..2b714109 100644 --- a/test/common/llmperf/utils/token_benchmark.py +++ b/test/common/llmperf/utils/token_benchmark.py @@ -10,7 +10,6 @@ import pandas as pd - from transformers import AutoTokenizer from common.llmperf.utils import common_metrics @@ -29,7 +28,7 @@ def get_token_throughput_latencies( mean_output_tokens: int, stddev_output_tokens: int, additional_sampling_params: Optional[Dict[str, Any]] = None, - num_concurrent_requests: int = 1, + concurrent_requests: int = 1, max_num_completed_requests: int = 500, test_timeout_s=90, llm_api="openai", @@ -47,7 +46,7 @@ def get_token_throughput_latencies( stddev_output_tokens: The standard deviation of the number of tokens to generate per request. additional_sampling_params: Additional sampling parameters to send with the request. For more information see the LLM APIs documentation for the completions - num_concurrent_requests: The number of concurrent requests to make. Increase + concurrent_requests: The number of concurrent requests to make. Increase this to increase the amount of load and vice versa. test_timeout_s: The amount of time to run the test for before reporting results. llm_api: The name of the llm api to use. Either "openai" or "litellm". @@ -84,7 +83,7 @@ def get_token_throughput_latencies( futures = [] # 2. Submitting tasks using a thread pool - with ThreadPoolExecutor(max_workers=num_concurrent_requests) as executor: + with ThreadPoolExecutor(max_workers=concurrent_requests) as executor: for idx in range(max_num_completed_requests): sampling = {"max_tokens": num_output_tokens_list[idx]} sampling.update(additional_sampling_params) @@ -135,7 +134,7 @@ def get_token_throughput_latencies( "stddev_input_tokens": stddev_input_tokens, "mean_output_tokens": mean_output_tokens, "stddev_output_tokens": stddev_output_tokens, - "num_concurrent_requests": num_concurrent_requests, + "concurrent_requests": concurrent_requests, "additional_sampling_params": additional_sampling_params, } @@ -144,6 +143,36 @@ def get_token_throughput_latencies( return metadata, completed_requests, elapsed_time, incremental_time_delay +def compute_throughput(summary: Dict[str, Any], + completed_requests: List[Dict[str, Any]], + elapsed_time: float, + incremental_time_delay: float) -> Tuple[float, float]: + """ + Compute total_throughput (token/s) based on the metrics in summary. + + Formula: (mean_output_tokens * num_completed_requests) / total_e2e_latency_s + + Args: + summary (Dict[str, Any]): A dictionary containing performance metrics. + + Returns: + float: The computed total throughput in tokens per second. Returns 0.0 if latency is zero. 
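+
+    Illustrative example (assumed numbers, not taken from a real run): with
+    mean_output_tokens=300, 16 completed requests and elapsed_time=20.0 s,
+    total_throughput = 300 * 16 / 20.0 = 240.0 token/s; incremental_throughput
+    uses incremental_time_delay as the denominator instead.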
+ """ + mean_output_tokens = summary.get("mean_output_tokens", 0) + + total_throughput = ( + (mean_output_tokens * len(completed_requests)) / elapsed_time + if elapsed_time > 0 + else 0.0 + ) + incremental_throughput = ( + (mean_output_tokens * len(completed_requests)) / incremental_time_delay + if incremental_time_delay > 0 + else 0.0 + ) + return round(total_throughput, 4), round(incremental_throughput, 4) + + def metrics_summary( metrics: List[Dict[str, Any]], start_time: int, end_time: int ) -> Dict[str, Any]: @@ -191,6 +220,7 @@ def flatten(item): print(key) ret[key] = {} series = pd.Series(list(flatten(df_without_errored_req[key]))).dropna() + series = series[series > 0] # Calculate non-zero values quantiles = series.quantile([0.25, 0.5, 0.75, 0.9, 0.95, 0.99]).to_dict() quantiles_reformatted_keys = {} for quantile, value in quantiles.items(): @@ -247,7 +277,7 @@ def run_token_benchmark( model: str, test_timeout_s: int, max_num_completed_requests: int, - num_concurrent_requests: int, + concurrent_requests: int, mean_input_tokens: int, stddev_input_tokens: int, mean_output_tokens: int, @@ -265,7 +295,7 @@ def run_token_benchmark( model: The name of the model to query. max_num_completed_requests: The number of requests to complete before finishing the test. test_timeout_s: The amount of time to run the test for before reporting results. - num_concurrent_requests: The number of concurrent requests to make. Increase + concurrent_requests: The number of concurrent requests to make. Increase this to increase the amount of load and vice versa. mean_input_tokens: The mean number of tokens to send in the prompt for the request. stddev_input_tokens: The standard deviation of the number of tokens to send in the prompt for the request. @@ -282,7 +312,7 @@ def run_token_benchmark( " because of the prompting logic right now" ) - summary, individual_responses, elapsed_time, incremental_time_delay = get_token_throughput_latencies( + summary, completed_requests, elapsed_time, incremental_time_delay = get_token_throughput_latencies( model=model, llm_api=llm_api, test_timeout_s=test_timeout_s, @@ -291,14 +321,14 @@ def run_token_benchmark( stddev_input_tokens=stddev_input_tokens, mean_output_tokens=mean_output_tokens, stddev_output_tokens=stddev_output_tokens, - num_concurrent_requests=num_concurrent_requests, + concurrent_requests=concurrent_requests, additional_sampling_params=json.loads(additional_sampling_params), random_seed=random_seed, openai_api_base=openai_api_base, tokenizer_path=tokenizer_path, ) if mean_output_tokens == 2: - return summary, individual_responses, elapsed_time, incremental_time_delay + return summary, completed_requests, elapsed_time, incremental_time_delay timestamp = int(time.time() * 1000) if results_dir: @@ -309,8 +339,12 @@ def run_token_benchmark( # Update to metadata. 
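+    # (user metadata such as case_idx is merged in, then the computed
+    # throughput figures are appended before the summary is serialized to JSON)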
summary.update(user_metadata) + total_tp, req_tp = compute_throughput(summary, completed_requests, elapsed_time, incremental_time_delay) + summary["num_completed_requests"] = len(completed_requests) summary["elapsed_time"] = elapsed_time summary["incremental_time_delay"] = incremental_time_delay + summary["total_throughput"] = total_tp + summary["incremental_throughput"] = req_tp results = LLMPerfResults(name=summary_filename, metadata=summary) results_dir = Path(results_dir) @@ -319,9 +353,16 @@ def run_token_benchmark( elif not results_dir.is_dir(): raise ValueError(f"{results_dir} is not a directory") + llmperf_dir = results_dir / "llmperf" + if not llmperf_dir.exists(): + llmperf_dir.mkdir(parents=True) + elif not llmperf_dir.is_dir(): + raise ValueError(f"{llmperf_dir} is not a directory") + try: - with open(results_dir / f"{summary_filename}.json", "w") as f: + with open(llmperf_dir / f"{summary_filename}.json", "w") as f: json.dump(results.to_dict(), f, indent=4, default=str) except Exception as e: print(results.to_dict()) - raise e \ No newline at end of file + raise e + return summary \ No newline at end of file diff --git a/test/config.yaml b/test/config.yaml index df1bb6a7..766cfeb6 100644 --- a/test/config.yaml +++ b/test/config.yaml @@ -1,50 +1,33 @@ reports: - base_dir: "reports" + base_dir: "results/reports" use_timestamp: true directory_prefix: "pytest" html: # pytest-html - enabled: false + enabled: true filename: "report.html" title: "UCM Pytest Test Report" - allure: - enabled: true - html_enable: true - serve_mode: true # 使用allure serve mode - serve_host: "localhost" - serve_port: 8081 - directory: "allure-results" - -log: - enabled: true - path: "logs" - filename: "pytest.log" - use_timestamp: false -# InfluxDB Configuration -influxdb: - host: localhost - port: 8086 - token: your-influxdb-token-here - org: your-organization - bucket: test-metrics - timeout: 10 +database: + backup: "results/" + enabled: true + host: "127.0.0.1" + port: 3306 + name: "ucm_pytest" + user: "root" + password: "123456" + charset: "utf8mb4" # LLM Connection Configuration llm_connection: model: "qwen3" server_url: "http://141.111.32.70:9382" tokenizer_path: "/home/models/QwQ-32B" + # Performance Test Configuration llmperf_test_cases: - - mean_input_tokens: 600 - mean_output_tokens: 300 - max_num_completed_requests: 1 - num_concurrent_requests: 1 - additional_sampling_params: "{}" - hit_rate: 0 - - mean_input_tokens: 600 + - mean_input_tokens: 6000 mean_output_tokens: 200 - max_num_completed_requests: 3 - num_concurrent_requests: 1 + max_num_completed_requests: 16 + concurrent_requests: 8 additional_sampling_params: "{}" - hit_rate: 0 + hit_rate: 0 \ No newline at end of file diff --git a/test/conftest.py b/test/conftest.py index 65ace924..15025795 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,79 +1,71 @@ from __future__ import annotations -import logging -from math import log -import shutil -import sys -import re -import pytest -import tempfile + import datetime as dt import platform as pf +import sys +from functools import wraps from pathlib import Path -from typing import Dict, Any, List -from common.config_utils import config_utils as config_instance -from common.allure_utils import setup_allure, generate_allure_html, serve_allure_report +import pytest +from common.config_utils import config_utils as config_instance +from common.db_utils import database_connection, write_to_db # ---------------- Constants ---------------- PRJ_ROOT = Path(__file__).resolve().parent -REPORT_DIR = 
PRJ_ROOT / "reports" sys.path.insert(0, str(PRJ_ROOT)) -# Global variables for Allure configuration -ALLURE_DIR = None -ALLURE_CONFIG = None - - -# ---------------- Logging ---------------- -# TODO:Unified log -def _init_logger() -> logging.Logger: - """Initialize and configure test logger.""" - log_config = config_instance.get_config("log", {}) - if not log_config.get("enabled", True): - return logging.getLogger("UCM_TEST") - - log = logging.getLogger("UCM_TEST") - log.setLevel(logging.DEBUG) - log.handlers.clear() - - log_path = Path(log_config.get("path", "logs")) - log_path.mkdir(parents=True, exist_ok=True) - filename = config_instance.get_nested_config("log.filename", "pytest.log") - use_timestamp = config_instance.get_nested_config("log.use_timestamp", True) - if use_timestamp: - ts = dt.datetime.now().strftime("%Y%m%d-%H%M%S") - stem, ext = Path(filename).stem, Path(filename).suffix - filename = f"{stem}_{ts}{ext}" +# ---------------- CLI Options ---------------- +def pytest_addoption(parser): + parser.addoption( + "--stage", action="store", default="", help="Filter by stage marker (1,2,3,+)" + ) + parser.addoption( + "--feature", action="store", default="", help="Filter by feature marker" + ) + parser.addoption( + "--platform", action="store", default="", help="Filter by platform marker" + ) - log_file = log_path / filename - # Common formatter - console_fmt = logging.Formatter("[%(levelname)s] %(name)s: %(message)s") +# ---------------- Test Filtering ---------------- +def pytest_collection_modifyitems(config, items): + kept = items[:] - # File handler - fh = logging.FileHandler(log_file, encoding="utf-8") - fh.setLevel(logging.INFO) - fh.setFormatter(console_fmt) - log.addHandler(fh) + markers = [m.split(":", 1)[0].strip() for m in config.getini("markers")] + for name in markers: + opt = config.getoption(f"--{name}", "").strip() + if not opt: + continue - # Console handler - ch = logging.StreamHandler() - ch.setLevel(logging.INFO) - ch.setFormatter(console_fmt) - log.addHandler(ch) + if name == "stage" and opt.endswith("+"): + min_stage = int(opt[:-1]) + kept = [ + it + for it in kept + if any(int(v) >= min_stage for v in _get_marker_args(it, "stage")) + ] + else: + wanted = {x.strip() for x in opt.split(",") if x.strip()} + kept = [ + it + for it in kept + if any(v in wanted for v in _get_marker_args(it, name)) + ] - log.propagate = False - return log + config.hook.pytest_deselected(items=[i for i in items if i not in kept]) + items[:] = kept -logger = _init_logger() -reports_config = config_instance.get_config("reports") +def _get_marker_args(item, marker_name): + """Extract only args (not kwargs) from markers, as strings.""" + return [ + str(arg) for mark in item.iter_markers(name=marker_name) for arg in mark.args + ] -# ---------------- pytest Hooks ---------------- +# ---------------- Report Setup ---------------- def _prepare_report_dir(config: pytest.Config) -> Path: - """Prepare report directory based on config.yaml.""" cfg = config_instance.get_config("reports", {}) base_dir = Path(cfg.get("base_dir", "reports")) prefix = cfg.get("directory_prefix", "pytest") @@ -87,302 +79,81 @@ def _prepare_report_dir(config: pytest.Config) -> Path: def _setup_html_report(config: pytest.Config, report_dir: Path) -> None: - """Configure pytest-html if enabled.""" + reports_config = config_instance.get_config("reports", {}) html_cfg = reports_config.get("html", {}) if not html_cfg.get("enabled", True): if hasattr(config.option, "htmlpath"): config.option.htmlpath = None - 
logger.info("HTML report disabled according to config.yaml") + print("HTML report disabled according to config.yaml") return html_filename = html_cfg.get("filename", "report.html") - html_path = report_dir / html_filename - config.option.htmlpath = str(html_path) + config.option.htmlpath = str(report_dir / html_filename) config.option.self_contained_html = True - logger.info(f"HTML report enabled → {html_path}") + print("HTML report enabled") -def pytest_configure(config: pytest.Config) -> None: - """Pytest entry hook: configure logging and reports.""" - logger.info(f"Starting Test Session: {dt.datetime.now():%Y-%m-%d %H:%M:%S}") - global REPORT_DIR, ALLURE_DIR, ALLURE_CONFIG - REPORT_DIR = _prepare_report_dir(config) - _setup_html_report(config, REPORT_DIR) - reports_cfg = config_instance.get_config("reports", {}) - - # Save Allure configuration globally - ALLURE_CONFIG = reports_cfg - allure_dir = setup_allure(reports_cfg) - ALLURE_DIR = allure_dir - - # Configure allure-pytest plugin if enabled - if allure_dir: - # Set allure results directory for pytest-allure plugin - if hasattr(config.option, 'allure_report_dir'): - config.option.allure_report_dir = str(allure_dir) - # Also set as environment variable - import os - os.environ["ALLURE_REPORT_DIR"] = str(allure_dir) - logger.info(f"Allure results will be stored at {allure_dir}") - else: - logger.info("Allure report disabled according to config.yaml") +# ---------------- Build ID & Session Init ---------------- +def _generate_build_id(config: pytest.Config) -> str: + ts = dt.datetime.now().strftime("%Y-%m-%d_%H:%M:%S") + cli_parts = [] + markers = [m.split(":", 1)[0].strip() for m in config.getini("markers")] + for opt in markers: + val = config.getoption(opt, "") + if val: + cli_parts.append(f"{opt}={val}") + args_part = "_".join(cli_parts) if cli_parts else "all_cases" + return f"pytest_{ts}_{args_part}" -# ---------------- Marker & Filter Logic ---------------- -def _load_markers_from_ini() -> Dict[str, Dict[str, Any]]: - """Parse pytest.ini markers section.""" - ini_path = Path(__file__).with_name("pytest.ini") - if not ini_path.exists(): - return {} +# ---------------- Pytest Hooks ---------------- +def pytest_configure(config: pytest.Config) -> None: + """The global configuration will be executed directly upon entering pytest.""" + print(f"Starting Test Session: {dt.datetime.now():%Y-%m-%d %H:%M:%S}") - markers: Dict[str, Dict[str, Any]] = {} - in_markers = False + # Set up report directory + report_dir = _prepare_report_dir(config) + config._report_dir = report_dir # Attach to config for later use + _setup_html_report(config, report_dir) - for raw in ini_path.read_text(encoding="utf-8").splitlines(): - line = raw.strip() - if line.startswith("markers"): - in_markers = True - continue - if not in_markers or not line or line.startswith("#"): - continue - if line == "# end of markers": - break - - m = re.match(r"(\w+)(?:\((\w+)\))?\s*:\s*(.+)", line) - if m: - name, arg, help_txt = m.groups() - markers[name] = {"name": name, "arg": arg, "help": help_txt.strip()} - return markers - - -_MARKER_DEFS = _load_markers_from_ini() - - -def pytest_addoption(parser: pytest.Parser) -> None: - """Add CLI options dynamically from marker definitions.""" - for info in _MARKER_DEFS.values(): - parser.addoption( - f"--{info['name']}", - action="store", - default="", - help=( - f"Filter by {info['name']} marker. " - "Syntax: val1,val2,... | all | empty(no filter). 
" - f"({info['help']})" - ), - ) - - -def _get_marker_values(item: pytest.Item, name: str) -> List[str]: - """Extract marker values from test item.""" - vals: List[str] = [] - mark_infos = [] - - for mark in item.iter_markers(name=name): - mark_val_list = [str(a) for a in mark.args] - - if name in mark.kwargs: - mark_val_list.append(str(mark.kwargs[name])) - - vals.extend(mark_val_list) - mark_infos.append(f"{name}: {', '.join(mark_val_list) if mark_val_list else 'None'}") - - return vals - - -@pytest.hookimpl(hookwrapper=True, tryfirst=True) -def pytest_runtest_makereport(item: pytest.Item, call: pytest.CallInfo): - """Attach test reports to item for access in fixtures.""" - outcome = yield - rep = outcome.get_result() - setattr(item, f"rep_{rep.when}", rep) - - -def pytest_collection_modifyitems(config: pytest.Config, items: List[pytest.Item]) -> None: - """Filter test collection based on CLI options.""" - # Store marker information for later use in test execution - for item in items: - markers_info = [] - for mark in item.iter_markers(): - # Skip pytest's built-in markers - if mark.name in ['parametrize', 'usefixtures', 'skip', 'skipif', 'xfail']: - continue - markers_info.append({ - 'name': mark.name, - 'args': mark.args - }) - # Store marker info in the item for later use - item._pytest_markers_info = markers_info - - # Original filtering logic - kept = items[:] + # Generate and register build ID into DB + build_id = _generate_build_id(config) + config._build_id = build_id + database_connection(build_id) - for name, info in _MARKER_DEFS.items(): - opt = config.getoption(f"--{name}", "").strip() - if not opt: - continue - # all means any marker value with the marker - if opt == "all": - kept = [it for it in kept if _get_marker_values(it, name)] - continue +def pytest_sessionstart(session): + print("") + print("-" * 60) + print(f"{'Python':<10} │ {pf.python_version()}") + print(f"{'Platform':<10} │ {pf.system()} {pf.release()}") + print("-" * 60) - # 特殊处理 stage - if name == "stage": - if opt.endswith("+"): - min_stage = int(opt[:-1]) - kept = [ - it for it in kept - if any(int(v) >= min_stage for v in _get_marker_values(it, "stage")) - ] - else: - wanted = {x.strip() for x in opt.split(",") if x.strip()} - kept = [ - it for it in kept - if any(v in wanted for v in _get_marker_values(it, "stage")) - ] - else: - wanted = {x.strip() for x in opt.split(",") if x.strip()} - kept = [ - it for it in kept - if any(v in wanted for v in _get_marker_values(it, name)) - ] - if not kept: - logger.warning( - "No tests matched filter conditions: %s", - {m: config.getoption(f"--{m}") for m in _MARKER_DEFS}, - ) - else: - logger.info( - "Filter %d / %d tests after applying markers %s", - len(kept), len(items), - {m: config.getoption(f'--{m}') for m in _MARKER_DEFS if config.getoption(f'--{m}')} - ) +def pytest_sessionfinish(session, exitstatus): + report_dir = getattr(session.config, "_report_dir", "reports") + print("") + print("-" * 60) + print(f"{'Reports at':<10} │ {report_dir}") + print("Test session ended") + print("-" * 60) - items[:] = kept +# ---------------- Fixtures ---------------- -@pytest.hookimpl(tryfirst=True) -def pytest_runtest_setup(item): - """Add pytest markers as Allure labels during test setup.""" - # Add pytest markers as Allure labels - if hasattr(item, '_pytest_markers_info'): - import allure - for marker_info in item._pytest_markers_info: - marker_name = marker_info['name'] - marker_args = marker_info['args'] - - # Add marker as Allure label - label_name = 
f"pytest_{marker_name}" - if marker_args: - # If marker has arguments, add each as a separate label - for arg in marker_args: - allure.dynamic.label(label_name, str(arg)) - else: - # If marker has no arguments, just add the marker name - allure.dynamic.label(label_name, marker_name) +def pytest_runtest_logreport(report): + """ + Called after each test phase. We only care about 'call' (the actual test). + """ + if report.when != "call": + return -# ---------------- Fixtures ---------------- -@pytest.fixture(scope="session", autouse=True) -def session_logger() -> None: - """Session-level setup and teardown with system info logging.""" - logger.info("-" * 60) - logger.info(f"{'Python':<10} │ {pf.python_version()}") - logger.info(f"{'Platform':<10} │ {pf.system()} {pf.release()}") - logger.info("-" * 60) - yield - logger.info("-" * 60) - logger.info(f"{'Reports at':<10} │ {REPORT_DIR}") - logger.info("Test session ended") - logger.info("-" * 60) - - -@pytest.fixture(scope="function", autouse=True) -def test_logger(request): - """Function-level logging before and after each test.""" - node = request.node - klass = f"{node.cls.__name__}::" if node.cls else "" - identifier = f"{node.path.relative_to(Path.cwd())}::{klass}{node.name}" - print() - logger.info("-" * 60) - logger.info(f"[TEST_CLASS] {identifier}") - logger.info(f"[START] {node.name}") - yield - - result = getattr(node, "rep_call", None) - status = "PASSED" if result and result.outcome == "passed" else "FAILED" - logger.info(f"[ END ] {node.name} - {status}") - if result and getattr(result, "longrepr", None): - logger.error(f"Error details: {result.longrepr}") - - -@pytest.hookimpl(hookwrapper=True, tryfirst=True) -def pytest_runtest_makereport(item: pytest.Item, call: pytest.CallInfo): - """Attach test reports to item for access in fixtures.""" - outcome = yield - rep = outcome.get_result() - setattr(item, f"rep_{rep.when}", rep) - - -@pytest.fixture(scope="session", autouse=True) -def cleanup() -> None: - """Cleanup temporary pytest directories after test session.""" - yield - tmp_root = Path(tempfile.gettempdir()) - for d in tmp_root.iterdir(): - if d.is_dir() and d.name.startswith(("pytest_", "test_")): - shutil.rmtree(d, ignore_errors=True) - - -def pytest_unconfigure(config: pytest.Config) -> None: - """Pytest cleanup hook: generate Allure HTML report or start server if configured.""" - global ALLURE_DIR, ALLURE_CONFIG - - if ALLURE_DIR and ALLURE_CONFIG: - allure_cfg = ALLURE_CONFIG.get("allure", {}) - - # Check if HTML generation is enabled - if allure_cfg.get("html_enable", False): - serve_mode = allure_cfg.get("serve_mode", False) - - if serve_mode: - # Start Allure server - serve_host = allure_cfg.get("serve_host", "localhost") - serve_port = allure_cfg.get("serve_port", 8080) - - logger.info("Starting Allure server...") - logger.info(f"Server will be available at http://{serve_host}:{serve_port}") - - server_process = serve_allure_report( - ALLURE_DIR, - host=serve_host, - port=serve_port, - - ) - - if server_process: - logger.info("Allure server started successfully") - else: - logger.warning("Failed to start Allure server, falling back to static HTML generation...") - # Fallback to static HTML - html_dir = generate_allure_html(ALLURE_DIR, clean=True) - if html_dir: - logger.info(f"Static HTML report generated: {html_dir}") - else: - logger.warning("Failed to generate static HTML report") - else: - # Generate static HTML report - logger.info("Generating Allure HTML report...") - html_dir = generate_allure_html(ALLURE_DIR, 
clean=True) - - if html_dir: - logger.info(f"Allure HTML report generated: {html_dir}") - logger.info("Tip: If the report doesn't load properly, enable serve_mode in config.yaml") - else: - logger.warning("Failed to generate Allure HTML report") - else: - logger.info("Allure HTML generation disabled in configuration") - else: - logger.info("Allure not configured, skipping HTML generation") + status = report.outcome.upper() # 'passed', 'failed', 'skipped' → 'PASSED', etc. + test_result = { + "test_case": report.nodeid, + "status": status, + # "duration": report.duration, + "error": str(report.longrepr) if report.failed else None, + } + write_to_db("test_case_info", test_result) diff --git a/test/pytest.ini b/test/pytest.ini index d5ff2635..4be3cf47 100644 --- a/test/pytest.ini +++ b/test/pytest.ini @@ -1,15 +1,15 @@ [pytest] -# 0. Test Discovery Rules testpaths = suites python_files = test_*.py python_classes = Test* python_functions = test_* - addopts = -ra --strict-markers --capture=no +filterwarnings = + ignore::pytest.PytestReturnNotNoneWarning log_cli = 1 log_cli_level = INFO @@ -22,5 +22,4 @@ markers = # -------- Features (Recommended) -------- feature: Feature tag platform(name): Platform tag(gpu/npu) - reliability: Reliability tag -# end of markers +# end of markers \ No newline at end of file diff --git a/test/requirements.txt b/test/requirements.txt index 2d2f2d19..d26c4ec3 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -1,9 +1,8 @@ pytest>=7.0.0 -pytest-xdist>=3.0.0 pytest-html>=3.1.1 -pytest-json-report>=1.5.0 -allure-pytest>=2.12.0 -influxdb-client>=1.36.0 PyYAML>=6.0 -python-dotenv>=1.0.0 -requests>=2.28.0 \ No newline at end of file +pandas>=2.0.0 +pydantic>=2.0.0 +# MySQL +peewee>=3.14.5 +pymysql>=1.0.2 \ No newline at end of file diff --git a/test/suites/E2E/test_demo_function.py b/test/suites/E2E/test_demo_function.py new file mode 100644 index 00000000..d4ccd74a --- /dev/null +++ b/test/suites/E2E/test_demo_function.py @@ -0,0 +1,66 @@ +import pytest +from common.config_utils import config_utils as config_instance + + +# ---------------- Fixture Example ---------------- +class Calculator: + def __init__(self): + print("[Calculator Initialization]") + pass + + def add(self, a, b): + return a + b + + def divide(self, a, b): + if b == 0: + raise ZeroDivisionError("Cannot divide by zero") + return a / b + + +@pytest.fixture(scope="module", name="calc") +def calculator(): + return Calculator() + + +@pytest.mark.feature("mark") +class TestCalculator: + # The calc instance will only be initialized on the first call, see the pytest documentation for more usage + def test_add(self, calc): + assert calc.add(1, 2) == 3 + + def test_divide(self, calc): + assert calc.divide(6, 2) == 3 + + def test_divide_by_zero(self, calc): + with pytest.raises(ZeroDivisionError): + calc.divide(6, 0) + + +# ---------------- Write to DB Example ---------------- +from common.capture_utils import * + + +@pytest.mark.feature("capture") # pytest must be the top +@export_vars +def test_capture_mix(): + """Mixed single + lists via '_name' + '_data'""" + assert 1 == 1 + return { + "_name": "demo", + "_data": { + "length": 10086, # single value + "accuracy": [0.1, 0.2, 0.3], # list + "loss": [0.1, 0.2, 0.3], # list + }, + } + + +# ---------------- Read Config Example ---------------- +from common.config_utils import config_utils as config_instance + + +@pytest.mark.feature("config") +def test_config(): + assert ( + config_instance.get_nested_config("database.host", "localhost") == 
"127.0.0.1" + ) \ No newline at end of file diff --git a/test/suites/E2E/test_uc_performance.py b/test/suites/E2E/test_uc_performance.py new file mode 100644 index 00000000..9bc26092 --- /dev/null +++ b/test/suites/E2E/test_uc_performance.py @@ -0,0 +1,121 @@ +import pytest + +from common.llmperf.run_inference import inference_results + +from common.capture_utils import export_vars + + +@pytest.mark.feature("uc_performance_test") +@export_vars +def test_performance(): + all_summaries = inference_results() + failed_cases = [] + + value_lists = { + 'mean_input_tokens': [], + 'mean_output_tokens': [], + 'results_inter_token_latency_s_quantiles_p50': [], + 'results_inter_token_latency_s_quantiles_p90': [], + 'results_inter_token_latency_s_quantiles_p99': [], + 'results_inter_token_latency_s_mean': [], + 'results_ttft_s_quantiles_p50': [], + 'results_ttft_s_quantiles_p90': [], + 'results_ttft_s_quantiles_p99': [], + 'results_ttft_s_mean': [], + 'results_end_to_end_latency_s_quantiles_p50': [], + 'results_end_to_end_latency_s_quantiles_p90': [], + 'results_end_to_end_latency_s_quantiles_p99': [], + 'results_end_to_end_latency_s_mean': [], + 'num_completed_requests': [], + 'elapsed_time': [], + 'incremental_time_delay': [], + 'total_throughput': [], + 'incremental_throughput': [], + } + + for i, summary in enumerate(all_summaries): + mean_input_tokens = summary["mean_input_tokens"] + mean_output_tokens = summary["mean_output_tokens"] + + results_inter_token_latency_s_quantiles_p50 = summary["results"]["inter_token_latency_s"]["quantiles"]["p50"] + results_inter_token_latency_s_quantiles_p90 = summary["results"]["inter_token_latency_s"]["quantiles"]["p90"] + results_inter_token_latency_s_quantiles_p99 = summary["results"]["inter_token_latency_s"]["quantiles"]["p99"] + results_inter_token_latency_s_mean = summary["results"]["inter_token_latency_s"]["mean"] + + results_ttft_s_quantiles_p50 = summary["results"]["ttft_s"]["quantiles"]["p50"] + results_ttft_s_quantiles_p90 = summary["results"]["ttft_s"]["quantiles"]["p90"] + results_ttft_s_quantiles_p99 = summary["results"]["ttft_s"]["quantiles"]["p99"] + results_ttft_s_mean = summary["results"]["ttft_s"]["mean"] + + results_end_to_end_latency_s_quantiles_p50 = summary["results"]["end_to_end_latency_s"]["quantiles"]["p50"] + results_end_to_end_latency_s_quantiles_p90 = summary["results"]["end_to_end_latency_s"]["quantiles"]["p90"] + results_end_to_end_latency_s_quantiles_p99 = summary["results"]["end_to_end_latency_s"]["quantiles"]["p99"] + results_end_to_end_latency_s_mean = summary["results"]["end_to_end_latency_s"]["mean"] + + num_completed_requests = summary["num_completed_requests"] + elapsed_time = summary["elapsed_time"] + incremental_time_delay = summary["incremental_time_delay"] + total_throughput = summary["total_throughput"] + incremental_throughput = summary["incremental_throughput"] + + values = [ + mean_input_tokens, + mean_output_tokens, + results_inter_token_latency_s_quantiles_p50, + results_inter_token_latency_s_quantiles_p90, + results_inter_token_latency_s_quantiles_p99, + results_inter_token_latency_s_mean, + results_ttft_s_quantiles_p50, + results_ttft_s_quantiles_p90, + results_ttft_s_quantiles_p99, + results_ttft_s_mean, + results_end_to_end_latency_s_quantiles_p50, + results_end_to_end_latency_s_quantiles_p90, + results_end_to_end_latency_s_quantiles_p99, + results_end_to_end_latency_s_mean, + num_completed_requests, + elapsed_time, + incremental_time_delay, + total_throughput, + incremental_throughput + ] + + for var_name, val 
in zip([ + 'mean_input_tokens', + 'mean_output_tokens', + 'results_inter_token_latency_s_quantiles_p50', + 'results_inter_token_latency_s_quantiles_p90', + 'results_inter_token_latency_s_quantiles_p99', + 'results_inter_token_latency_s_mean', + 'results_ttft_s_quantiles_p50', + 'results_ttft_s_quantiles_p90', + 'results_ttft_s_quantiles_p99', + 'results_ttft_s_mean', + 'results_end_to_end_latency_s_quantiles_p50', + 'results_end_to_end_latency_s_quantiles_p90', + 'results_end_to_end_latency_s_quantiles_p99', + 'results_end_to_end_latency_s_mean', + 'num_completed_requests', + 'elapsed_time', + 'incremental_time_delay', + 'total_throughput', + 'incremental_throughput' + ], values): + value_lists[var_name].append(val) + if val is None: + failed_cases.append((i, var_name, "missing")) + + try: + assert val > 0, f"value <= 0" + except AssertionError as e: + failed_cases.append((i, var_name, str(e))) + + # Output final result + if failed_cases: + print(f"\n[WARNING] Assertion failed: {len(failed_cases)} abnormal cases found") + for i, key, reason in failed_cases: + print(f" Iteration={i + 1}, key='{key}' -> {reason}") + else: + print("\n[INFO] All values are greater than 0. Assertion passed!") + + return value_lists \ No newline at end of file diff --git a/test/suites/test_demo_function.py b/test/suites/test_demo_function.py deleted file mode 100644 index 67433ebb..00000000 --- a/test/suites/test_demo_function.py +++ /dev/null @@ -1,185 +0,0 @@ -# tests/test_demo.py -import pytest -import allure - -@pytest.mark.stage(1) -@pytest.mark.feature("mark") -@pytest.mark.platform("gpu") -def test_gpu_smoke(): - assert 1 == 1 - -@pytest.mark.stage(1) -@pytest.mark.feature("mark") -def test_regress_accuracy(): - assert 2 + 2 <= 5 - -@pytest.mark.stage(1) -@pytest.mark.feature("mark") -@pytest.mark.platform("npu") -def test_performance_accuracy(): - assert 2 + 2 <= 5 - -# Example of new mark -@pytest.mark.feature("mark") -@pytest.mark.reliability("high") -def test_llm_reliability(): - assert True - - -# Example of importing configuration file parameters -from common.config_utils import config_utils as config_instance -@pytest.mark.feature("config") -def test_llm_config(): - llm_config = config_instance.get_config("llm_connection") - assert llm_config["type"] == "openai" - assert config_instance.get_nested_config("llm_connection.model") == "gpt-3.5-turbo" - assert config_instance.get_nested_config("llm_connection.models", "gpt-3.5-turbo") == "gpt-3.5-turbo" - - - -# Example of using allure -@pytest.mark.feature("allure1") -@allure.feature('test_success') -def test_success(): - """this test succeeds""" - assert True - -@allure.feature('test_failure') -@pytest.mark.feature("allure1") -def test_failure(): - """this test fails""" - assert False - -@allure.feature('test_skip') -@pytest.mark.feature("allure1") -def test_skip(): - """this test is skipped""" - pytest.skip('for a reason!') - -@allure.feature('test_broken') -@pytest.mark.feature("allure1") -def test_broken(): - raise Exception('oops') - -@pytest.mark.feature("allure2") -@pytest.mark.parametrize('param1', ["Hello", "World"]) -@pytest.mark.parametrize('param2', ['Hello', "Hello"]) -def test_parametrize_with_two_parameters(param1, param2): - assert param1 == param2 - -@pytest.mark.feature("allure3") -@allure.description_html(""" -

-<h1>This is HTML description</h1>
-<table>
-  <tr>
-    <th>Firstname</th>
-    <th>Lastname</th>
-    <th>Age</th>
-  </tr>
-  <tr>
-    <td>jade</td>
-    <td>mr</td>
-    <td>18</td>
-  </tr>
-  <tr>
-    <td>road</td>
-    <td>Tester</td>
-    <td>18</td>
-  </tr>
-</table>
-""") -def test_html_description(): - assert True - -@pytest.mark.feature("allure3") -@allure.description("""Multi-line description""") -def test_description_from_decorator(): - assert 42 == int(6 * 7) - -@pytest.mark.feature("allure3") -def test_unicode_in_docstring_description(): - """Description can also be below the function""" - assert 42 == int(6 * 7) - -@pytest.mark.feature("allure4") -@allure.title("Assert that 2+2=4") -def test_with_a_title(): - assert 2 + 2 == 4 - -@pytest.mark.feature("allure4") -@allure.title("Dynamic title: {param1} + {param2} = {expected}") -@pytest.mark.parametrize('param1,param2,expected', [(2, 2, 4),(1, 2, 5)]) -def test_with_parameterized_title(param1, param2, expected): - assert param1 + param2 == expected - -@pytest.mark.feature("allure4") -@allure.title("This is a dynamic title that will be replaced") -def test_with_dynamic_title(): - assert 2 + 2 == 4 - allure.dynamic.title('Test completed, used as title') - - -@pytest.mark.feature("allure5") -def test_with_steps(): - """Example test case with steps""" - with allure.step("Step 1: Initialize variables"): - a = 2 - b = 3 - - with allure.step("Step 2: Perform addition"): - result = a + b - - with allure.step("Step 3: Verify result"): - assert result == 5 - -import tempfile -import os -@pytest.mark.feature("allure6") -def test_with_attachment(): - """Example test case with attachment""" - # Create some data to attach - data = "This is sample data for attachment\nLine 2\nLine 3" - - # Attach text data - allure.attach(data, name="Sample Data", attachment_type=allure.attachment_type.TEXT) - - # Create and attach a simple file - with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as f: - f.write("Sample file content\nFor testing attachment feature") - temp_file_path = f.name - - # Attach the file - allure.attach.file(temp_file_path, name="Attached File", - attachment_type=allure.attachment_type.TEXT) - - # Clean up temporary file - os.unlink(temp_file_path) - - assert True - -@pytest.mark.feature("allure7") -def test_mixed_steps_and_attachments(): - """Example test case combining steps and attachments""" - with allure.step("Initialize test data"): - test_data = {"name": "John", "age": 30, "city": "New York"} - - with allure.step("Convert data to JSON string"): - import json - json_data = json.dumps(test_data, indent=2) - allure.attach(json_data, name="JSON Data", attachment_type=allure.attachment_type.JSON) - - with allure.step("Validate data"): - assert test_data["name"] == "John" - assert test_data["age"] == 30 - - with allure.step("Create and attach report"): - report_content = f""" - Test Report - =========== - Name: {test_data['name']} - Age: {test_data['age']} - City: {test_data['city']} - Status: PASSED - """ - allure.attach(report_content, name="Test Report", - attachment_type=allure.attachment_type.TEXT) \ No newline at end of file diff --git a/test/suites/test_uc_performance.py b/test/suites/test_uc_performance.py deleted file mode 100644 index 7fe425c7..00000000 --- a/test/suites/test_uc_performance.py +++ /dev/null @@ -1,159 +0,0 @@ -import pytest - -from common.llmperf.run_inference import inference_results - -mean_output_tokens = [] -num_completed_requests = [] -total_e2e_latency_s = [] -total_generation_time_s = [] - -@pytest.mark.feature("mean_input_tokens") -def test_mean_input_tokens(): - result = inference_results("mean_input_tokens") - assert len(result) > 0, "result list is empty! Please check data source or inference process." 
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("mean_output_tokens")
-def test_mean_output_tokens():
-    global mean_output_tokens
-    result = inference_results("mean_output_tokens")
-    mean_output_tokens = result[:]
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("results_inter_token_latency_s_quantiles_p50")
-def test_inter_token_latency_s_quantiles_p50():
-    result = inference_results("results_inter_token_latency_s_quantiles_p50")
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("results_inter_token_latency_s_quantiles_p90")
-def test_inter_token_latency_s_quantiles_p90():
-    result = inference_results("results_inter_token_latency_s_quantiles_p90")
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("results_inter_token_latency_s_quantiles_p99")
-def test_inter_token_latency_s_quantiles_p99():
-    result = inference_results("results_inter_token_latency_s_quantiles_p99")
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("results_inter_token_latency_s_mean")
-def test_inter_token_latency_s_mean():
-    result = inference_results("results_inter_token_latency_s_mean")
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("results_ttft_s_quantiles_p50")
-def test_ttft_s_quantiles_p50():
-    result = inference_results("results_ttft_s_quantiles_p50")
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("results_ttft_s_quantiles_p90")
-def test_ttft_s_quantiles_p90():
-    result = inference_results("results_ttft_s_quantiles_p90")
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("results_ttft_s_quantiles_p99")
-def test_ttft_s_quantiles_p99():
-    result = inference_results("results_ttft_s_quantiles_p99")
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("results_ttft_s_mean")
-def test_ttft_s_mean():
-    result = inference_results("results_ttft_s_mean")
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("results_end_to_end_latency_s_quantiles_p50")
-def test_end_to_end_latency_s_quantiles_p50():
-    result = inference_results("results_end_to_end_latency_s_quantiles_p50")
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("results_end_to_end_latency_s_quantiles_p90")
-def test_end_to_end_latency_s_quantiles_p90():
-    result = inference_results("results_end_to_end_latency_s_quantiles_p90")
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("results_end_to_end_latency_s_quantiles_p99")
-def test_end_to_end_latency_s_quantiles_p99():
-    result = inference_results("results_end_to_end_latency_s_quantiles_p99")
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("results_end_to_end_latency_s_mean")
-def test_end_to_end_latency_s_mean():
-    result = inference_results("results_end_to_end_latency_s_mean")
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("results_num_completed_requests")
-def test_num_completed_requests():
-    global num_completed_requests
-    result = inference_results("results_num_completed_requests")
-    num_completed_requests = result[:]
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("elapsed_time")
-def test_elapsed_time():
-    global total_e2e_latency_s
-    result = inference_results("elapsed_time")
-    total_e2e_latency_s = result[:]
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("incremental_time_delay")
-def test_incremental_time_delay():
-    global total_generation_time_s
-    result = inference_results("incremental_time_delay")
-    total_generation_time_s = result[:]
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("total_throughput")
-def test_total_throughput():
-    result = []
-    n = len(mean_output_tokens)
-    for i in range(n):
-        total_throughput = (mean_output_tokens[i] * num_completed_requests[i] / total_e2e_latency_s[i]
-                            if total_e2e_latency_s[i] > 0 else 0.0)
-        result.append(total_throughput)
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
-
-@pytest.mark.feature("incremental_throughput")
-def test_incremental_throughput():
-    result = []
-    n = len(mean_output_tokens)
-    for i in range(n):
-        incremental_throughput = (mean_output_tokens[i] * num_completed_requests[i] / total_generation_time_s[i]
-                                  if total_generation_time_s[i] > 0 else 0.0)
-        result.append(incremental_throughput)
-    assert len(result) > 0, "result list is empty! Please check data source or inference process."
-    non_positive = [x for x in result if x <= 0]
-    assert all(x > 0 for x in result), f"Non-positive values found in list: {non_positive}"
\ No newline at end of file
diff --git a/test/test_uc_connector.py b/test/test_uc_connector.py
index 0c2261d8..d4a0caeb 100644
--- a/test/test_uc_connector.py
+++ b/test/test_uc_connector.py
@@ -25,7 +25,6 @@
 import random
 import secrets
 import unittest
-from collections import defaultdict
 from typing import List, Union
 from unittest.mock import MagicMock, Mock, patch
 
@@ -107,14 +106,12 @@ def init_uc(
         ucconnector.dump_tasks: dict[str, dict[str, List[Task]]] = {}
         ucconnector.total_tp_size = self.total_tp_size
         ucconnector._connector_metadata = metadata
-        ucconnector.layerwise_load_tasks: dict[str, dict[str, Task]] = defaultdict(
-            dict
-        )
+        ucconnector.layerwise_load_tasks: dict[
+            str, dict[str, tuple[Task, Task]]
+        ] = {}
         ucconnector._need_load_reqs: dict[str, Union[list[int], list[Task]]] = {}
         ucconnector._load_failed_reqs: set[str] = set()
         ucconnector._load_req_to_blocks: dict[str, set[int]] = {}
-        ucconnector.num_layers = 48
-        ucconnector.is_mla = False
         return ucconnector
 
     def test_get_num_new_matched_tokens_hit_all_on_storage(self):
@@ -511,7 +508,6 @@ def test_wait_for_save_not_layerwise_invalid_para(self):
             ucconnector.block_size = self.block_size
             ucconnector.use_layerwise = False
             ucconnector._connector_metadata = Mock()
-            ucconnector.is_mla = False
             with self.assertRaises(AssertionError):
                 ucconnector.wait_for_save()
 
@@ -546,7 +542,6 @@ def mock_wait(task: Task) -> int:
             )
             forward_context = Mock()
             ucconnector.start_load_kv(forward_context)
-            assert mock_connector.load.call_count == 1
 
     def test_start_load_kv_invalid_para(self):
         with patch.object(UnifiedCacheConnectorV1, "__init__", return_value=None):
@@ -564,7 +559,6 @@ def test_start_load_kv_layerwise_success(self):
         req_meta1.load_blocks = [
            (secrets.token_hex(8), i) for i in range(self.block_number)
         ]
-        req_meta1.load_async = False
         metadata = UCConnectorV1Metadata()
         metadata.requests = [req_meta1]
 
@@ -581,7 +575,7 @@ def mock_load(
             ucconnector = self.init_uc(mock_connector, metadata=metadata)
             forward_context = Mock()
             ucconnector.start_load_kv(forward_context)
-            assert mock_connector.load.call_count == self.num_layers
+            assert mock_connector.load.call_count == 2 * self.num_layers
 
 
 if __name__ == "__main__":
diff --git a/test/test_ucm_dram.py b/test/test_ucm_dram.py
new file mode 100644
index 00000000..020405d1
--- /dev/null
+++ b/test/test_ucm_dram.py
@@ -0,0 +1,250 @@
+#
+# MIT License
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+
+import random
+import unittest
+import unittest.mock as mock
+from contextlib import contextmanager
+from typing import List
+from unittest.mock import MagicMock
+
+import torch
+from vllm.multimodal.inputs import MultiModalKwargs
+from vllm.sampling_params import SamplingParams
+from vllm.utils import sha256
+from vllm.v1.core.kv_cache_utils import hash_request_tokens
+from vllm.v1.request import Request
+
+
+@contextmanager
+def mock_stream_context(stream=None):
+    yield
+
+
+class MockStream:
+    def __init__(self, device=None):
+        self.device = device or torch.device("cpu")
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        pass
+
+    def synchronize(self):
+        pass
+
+    def record_event(self, event=None):
+        return event or MockEvent()
+
+    def wait_stream(self, stream):
+        pass
+
+
+class MockEvent:
+    def __init__(self, enable_timing=False):
+        self.enable_timing = enable_timing
+
+    def record(self, stream=None):
+        pass
+
+    def wait(self, stream=None):
+        pass
+
+    def synchronize(self):
+        pass
+
+
+def patch_cuda_for_cpu():
+    mock.patch("torch.cuda.Stream", MockStream).start()
+    mock.patch("torch.cuda.Event", MockEvent).start()
+    mock.patch("torch.cuda.current_stream", return_value=MockStream()).start()
+    mock.patch("torch.cuda.synchronize", side_effect=lambda *a, **k: None).start()
+    mock.patch("torch.cuda.is_available", return_value=True).start()
+    mock.patch("torch.cuda.stream", mock_stream_context).start()
+
+
+patch_cuda_for_cpu()
+from ucm.store.dramstore.dramstore_connector import (  # isort: skip
+    DramTask,
+    UcmDramStore,
+)
+
+
+def make_request(
+    request_id, prompt_token_ids, mm_positions=None, mm_hashes=None, cache_salt=None
+):
+    if mm_positions is None:
+        multi_modal_inputs = None
+    else:
+        multi_modal_inputs = [MultiModalKwargs({})] * len(mm_positions)
+
+    return Request(
+        request_id=request_id,
+        prompt_token_ids=prompt_token_ids,
+        multi_modal_inputs=multi_modal_inputs,
+        multi_modal_hashes=mm_hashes,
+        multi_modal_placeholders=mm_positions,
+        sampling_params=SamplingParams(max_tokens=17),
+        pooling_params=None,
+        eos_token_id=100,
+        arrival_time=0,
+        lora_request=None,
+        cache_salt=cache_salt,
+    )
+
+
+class TestUcmDram(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        print("===> Before all tests (setUpClass)")
+
+    @classmethod
+    def tearDownClass(cls):
+        print("===> After all tests (tearDownClass)")
+
+    def setUp(self):
+        self.config = {"block_size": 4}
+        self.scheduler_config = {
+            "role": "scheduler",
+            "max_cache_size": 1073741824,
+            "kv_block_size": 262144,
+        }
+        self.worker_config = {
+            "role": "worker",
+            "max_cache_size": 1073741824,
+            "kv_block_size": 262144,
+        }
+
+        self.block_number = 4
+        self.block_size = int(self.config["block_size"])
+        self.scheduler_dram = UcmDramStore(self.scheduler_config)
+        self.worker_dram = UcmDramStore(self.worker_config)
+        random.seed(20250728)
+        self.request = make_request(
+            request_id=1,
+            prompt_token_ids=random.sample(
+                range(0, 10000), self.block_number * self.block_size
+            ),
+            mm_positions=None,
+            mm_hashes=None,
+        )
+        block_hash_types = hash_request_tokens(sha256, self.block_size, self.request)
+        self.block_hashes: List[str] = [str(x.hash_value) for x in block_hash_types]
+
+    def test_look_up_all_hit(self):
+        """
+        Test with all blocks hit in the cache
+        """
+        expected = [True] * len(self.block_hashes)
+        self.scheduler_dram.cached_blocks.update(self.block_hashes)
+        actual = self.scheduler_dram.lookup(self.block_hashes)
+
+        self.assertEqual(actual, expected)
+
+    def test_lookup_partial_hit(self):
+        """
+        Test with part of the blocks hit in the cache
+        """
+        partial_index = random.randint(0, 4)
+        partial_hashes = self.block_hashes[:partial_index]
+        self.scheduler_dram.cached_blocks.update(partial_hashes)
+        actual = self.scheduler_dram.lookup(self.block_hashes)
+        expected = [True] * partial_index + [False] * (self.block_size - partial_index)
+        self.assertEqual(actual, expected)
+
+    def test_lookup_none_hit(self):
+        """
+        Test with none of the blocks hit in the cache
+        """
+        actual = self.scheduler_dram.lookup(self.block_hashes)
+        expected = [False] * len(self.block_hashes)
+        self.assertEqual(actual, expected)
+
+    def test_load_success(self):
+        """
+        Test loading from the cache successfully
+        """
+        src_tensors = [
+            torch.randint(0, 100, (self.block_size,), dtype=torch.int8)
+            for _ in range(len(self.block_hashes))
+        ]
+        offsets = [i for i in range(len(self.block_hashes))]
+        dump_task = self.worker_dram.dump(self.block_hashes, offsets, src_tensors)
+        self.worker_dram.wait(dump_task)
+        dst_tensors = [
+            torch.zeros(self.block_size, dtype=torch.int8)
+            for _ in range(len(self.block_hashes))
+        ]
+        load_task = self.worker_dram.load(self.block_hashes, offsets, dst_tensors)
+
+        self.assertIsInstance(load_task, DramTask)
+        self.assertIsNotNone(load_task.event)
+        for i, (src_tensor, dst_tensor) in enumerate(zip(src_tensors, dst_tensors)):
+            self.assertEqual(dst_tensor.shape[0], self.block_size)
+            self.assertTrue(
+                torch.equal(src_tensor, dst_tensor),
+                f"Block {i} loaded data is different",
+            )
+
+    def test_dump_success(self):
+        """
+        Test dumping data successfully
+        """
+        src_tensors = [
+            torch.randint(0, 100, (self.block_size,), dtype=torch.int8)
+            for _ in range(len(self.block_hashes))
+        ]
+        offsets = [i for i in range(len(self.block_hashes))]
+        original_data = [tensor.clone() for tensor in src_tensors]
+        dump_task = self.worker_dram.dump(self.block_hashes, offsets, src_tensors)
+        self.assertIsInstance(dump_task, DramTask)
+        self.assertIsNotNone(dump_task.event)
+        self.worker_dram.wait(dump_task)
+        for i, block_id in enumerate(self.block_hashes):
+            key = block_id + "_" + str(offsets[i])
+            cached_data = self.worker_dram.dram_cache[key]
+            self.assertEqual(cached_data.shape[0], self.block_size)
+            self.assertTrue(torch.equal(cached_data, original_data[i]))
+
+    def test_wait_success(self):
+        """
+        Test waiting for a task successfully
+        """
+        task = DramTask()
+        task.event = MagicMock()
+        result = self.worker_dram.wait(task)
+        self.assertEqual(result, 0)
+        task.event.synchronize.assert_called_once()
+
+    def test_wait_failure(self):
+        task = DramTask()
+        task.event = None
+        result = self.worker_dram.wait(task)
+        self.assertEqual(result, -1)
+
+
+if __name__ == "__main__":
+    unittest.main()