feat: bench.py 加命令行参数,支持子进程方式跑(绕开内核torch限制)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
OwnerSunshine530
2026-06-14 19:53:21 +08:00
parent ab9c624167
commit 8c1d1cbaa5
+34 -1
View File
@@ -115,5 +115,38 @@ def run_once(config_override=None, batch_size=50, max_batches=None, max_feasign_
return res
def _parse_args(argv=None):
    """Parse the benchmark's command-line options.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which
            is the behavior of a plain ``_parse_args()`` call.

    Returns:
        An ``argparse.Namespace`` with the attributes ``smoke``, ``bs``,
        ``fp32``, ``no_fp16``, ``no_merge``, ``signid``, ``merge_th``,
        ``keep`` and ``feasign_none``.
    """
    # Imported locally so the module does not depend on a file-level import.
    import argparse

    ap = argparse.ArgumentParser(
        description="CTI 推理测量闭环(以子进程方式跑:!python bench.py ...)"
    )
    ap.add_argument("--smoke", type=int, default=None,
                    help="只跑前 N 个 batch(冒烟)")
    ap.add_argument("--bs", type=int, default=50,
                    help="batch_size(本地参考)")
    ap.add_argument("--fp32", action="store_true",
                    help="FP32 天花板 = 关 fp16 + 关 expert 合并")
    ap.add_argument("--no-fp16", action="store_true", help="关闭半精度")
    ap.add_argument("--no-merge", action="store_true", help="关闭 expert 合并")
    ap.add_argument("--signid", choices=["clamp", "modulo"], default=None,
                    help="sign-id 处理方式")
    ap.add_argument("--merge-th", type=float, default=None,
                    help="expert 合并余弦阈值")
    ap.add_argument("--keep", type=str, default=None,
                    help="逗号分隔的 keep_fp32_modules,如 linear,rep_encoder.input_norm")
    ap.add_argument("--feasign-none", action="store_true",
                    help="不截断特征(max_feasign_per_slot=None)")
    return ap.parse_args(argv)
def _build_config(a):
    """Translate parsed CLI options into a config-override dict.

    Only options the user actually set produce a key, so an invocation
    with no flags yields an empty dict.

    Args:
        a: Namespace-like object exposing ``fp32``, ``no_fp16``,
            ``no_merge``, ``signid``, ``merge_th`` and ``keep``.

    Returns:
        A dict holding any of the keys ``fp16``, ``expert_merge``,
        ``signid_mode``, ``merge_threshold`` and ``keep_fp32_modules``.
    """
    cfg = {}
    # --fp32 is shorthand for --no-fp16 together with --no-merge.
    if a.fp32 or a.no_fp16:
        cfg["fp16"] = False
    if a.fp32 or a.no_merge:
        cfg["expert_merge"] = False
    if a.signid:
        cfg["signid_mode"] = a.signid
    # Compare against None so an explicit threshold of 0.0 is honored.
    if a.merge_th is not None:
        cfg["merge_threshold"] = a.merge_th
    if a.keep is not None:
        # Strip whitespace and drop empty items so "a, b," parses as
        # ("a", "b") rather than ("a", " b").
        names = (part.strip() for part in a.keep.split(","))
        cfg["keep_fp32_modules"] = tuple(name for name in names if name)
    return cfg


if __name__ == "__main__":
    # Parse arguments before doing any work: --help and argument errors
    # exit immediately, and the benchmark runs exactly once with the
    # requested configuration.
    a = _parse_args()
    # NOTE(review): {1: 2} is the hard-coded default passed as
    # max_feasign_per_slot; presumably a per-slot cap -- confirm against
    # run_once.
    mf = None if a.feasign_none else {1: 2}
    run_once(_build_config(a), batch_size=a.bs, max_batches=a.smoke,
             max_feasign_per_slot=mf)