feat: bench.py 加命令行参数,支持子进程方式跑(绕开内核torch限制)
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+34
-1
@@ -115,5 +115,38 @@ def run_once(config_override=None, batch_size=50, max_batches=None, max_feasign_
|
||||
return res
|
||||
|
||||
|
||||
def _parse_args():
|
||||
import argparse
|
||||
ap = argparse.ArgumentParser(description="CTI 推理测量闭环(以子进程方式跑:!python bench.py ...)")
|
||||
ap.add_argument("--smoke", type=int, default=None, help="只跑前 N 个 batch(冒烟)")
|
||||
ap.add_argument("--bs", type=int, default=50, help="batch_size(本地参考)")
|
||||
ap.add_argument("--fp32", action="store_true", help="FP32 天花板 = 关 fp16 + 关 expert 合并")
|
||||
ap.add_argument("--no-fp16", action="store_true", help="关闭半精度")
|
||||
ap.add_argument("--no-merge", action="store_true", help="关闭 expert 合并")
|
||||
ap.add_argument("--signid", choices=["clamp", "modulo"], default=None, help="sign-id 处理方式")
|
||||
ap.add_argument("--merge-th", type=float, default=None, help="expert 合并余弦阈值")
|
||||
ap.add_argument("--keep", type=str, default=None,
|
||||
help="逗号分隔的 keep_fp32_modules,如 linear,rep_encoder.input_norm")
|
||||
ap.add_argument("--feasign-none", action="store_true",
|
||||
help="不截断特征(max_feasign_per_slot=None)")
|
||||
return ap.parse_args()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: translate the CLI flags into a config override and
    # run the benchmark exactly once. (The unconditional `run_once({})` that
    # used to precede argument parsing is gone: it started a run before the
    # flags were read, so --smoke / --bs could not apply to it.)
    a = _parse_args()

    # Only keys the user explicitly asked to change are put into the override;
    # with no flags the dict stays empty.
    cfg = {}
    # --fp32 is shorthand for --no-fp16 plus --no-merge.
    if a.fp32 or a.no_fp16:
        cfg["fp16"] = False
    if a.fp32 or a.no_merge:
        cfg["expert_merge"] = False
    if a.signid is not None:
        cfg["signid_mode"] = a.signid
    if a.merge_th is not None:
        cfg["merge_threshold"] = a.merge_th
    if a.keep is not None:
        # Comma-separated list; empty items (e.g. from a trailing comma) are dropped.
        cfg["keep_fp32_modules"] = tuple(x for x in a.keep.split(",") if x)

    # NOTE(review): {1: 2} is presumably a slot -> max-feasign mapping; confirm
    # against run_once. --feasign-none passes None instead.
    mf = None if a.feasign_none else {1: 2}

    run_once(cfg, batch_size=a.bs, max_batches=a.smoke, max_feasign_per_slot=mf)
|
||||
|
||||
Reference in New Issue
Block a user