From 9461d97173285043cb776fc34981e686574ff9ac Mon Sep 17 00:00:00 2001 From: OwnerSunshine530 Date: Sat, 20 Jun 2026 01:54:40 +0800 Subject: [PATCH] =?UTF-8?q?doc:=20INT8=20MoE=E6=A0=87=E8=AE=B0=E6=AD=BB?= =?UTF-8?q?=E8=B7=AF(AUC=E5=AE=89=E5=85=A80.7589=E4=BD=86=E6=9C=AC?= =?UTF-8?q?=E5=9C=B010.15s,=5Fint=5Fmm=E6=85=A2+fp32=E5=8F=8D=E9=87=8F?= =?UTF-8?q?=E5=8C=96=E5=B7=A8=E5=A4=A7=E4=B8=AD=E9=97=B4=E5=BC=A0=E9=87=8F?= =?UTF-8?q?)=E3=80=82=E9=94=81=E5=AE=9A71.34?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 代码/code/infer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/代码/code/infer.py b/代码/code/infer.py index 36d7153..4b3be79 100644 --- a/代码/code/infer.py +++ b/代码/code/infer.py @@ -157,7 +157,8 @@ CONFIG = { "moe_fused_weight": False, # True=top-k加权用scatter+mul+sum(评测慢,勿开) # 真稀疏MoE实测评测净负:lat 34.64->37.64s(本地快15%但argsort/scatter开销评测放大,如varlen) # +容量丢弃降AUC(0.7525->0.7507)。已退回 dense。 - "moe_int8": False, # True=INT8 dense MoE(torch._int_mm,2D拼接);计算减半但加quant kernel,有AUC风险 + # 实测:AUC安全(0.7589)但本地10.15s(_int_mm不如cutlass+fp32反量化[N,8192]巨大中间张量)。死路,勿开。 + "moe_int8": False, # True=INT8 dense MoE(本地慢2.5倍,已验证死路) "moe_sparse": False, # True=真稀疏MoE(评测净负,勿开) "moe_capacity": 2.0, "skip_moe_loss": True, # 推理跳过 moe_loss(load-balance,推理无用),省 importance/std/mean kernel