doc: INT8 MoE标记死路(AUC安全0.7589但本地10.15s,_int_mm慢+fp32反量化巨大中间张量)。锁定71.34

This commit is contained in:
OwnerSunshine530
2026-06-20 01:54:40 +08:00
parent 3c9da9a47d
commit 9461d97173
+2 -1
View File
@@ -157,7 +157,8 @@ CONFIG = {
"moe_fused_weight": False, # True=top-k加权用scatter+mul+sum(评测慢,勿开)
# 真稀疏MoE实测在评测环境下为净负收益:延迟 34.64s->37.64s(本地快15%,但argsort/scatter开销在评测环境下被放大,与varlen的情况类似)
# +容量丢弃降AUC(0.7525->0.7507)。已退回 dense。
"moe_int8": False, # True=INT8 dense MoE(torch._int_mm,2D拼接);计算减半但加quant kernel,有AUC风险
# 实测:AUC安全(0.7589),但本地耗时10.15s(原因:_int_mm不如cutlass快,且fp32反量化会产生[N,8192]的巨大中间张量)。死路,勿开。
"moe_int8": False, # True=INT8 dense MoE(本地慢2.5倍,已验证死路)
"moe_sparse": False, # True=真稀疏MoE(评测净负,勿开)
"moe_capacity": 2.0,
"skip_moe_loss": True, # 推理跳过 moe_loss(load-balance,推理无用),省 importance/std/mean kernel