doc: 将INT8 MoE标记为死路(AUC安全,为0.7589,但本地耗时10.15s:_int_mm慢,且fp32反量化会产生巨大中间张量)。锁定71.34
This commit is contained in:
+2
-1
@@ -157,7 +157,8 @@ CONFIG = {
|
|||||||
"moe_fused_weight": False, # True=top-k加权用scatter+mul+sum(评测慢,勿开)
|
"moe_fused_weight": False, # True=top-k加权用scatter+mul+sum(评测慢,勿开)
|
||||||
# 真稀疏MoE实测评测净负:lat 34.64->37.64s(本地快15%但argsort/scatter开销评测放大,如varlen)
|
# 真稀疏MoE实测评测净负:lat 34.64->37.64s(本地快15%但argsort/scatter开销评测放大,如varlen)
|
||||||
# +容量丢弃降AUC(0.7525->0.7507)。已退回 dense。
|
# +容量丢弃降AUC(0.7525->0.7507)。已退回 dense。
|
||||||
"moe_int8": False, # True=INT8 dense MoE(torch._int_mm,2D拼接);计算减半但加quant kernel,有AUC风险
|
# 实测:AUC安全(0.7589),但本地耗时10.15s(原因:_int_mm不如cutlass快,且fp32反量化会产生[N,8192]的巨大中间张量)。死路,勿开。
|
||||||
|
"moe_int8": False, # True=INT8 dense MoE(本地慢2.5倍,已验证死路)
|
||||||
"moe_sparse": False, # True=真稀疏MoE(评测净负,勿开)
|
"moe_sparse": False, # True=真稀疏MoE(评测净负,勿开)
|
||||||
"moe_capacity": 2.0,
|
"moe_capacity": 2.0,
|
||||||
"skip_moe_loss": True, # 推理跳过 moe_loss(load-balance,推理无用),省 importance/std/mean kernel
|
"skip_moe_loss": True, # 推理跳过 moe_loss(load-balance,推理无用),省 importance/std/mean kernel
|
||||||
|
|||||||
Reference in New Issue
Block a user