From 6278d4a050f99af1ad2da31fccb83ea4f76341e3 Mon Sep 17 00:00:00 2001
From: OwnerSunshine530
Date: Wed, 17 Jun 2026 21:36:23 +0800
Subject: [PATCH] =?UTF-8?q?revert:=20=E7=9C=9F=E7=A8=80=E7=96=8FMoE?=
 =?UTF-8?q?=E9=BB=98=E8=AE=A4=E5=85=B3=20=E2=80=94=20=E8=AF=84=E6=B5=8B?=
 =?UTF-8?q?=E5=87=80=E8=B4=9F(lat34.64->37.64,=E6=9C=AC=E5=9C=B0=E5=BF=AB?=
 =?UTF-8?q?=E8=AF=84=E6=B5=8B=E6=85=A2=E5=A6=82varlen;+=E5=AE=B9=E9=87=8F?=
 =?UTF-8?q?=E4=B8=A2=E5=BC=83=E9=99=8DAUC)=E3=80=82=E5=9B=9E=E5=88=B0=20de?=
 =?UTF-8?q?nse/70.96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Opus 4.8
---
 代码/code/infer.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/代码/code/infer.py b/代码/code/infer.py
index 6f76508..9915f44 100644
--- a/代码/code/infer.py
+++ b/代码/code/infer.py
@@ -144,8 +144,10 @@ CONFIG = {
     # 须靠提交验证。AUC中性、MoE仅占2%算力故风险极低。
     "vectorize_moe": True, # True=稠密向量化MoE(无同步点);False=原逐expert循环(.nonzero同步)
     "moe_baddbmm": True, # MoE FFN 用 baddbmm(cutlass GEMM+bias epilogue融合),省 bias add kernel
-    "moe_sparse": True, # 真稀疏MoE(只算top-k,capacity分组),本地4.77->4.05s(-15%),AUC微降无碍
-    "moe_capacity": 2.0, # 每expert容量 = ceil(Nk/E*factor);cap=2.0 PCOC1.105在区间(1.25会炸到1.418)
+    # 真稀疏MoE实测评测净负:lat 34.64->37.64s(本地快15%但argsort/scatter开销评测放大,如varlen)
+    # +容量丢弃降AUC(0.7525->0.7507)。已退回 dense。
+    "moe_sparse": False, # True=真稀疏MoE(评测净负,勿开)
+    "moe_capacity": 2.0,
     "skip_moe_loss": True, # 推理跳过 moe_loss(load-balance,推理无用),省 importance/std/mean kernel
     # PCOC 校准:本地拟合-0.1067(本地PCOC1.109),但评测PCOC稳定1.059,按斜率换算评测最优≈-0.059。
     "logit_bias": -0.06, # logit 加常数偏移使评测 PCOC→~1.0(单调,AUC不变,免费+~0.33分)