feat: 真稀疏MoE(capacity分组,只算top-k,cutlass baddbmm,无host同步)

按expert对token排序,再按固定capacity分桶;每桶做dense baddbmm,GEMM减少约3x。argsort/where/scatter/index_add均不触发.item()/bincount那样的host同步(不同于loop MoE)。超出容量的token会被丢弃(由capacity_factor控制)。等价性测试:capacity足够大、无丢弃时,结果==dense。bench新增 --moe-sparse/--moe-cap。默认关闭,待验证。

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-17 21:05:55 +08:00
parent aacfe904fd
commit b397c142fa
3 changed files with 64 additions and 0 deletions
@@ -347,6 +347,8 @@ def _parse_args():
    ap.add_argument("--no-moe-baddbmm", action="store_true", help="关闭 MoE baddbmm(用 einsum 对照)")
    ap.add_argument("--no-skip-moe-loss", action="store_true", help="不跳过 moe_loss(对照)")
    ap.add_argument("--logit-bias", type=float, default=None, help="PCOC校准:logit偏移(本地验证PCOC→1.0)")
+    ap.add_argument("--moe-sparse", action="store_true", help="真稀疏MoE(只算top-k,capacity分组)")
+    ap.add_argument("--moe-cap", type=float, default=None, help="MoE capacity factor")
    ap.add_argument("--sparse-pool", action="store_true", help="稀疏矩阵乘做池化(段内高重复时省)")
    ap.add_argument("--precompute-rep", action="store_true",
                    help="预计算RepEncoder缓存,model(batch)跳过embedding层(从batches自建)")
@@ -401,6 +403,10 @@ if __name__ == "__main__":
        cfg["skip_moe_loss"] = False
    if a.logit_bias is not None:
        cfg["logit_bias"] = a.logit_bias
+    if a.moe_sparse:
+        cfg["moe_sparse"] = True
+    if a.moe_cap is not None:
+        cfg["moe_capacity"] = a.moe_cap
    if a.sparse_pool:
        cfg["sparse_pool"] = True
    if a.precompute_rep: