feat: 真稀疏MoE(capacity分组,只算top-k,cutlass baddbmm,无host同步)
按expert排序token+固定capacity分桶,每桶dense baddbmm,减GEMM~3x。argsort/where/scatter/index_add无.item()/bincount同步(不同于loop MoE)。超容量token丢弃(capacity_factor控)。等价测试(大capacity无丢弃==dense)。bench --moe-sparse/--moe-cap。默认关待验证。

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -347,6 +347,8 @@ def _parse_args():
|
||||
ap.add_argument("--no-moe-baddbmm", action="store_true", help="关闭 MoE baddbmm(用 einsum 对照)")
|
||||
ap.add_argument("--no-skip-moe-loss", action="store_true", help="不跳过 moe_loss(对照)")
|
||||
ap.add_argument("--logit-bias", type=float, default=None, help="PCOC校准:logit偏移(本地验证PCOC→1.0)")
|
||||
ap.add_argument("--moe-sparse", action="store_true", help="真稀疏MoE(只算top-k,capacity分组)")
|
||||
ap.add_argument("--moe-cap", type=float, default=None, help="MoE capacity factor")
|
||||
ap.add_argument("--sparse-pool", action="store_true", help="稀疏矩阵乘做池化(段内高重复时省)")
|
||||
ap.add_argument("--precompute-rep", action="store_true",
|
||||
help="预计算RepEncoder缓存,model(batch)跳过embedding层(从batches自建)")
|
||||
@@ -401,6 +403,10 @@ if __name__ == "__main__":
|
||||
cfg["skip_moe_loss"] = False
|
||||
if a.logit_bias is not None:
|
||||
cfg["logit_bias"] = a.logit_bias
|
||||
if a.moe_sparse:
|
||||
cfg["moe_sparse"] = True
|
||||
if a.moe_cap is not None:
|
||||
cfg["moe_capacity"] = a.moe_cap
|
||||
if a.sparse_pool:
|
||||
cfg["sparse_pool"] = True
|
||||
if a.precompute_rep:
|
||||
|
||||
Reference in New Issue
Block a user