feat/auc-recovery-plan #1
+15
-5
@@ -105,17 +105,27 @@ def _load_filtered(history_dir, test_csv, test_users):
|
|||||||
|
|
||||||
|
|
||||||
def _get_data(cur, ref, rebuild=False):
    """Return the filtered ``(item_dict, user_seq)``, preferring the disk cache.

    The cache is stored with ``pickle`` rather than ``torch.save``/``torch.load``:
    on the AI Studio overlay filesystem, torch's zip/mmap reads intermittently
    fail with ``[Errno 38] Function not implemented``.

    Args:
        cur: Directory holding ``bench_filtered_cache.pkl``. Assumed to be a
            ``pathlib.Path`` (it is combined with ``/`` and queried with
            ``.exists()``) -- confirm against callers.
        ref: Directory containing ``test.csv`` and the ``history`` entry;
            same ``pathlib.Path`` assumption as ``cur``.
        rebuild: When true, ignore any existing cache and rebuild it.

    Returns:
        The ``(item_dict, user_seq)`` pair, either read from the cache or
        freshly produced by ``_load_filtered``.
    """
    import pickle

    cache = cur / "bench_filtered_cache.pkl"
    test_csv = ref / "test.csv"
    history = ref / "history"

    if cache.exists() and not rebuild:
        print(f"[BENCH] 读取过滤缓存:{cache}")
        try:
            # NOTE: pickle executes arbitrary code on load; this file must only
            # ever be a cache written by this function, never external input.
            with open(cache, "rb") as f:
                d = pickle.load(f)
            return d["item_dict"], d["user_seq"]
        except Exception as e:
            # Deliberately broad: a truncated, stale or otherwise unreadable
            # cache can surface as many exception types (EOFError,
            # UnpicklingError, KeyError, OSError, ...). Any of them simply
            # means "fall through and rebuild".
            print(f"[BENCH][WARN] 缓存读取失败({e}),重新构建")

    test_users = _test_user_ids(test_csv)
    item_dict, user_seq = _load_filtered(history, test_csv, test_users)

    # Write to a sibling temp file and rename it into place, so an interrupted
    # dump never leaves a half-written cache at the final path.
    tmp = cache.with_name(cache.name + ".tmp")
    try:
        with open(tmp, "wb") as f:
            pickle.dump({"item_dict": item_dict, "user_seq": user_seq}, f, protocol=4)
        tmp.replace(cache)
    finally:
        # After a successful rename the temp path no longer exists; this only
        # removes the leftover from a failed write.
        if tmp.exists():
            tmp.unlink()
    print(f"[BENCH] 已缓存 -> {cache}")
    return item_dict, user_seq
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user