GenSeg-Baselines / code /scripts /h800_fetch_data.py
MaybeRichard's picture
code: complete eval pipeline (7 metrics + per-class + Wilcoxon) + Swin-UNet/TransUNet networks; remove backups/obsolete
1a18f22 verified
Raw
History Blame Contribute Delete
1.05 kB
"""On h800: download GenSegDataset tars from HF (via proxy+token), extract into Data/,
then remove the tars. Produces the processed_unified layout under Data/."""
import os, glob, tarfile
from huggingface_hub import snapshot_download
BASE = "/mnt/tidal-alsh-share2/dataset/qinshengqian/research/c3/NPJ-ACM/Data"
TARS = os.path.join(BASE, "_tars")


def download_tars(tars_dir: str = TARS) -> None:
    """Download the dataset tarballs and README.md from the Hub into *tars_dir*.

    Only files matching ``*.tar`` and ``README.md`` are fetched from the
    ``MaybeRichard/GenSegDataset`` dataset repository.
    """
    print("[1] downloading tars ...", flush=True)
    snapshot_download("MaybeRichard/GenSegDataset", repo_type="dataset",
                      allow_patterns=["*.tar", "README.md"], local_dir=tars_dir)


def extract_tars(tars_dir: str = TARS, dest: str = BASE) -> None:
    """Extract every ``*.tar`` found in *tars_dir* into *dest*.

    Archives are processed in sorted (name) order. Afterwards, if a
    ``README.md`` sits next to the tarballs, it is moved into *dest*.

    Raises:
        tarfile.FilterError: when the interpreter supports extraction filters
            and an archive member would be written outside *dest*.
    """
    print("[2] extracting ...", flush=True)
    # Downloaded archives are external input: use the "data" extraction filter
    # when this Python provides it, so members with absolute paths, "../"
    # components or escaping links are refused instead of being written
    # outside dest. Older interpreters fall back to the unfiltered call.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
    for tar_path in sorted(glob.glob(os.path.join(tars_dir, "*.tar"))):
        print(" extract", os.path.basename(tar_path), flush=True)
        with tarfile.open(tar_path) as tf:
            tf.extractall(dest, **extract_kwargs)
    readme = os.path.join(tars_dir, "README.md")
    if os.path.isfile(readme):
        os.replace(readme, os.path.join(dest, "README.md"))


def cleanup_tars(tars_dir: str = TARS) -> None:
    """Delete the tarballs in *tars_dir* and remove the directory if possible.

    Removal of the directory itself is best-effort: if anything unexpected is
    still inside, the directory is kept and a note is printed.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import line being edited.
    import shutil

    print("[3] cleanup tars ...", flush=True)
    for tar_path in glob.glob(os.path.join(tars_dir, "*.tar")):
        os.remove(tar_path)
    # Recent huggingface_hub versions leave a ".cache" metadata folder inside
    # local_dir; without removing it the rmdir below can never succeed.
    shutil.rmtree(os.path.join(tars_dir, ".cache"), ignore_errors=True)
    try:
        os.rmdir(tars_dir)
    except OSError as exc:
        # Best-effort: keep going, but say why the directory is still there.
        print(" keep", tars_dir, f"({exc})", flush=True)


def main() -> None:
    """Run the full fetch: download, extract, clean up, then print DONE_DATA."""
    download_tars()
    extract_tars()
    cleanup_tars()
    print("DONE_DATA", flush=True)


if __name__ == "__main__":
    main()