Windows and Linux can both run the code
This commit is contained in:
@@ -6,28 +6,30 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from data_utils import compute_cont_stats, build_vocab, load_split
|
||||
from platform_utils import safe_path, ensure_dir
|
||||
|
||||
BASE_DIR = Path(__file__).resolve().parent
|
||||
REPO_DIR = BASE_DIR.parent.parent
|
||||
DATA_PATH = str(REPO_DIR / "dataset" / "hai" / "hai-21.03" / "train1.csv.gz")
|
||||
SPLIT_PATH = str(BASE_DIR / "feature_split.json")
|
||||
OUT_STATS = str(BASE_DIR / "results" / "cont_stats.json")
|
||||
OUT_VOCAB = str(BASE_DIR / "results" / "disc_vocab.json")
|
||||
DATA_PATH = REPO_DIR / "dataset" / "hai" / "hai-21.03" / "train1.csv.gz"
|
||||
SPLIT_PATH = BASE_DIR / "feature_split.json"
|
||||
OUT_STATS = BASE_DIR / "results" / "cont_stats.json"
|
||||
OUT_VOCAB = BASE_DIR / "results" / "disc_vocab.json"
|
||||
|
||||
|
||||
def main(max_rows: Optional[int] = None):
|
||||
split = load_split(SPLIT_PATH)
|
||||
split = load_split(safe_path(SPLIT_PATH))
|
||||
time_col = split.get("time_column", "time")
|
||||
cont_cols = [c for c in split["continuous"] if c != time_col]
|
||||
disc_cols = [c for c in split["discrete"] if not c.startswith("attack") and c != time_col]
|
||||
|
||||
mean, std = compute_cont_stats(DATA_PATH, cont_cols, max_rows=max_rows)
|
||||
vocab = build_vocab(DATA_PATH, disc_cols, max_rows=max_rows)
|
||||
mean, std = compute_cont_stats(safe_path(DATA_PATH), cont_cols, max_rows=max_rows)
|
||||
vocab = build_vocab(safe_path(DATA_PATH), disc_cols, max_rows=max_rows)
|
||||
|
||||
with open(OUT_STATS, "w", encoding="ascii") as f:
|
||||
ensure_dir(OUT_STATS.parent)
|
||||
with open(safe_path(OUT_STATS), "w", encoding="utf-8") as f:
|
||||
json.dump({"mean": mean, "std": std, "max_rows": max_rows}, f, indent=2)
|
||||
|
||||
with open(OUT_VOCAB, "w", encoding="ascii") as f:
|
||||
with open(safe_path(OUT_VOCAB), "w", encoding="utf-8") as f:
|
||||
json.dump({"vocab": vocab, "max_rows": max_rows}, f, indent=2)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user