update
This commit is contained in:
@@ -107,6 +107,14 @@ def lag1_corr(values: List[float]) -> float:
|
||||
def resolve_reference_paths(path: str) -> List[str]:
|
||||
if not path:
|
||||
return []
|
||||
if path.endswith(".json") and Path(path).exists():
|
||||
try:
|
||||
cfg = load_json(path)
|
||||
ref = cfg.get("data_glob") or cfg.get("data_path") or ""
|
||||
if ref:
|
||||
return resolve_reference_paths(str(ref))
|
||||
except Exception:
|
||||
return []
|
||||
if any(ch in path for ch in ["*", "?", "["]):
|
||||
base = Path(path).parent.resolve()
|
||||
pat = Path(path).name
|
||||
@@ -151,10 +159,11 @@ def main():
|
||||
std_ref = stats_json.get("raw_std", stats_json.get("std"))
|
||||
transforms = stats_json.get("transform", {})
|
||||
vocab = load_json(args.vocab)["vocab"]
|
||||
vocab_sets = {c: set(vocab[c].keys()) for c in disc_cols}
|
||||
vocab_sets = {c: set(vocab.get(c, {}).keys()) for c in disc_cols}
|
||||
|
||||
cont_stats = init_stats(cont_cols)
|
||||
disc_invalid = {c: 0 for c in disc_cols}
|
||||
missing_generated = {c: 0 for c in disc_cols}
|
||||
rows = 0
|
||||
|
||||
with open_csv(args.generated) as f:
|
||||
@@ -172,7 +181,11 @@ def main():
|
||||
if ref_paths:
|
||||
pass
|
||||
for c in disc_cols:
|
||||
if row[c] not in vocab_sets[c]:
|
||||
tok = row.get(c, None)
|
||||
if tok is None:
|
||||
missing_generated[c] += 1
|
||||
continue
|
||||
if tok not in vocab_sets[c]:
|
||||
disc_invalid[c] += 1
|
||||
|
||||
cont_summary = finalize_stats(cont_stats)
|
||||
@@ -192,6 +205,7 @@ def main():
|
||||
"continuous_summary": cont_summary,
|
||||
"continuous_error": cont_err,
|
||||
"discrete_invalid_counts": disc_invalid,
|
||||
"missing_generated_columns": {k: v for k, v in missing_generated.items() if v > 0},
|
||||
}
|
||||
|
||||
# Optional richer metrics using reference data
|
||||
@@ -212,7 +226,7 @@ def main():
|
||||
except Exception:
|
||||
gen_cont[c].append(0.0)
|
||||
for c in disc_cols:
|
||||
tok = row[c]
|
||||
tok = row.get(c, "")
|
||||
gen_disc[c][tok] = gen_disc[c].get(tok, 0) + 1
|
||||
|
||||
loaded = 0
|
||||
@@ -228,7 +242,7 @@ def main():
|
||||
except Exception:
|
||||
ref_cont[c].append(0.0)
|
||||
for c in disc_cols:
|
||||
tok = row[c]
|
||||
tok = row.get(c, "")
|
||||
ref_disc[c][tok] = ref_disc[c].get(tok, 0) + 1
|
||||
loaded += 1
|
||||
if args.max_rows and loaded >= args.max_rows:
|
||||
|
||||
Reference in New Issue
Block a user