Add soft bounds, per-feature scaling, and full pipeline runner
This commit is contained in:
@@ -13,3 +13,4 @@ Conventions:
|
||||
|
||||
Tools:
|
||||
- `example/diagnose_ks.py` for per-feature KS + CDF plots.
|
||||
- `example/run_all_full.py` for one-command full pipeline + diagnostics.
|
||||
|
||||
@@ -55,3 +55,10 @@
|
||||
- **Files**:
|
||||
- `example/data_utils.py`
|
||||
- `example/export_samples.py`
|
||||
|
||||
## 2026-01-27 — Soft bounds + post-scale for boundary pile-up
|
||||
- **Decision**: Replace hard sigmoid bounds with soft tanh bounds and allow per-feature post-scaling.
|
||||
- **Why**: Many continuous features collapsed to max bound (KS=1.0).
|
||||
- **Files**:
|
||||
- `example/export_samples.py`
|
||||
- `example/config.json`
|
||||
|
||||
@@ -44,7 +44,21 @@
|
||||
"cont_clamp_x0": 5.0,
|
||||
"use_quantile_transform": true,
|
||||
"quantile_bins": 1001,
|
||||
"cont_bound_mode": "sigmoid",
|
||||
"cont_bound_mode": "soft_tanh",
|
||||
"cont_bound_strength": 2.0,
|
||||
"cont_post_scale": {
|
||||
"P1_B4002": 0.8,
|
||||
"P1_B400B": 0.8,
|
||||
"P1_FT02Z": 0.8,
|
||||
"P1_PCV01D": 0.8,
|
||||
"P1_PCV01Z": 0.8,
|
||||
"P1_PCV02Z": 0.8,
|
||||
"P2_24Vdc": 0.8,
|
||||
"P2_MSD": 0.8,
|
||||
"P3_LCP01D": 0.8,
|
||||
"P4_ST_PT01": 0.8,
|
||||
"P4_ST_TT01": 0.8
|
||||
},
|
||||
"shuffle_buffer": 256,
|
||||
"use_temporal_stage1": true,
|
||||
"temporal_hidden_dim": 256,
|
||||
|
||||
@@ -44,7 +44,21 @@
|
||||
"cont_clamp_x0": 5.0,
|
||||
"use_quantile_transform": true,
|
||||
"quantile_bins": 1001,
|
||||
"cont_bound_mode": "sigmoid",
|
||||
"cont_bound_mode": "soft_tanh",
|
||||
"cont_bound_strength": 2.0,
|
||||
"cont_post_scale": {
|
||||
"P1_B4002": 0.8,
|
||||
"P1_B400B": 0.8,
|
||||
"P1_FT02Z": 0.8,
|
||||
"P1_PCV01D": 0.8,
|
||||
"P1_PCV01Z": 0.8,
|
||||
"P1_PCV02Z": 0.8,
|
||||
"P2_24Vdc": 0.8,
|
||||
"P2_MSD": 0.8,
|
||||
"P3_LCP01D": 0.8,
|
||||
"P4_ST_PT01": 0.8,
|
||||
"P4_ST_TT01": 0.8
|
||||
},
|
||||
"shuffle_buffer": 1024,
|
||||
"use_temporal_stage1": false,
|
||||
"sample_batch_size": 4,
|
||||
|
||||
@@ -44,7 +44,21 @@
|
||||
"cont_clamp_x0": 5.0,
|
||||
"use_quantile_transform": true,
|
||||
"quantile_bins": 1001,
|
||||
"cont_bound_mode": "sigmoid",
|
||||
"cont_bound_mode": "soft_tanh",
|
||||
"cont_bound_strength": 2.0,
|
||||
"cont_post_scale": {
|
||||
"P1_B4002": 0.8,
|
||||
"P1_B400B": 0.8,
|
||||
"P1_FT02Z": 0.8,
|
||||
"P1_PCV01D": 0.8,
|
||||
"P1_PCV01Z": 0.8,
|
||||
"P1_PCV02Z": 0.8,
|
||||
"P2_24Vdc": 0.8,
|
||||
"P2_MSD": 0.8,
|
||||
"P3_LCP01D": 0.8,
|
||||
"P4_ST_PT01": 0.8,
|
||||
"P4_ST_TT01": 0.8
|
||||
},
|
||||
"shuffle_buffer": 1024,
|
||||
"use_temporal_stage1": true,
|
||||
"temporal_hidden_dim": 512,
|
||||
|
||||
@@ -144,6 +144,8 @@ def main():
|
||||
cont_clamp_x0 = float(cfg.get("cont_clamp_x0", 0.0))
|
||||
use_quantile = bool(cfg.get("use_quantile_transform", False))
|
||||
cont_bound_mode = str(cfg.get("cont_bound_mode", "clamp"))
|
||||
cont_bound_strength = float(cfg.get("cont_bound_strength", 1.0))
|
||||
cont_post_scale = cfg.get("cont_post_scale", {}) if isinstance(cfg.get("cont_post_scale", {}), dict) else {}
|
||||
use_temporal_stage1 = bool(cfg.get("use_temporal_stage1", False))
|
||||
temporal_hidden_dim = int(cfg.get("temporal_hidden_dim", 256))
|
||||
temporal_num_layers = int(cfg.get("temporal_num_layers", 1))
|
||||
@@ -287,10 +289,28 @@ def main():
|
||||
hi = vmax.get(c, None)
|
||||
if lo is None or hi is None:
|
||||
continue
|
||||
lo = float(lo)
|
||||
hi = float(hi)
|
||||
if cont_bound_mode == "sigmoid":
|
||||
x_cont[:, :, i] = float(lo) + (float(hi) - float(lo)) * torch.sigmoid(x_cont[:, :, i])
|
||||
x_cont[:, :, i] = lo + (hi - lo) * torch.sigmoid(x_cont[:, :, i])
|
||||
elif cont_bound_mode == "soft_tanh":
|
||||
# Soft bound without hard piling at edges
|
||||
mid = 0.5 * (lo + hi)
|
||||
half = 0.5 * (hi - lo)
|
||||
denom = cont_bound_strength if cont_bound_strength > 0 else 1.0
|
||||
x_cont[:, :, i] = mid + half * torch.tanh(x_cont[:, :, i] / denom)
|
||||
else:
|
||||
x_cont[:, :, i] = torch.clamp(x_cont[:, :, i], float(lo), float(hi))
|
||||
x_cont[:, :, i] = torch.clamp(x_cont[:, :, i], lo, hi)
|
||||
|
||||
# optional post-scaling for problematic features
|
||||
if cont_post_scale:
|
||||
for i, c in enumerate(cont_cols):
|
||||
if c in cont_post_scale:
|
||||
try:
|
||||
scale = float(cont_post_scale[c])
|
||||
except Exception:
|
||||
scale = 1.0
|
||||
x_cont[:, :, i] = x_cont[:, :, i] * scale
|
||||
|
||||
header = read_header(data_path)
|
||||
out_cols = [c for c in header if c != time_col or args.include_time]
|
||||
|
||||
107
example/run_all_full.py
Normal file
107
example/run_all_full.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Run full pipeline + diagnostics in one command."""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from platform_utils import safe_path, is_windows, resolve_path
|
||||
|
||||
|
||||
def run(cmd):
    """Echo a command line, then execute it and fail on a non-zero exit.

    Args:
        cmd: Sequence of string arguments; the first entry is the executable.

    Raises:
        subprocess.CalledProcessError: If the child process exits with a
            non-zero status (``check=True``).
    """
    # The echoed line shows the arguments as given, before safe_path is applied.
    print("running:", " ".join(cmd))
    cmd = [safe_path(arg) for arg in cmd]
    # subprocess.run already defaults to shell=False, so the former
    # Windows-only branch that passed it explicitly was identical to the
    # generic branch; a single call covers every platform.
    subprocess.run(cmd, check=True)
|
||||
|
||||
|
||||
def parse_args():
    """Build the command-line interface and parse the process arguments.

    Returns:
        argparse.Namespace carrying ``config`` (defaults to ``config.json``
        next to this script), ``device`` (defaults to ``"auto"``) and one
        boolean ``skip_*`` attribute per pipeline stage.
    """
    default_config = Path(__file__).resolve().parent / "config.json"
    cli = argparse.ArgumentParser(
        description="Run prepare -> train -> export -> eval -> diagnostics."
    )
    cli.add_argument("--config", default=str(default_config))
    cli.add_argument("--device", default="auto", help="cpu, cuda, or auto")
    # One opt-out switch per stage, registered in pipeline order.
    skip_flags = (
        "--skip-prepare",
        "--skip-train",
        "--skip-export",
        "--skip-eval",
        "--skip-diagnose",
    )
    for flag in skip_flags:
        cli.add_argument(flag, action="store_true")
    return cli.parse_args()
|
||||
|
||||
|
||||
def resolve_config(base_dir: Path, cfg_arg: str) -> Path:
    """Turn the ``--config`` argument into a resolved path.

    Lookup order:
      1. an absolute path is used as given;
      2. a path that exists relative to ``base_dir``;
      3. a path that exists relative to the current working directory;
      4. otherwise the path is resolved against ``base_dir`` even though no
         file was found there.

    Args:
        base_dir: Directory used for the first relative lookup and as the
            final fallback anchor.
        cfg_arg: Raw value supplied on the command line.

    Returns:
        The result of ``resolve_path(anchor, target)`` wrapped in ``Path``.
    """
    requested = Path(cfg_arg)
    if requested.is_absolute():
        anchor, target = requested.parent, requested
    else:
        beside_script = base_dir / requested
        if beside_script.exists():
            anchor, target = beside_script.parent, beside_script
        elif requested.exists():
            anchor, target = requested.parent, requested
        else:
            anchor, target = base_dir, requested
    return Path(resolve_path(anchor, target))
|
||||
|
||||
|
||||
def main():
    """Run the enabled pipeline stages in order as child Python processes.

    Stage order: prepare -> train -> export -> eval (+ summary) -> diagnose.
    Each stage is skipped when its ``--skip-*`` flag is set. Sampling
    parameters for the export stage are read from the JSON config with
    built-in fallbacks.
    """
    args = parse_args()
    here = Path(__file__).resolve().parent
    config_path = resolve_config(here, args.config)
    with open(config_path, "r", encoding="utf-8") as fh:
        cfg = json.load(fh)

    # Export settings; the sample_* keys win over the generic ones.
    timesteps = cfg.get("timesteps", 200)
    seq_len = cfg.get("sample_seq_len", cfg.get("seq_len", 64))
    batch_size = cfg.get("sample_batch_size", cfg.get("batch_size", 2))
    clip_k = cfg.get("clip_k", 5.0)

    python = sys.executable

    def script(name):
        # Absolute path (as a string) of a sibling script.
        return str(here / name)

    if not args.skip_prepare:
        run([python, script("prepare_data.py")])

    if not args.skip_train:
        train_cmd = [python, script("train.py")]
        train_cmd += ["--config", str(config_path)]
        train_cmd += ["--device", args.device]
        run(train_cmd)

    if not args.skip_export:
        export_cmd = [python, script("export_samples.py"), "--include-time"]
        export_cmd += ["--device", args.device]
        export_cmd += ["--config", str(config_path)]
        export_cmd += ["--timesteps", str(timesteps)]
        export_cmd += ["--seq-len", str(seq_len)]
        export_cmd += ["--batch-size", str(batch_size)]
        export_cmd += ["--clip-k", str(clip_k)]
        export_cmd.append("--use-ema")
        run(export_cmd)

    if not args.skip_eval:
        eval_cmd = [python, script("evaluate_generated.py")]
        # Only pass --reference when the config names a data source.
        reference = cfg.get("data_glob") or cfg.get("data_path") or ""
        if reference:
            eval_cmd += ["--reference", str(reference)]
        run(eval_cmd)
        # NOTE(review): the summary step is assumed to belong to the eval
        # stage; the original nesting is not visible in this view — confirm.
        run([python, script("summary_metrics.py")])

    if not args.skip_diagnose:
        # NOTE(review): --reference receives the config path here, whereas the
        # eval stage passes the data glob/path — confirm diagnose_ks.py
        # expects a config file.
        diagnose_cmd = [python, script("diagnose_ks.py")]
        diagnose_cmd += ["--generated", str(here / "results" / "generated.csv")]
        diagnose_cmd += ["--reference", str(config_path)]
        diagnose_cmd += ["--top-k", "10"]
        run(diagnose_cmd)


if __name__ == "__main__":
    main()
|
||||
@@ -194,6 +194,7 @@ Recent run (user-reported, Windows):
|
||||
|
||||
## 10. Automation / 自动化
|
||||
`example/run_all.py` runs all stages with config-driven paths.
|
||||
`example/run_all_full.py` runs prepare/train/export/eval + KS diagnostics in one command.
|
||||
`example/run_compare.py` can run a baseline vs temporal config and compute metric deltas.
|
||||
|
||||
---
|
||||
|
||||
Reference in New Issue
Block a user