Add soft bounds, per-feature scaling, and full pipeline runner
This commit is contained in:
@@ -13,3 +13,4 @@ Conventions:
|
|||||||
|
|
||||||
Tools:
|
Tools:
|
||||||
- `example/diagnose_ks.py` for per-feature KS + CDF plots.
|
- `example/diagnose_ks.py` for per-feature KS + CDF plots.
|
||||||
|
- `example/run_all_full.py` for one-command full pipeline + diagnostics.
|
||||||
|
|||||||
@@ -55,3 +55,10 @@
|
|||||||
- **Files**:
|
- **Files**:
|
||||||
- `example/data_utils.py`
|
- `example/data_utils.py`
|
||||||
- `example/export_samples.py`
|
- `example/export_samples.py`
|
||||||
|
|
||||||
|
## 2026-01-27 — Soft bounds + post-scale for boundary pile-up
|
||||||
|
- **Decision**: Replace hard sigmoid bounds with soft tanh bounds and allow per-feature post-scaling.
|
||||||
|
- **Why**: Many continuous features collapsed to max bound (KS=1.0).
|
||||||
|
- **Files**:
|
||||||
|
- `example/export_samples.py`
|
||||||
|
- `example/config.json`
|
||||||
|
|||||||
@@ -44,7 +44,21 @@
|
|||||||
"cont_clamp_x0": 5.0,
|
"cont_clamp_x0": 5.0,
|
||||||
"use_quantile_transform": true,
|
"use_quantile_transform": true,
|
||||||
"quantile_bins": 1001,
|
"quantile_bins": 1001,
|
||||||
"cont_bound_mode": "sigmoid",
|
"cont_bound_mode": "soft_tanh",
|
||||||
|
"cont_bound_strength": 2.0,
|
||||||
|
"cont_post_scale": {
|
||||||
|
"P1_B4002": 0.8,
|
||||||
|
"P1_B400B": 0.8,
|
||||||
|
"P1_FT02Z": 0.8,
|
||||||
|
"P1_PCV01D": 0.8,
|
||||||
|
"P1_PCV01Z": 0.8,
|
||||||
|
"P1_PCV02Z": 0.8,
|
||||||
|
"P2_24Vdc": 0.8,
|
||||||
|
"P2_MSD": 0.8,
|
||||||
|
"P3_LCP01D": 0.8,
|
||||||
|
"P4_ST_PT01": 0.8,
|
||||||
|
"P4_ST_TT01": 0.8
|
||||||
|
},
|
||||||
"shuffle_buffer": 256,
|
"shuffle_buffer": 256,
|
||||||
"use_temporal_stage1": true,
|
"use_temporal_stage1": true,
|
||||||
"temporal_hidden_dim": 256,
|
"temporal_hidden_dim": 256,
|
||||||
|
|||||||
@@ -44,7 +44,21 @@
|
|||||||
"cont_clamp_x0": 5.0,
|
"cont_clamp_x0": 5.0,
|
||||||
"use_quantile_transform": true,
|
"use_quantile_transform": true,
|
||||||
"quantile_bins": 1001,
|
"quantile_bins": 1001,
|
||||||
"cont_bound_mode": "sigmoid",
|
"cont_bound_mode": "soft_tanh",
|
||||||
|
"cont_bound_strength": 2.0,
|
||||||
|
"cont_post_scale": {
|
||||||
|
"P1_B4002": 0.8,
|
||||||
|
"P1_B400B": 0.8,
|
||||||
|
"P1_FT02Z": 0.8,
|
||||||
|
"P1_PCV01D": 0.8,
|
||||||
|
"P1_PCV01Z": 0.8,
|
||||||
|
"P1_PCV02Z": 0.8,
|
||||||
|
"P2_24Vdc": 0.8,
|
||||||
|
"P2_MSD": 0.8,
|
||||||
|
"P3_LCP01D": 0.8,
|
||||||
|
"P4_ST_PT01": 0.8,
|
||||||
|
"P4_ST_TT01": 0.8
|
||||||
|
},
|
||||||
"shuffle_buffer": 1024,
|
"shuffle_buffer": 1024,
|
||||||
"use_temporal_stage1": false,
|
"use_temporal_stage1": false,
|
||||||
"sample_batch_size": 4,
|
"sample_batch_size": 4,
|
||||||
|
|||||||
@@ -44,7 +44,21 @@
|
|||||||
"cont_clamp_x0": 5.0,
|
"cont_clamp_x0": 5.0,
|
||||||
"use_quantile_transform": true,
|
"use_quantile_transform": true,
|
||||||
"quantile_bins": 1001,
|
"quantile_bins": 1001,
|
||||||
"cont_bound_mode": "sigmoid",
|
"cont_bound_mode": "soft_tanh",
|
||||||
|
"cont_bound_strength": 2.0,
|
||||||
|
"cont_post_scale": {
|
||||||
|
"P1_B4002": 0.8,
|
||||||
|
"P1_B400B": 0.8,
|
||||||
|
"P1_FT02Z": 0.8,
|
||||||
|
"P1_PCV01D": 0.8,
|
||||||
|
"P1_PCV01Z": 0.8,
|
||||||
|
"P1_PCV02Z": 0.8,
|
||||||
|
"P2_24Vdc": 0.8,
|
||||||
|
"P2_MSD": 0.8,
|
||||||
|
"P3_LCP01D": 0.8,
|
||||||
|
"P4_ST_PT01": 0.8,
|
||||||
|
"P4_ST_TT01": 0.8
|
||||||
|
},
|
||||||
"shuffle_buffer": 1024,
|
"shuffle_buffer": 1024,
|
||||||
"use_temporal_stage1": true,
|
"use_temporal_stage1": true,
|
||||||
"temporal_hidden_dim": 512,
|
"temporal_hidden_dim": 512,
|
||||||
|
|||||||
@@ -144,6 +144,8 @@ def main():
|
|||||||
cont_clamp_x0 = float(cfg.get("cont_clamp_x0", 0.0))
|
cont_clamp_x0 = float(cfg.get("cont_clamp_x0", 0.0))
|
||||||
use_quantile = bool(cfg.get("use_quantile_transform", False))
|
use_quantile = bool(cfg.get("use_quantile_transform", False))
|
||||||
cont_bound_mode = str(cfg.get("cont_bound_mode", "clamp"))
|
cont_bound_mode = str(cfg.get("cont_bound_mode", "clamp"))
|
||||||
|
cont_bound_strength = float(cfg.get("cont_bound_strength", 1.0))
|
||||||
|
cont_post_scale = cfg.get("cont_post_scale", {}) if isinstance(cfg.get("cont_post_scale", {}), dict) else {}
|
||||||
use_temporal_stage1 = bool(cfg.get("use_temporal_stage1", False))
|
use_temporal_stage1 = bool(cfg.get("use_temporal_stage1", False))
|
||||||
temporal_hidden_dim = int(cfg.get("temporal_hidden_dim", 256))
|
temporal_hidden_dim = int(cfg.get("temporal_hidden_dim", 256))
|
||||||
temporal_num_layers = int(cfg.get("temporal_num_layers", 1))
|
temporal_num_layers = int(cfg.get("temporal_num_layers", 1))
|
||||||
@@ -287,10 +289,28 @@ def main():
|
|||||||
hi = vmax.get(c, None)
|
hi = vmax.get(c, None)
|
||||||
if lo is None or hi is None:
|
if lo is None or hi is None:
|
||||||
continue
|
continue
|
||||||
|
lo = float(lo)
|
||||||
|
hi = float(hi)
|
||||||
if cont_bound_mode == "sigmoid":
|
if cont_bound_mode == "sigmoid":
|
||||||
x_cont[:, :, i] = float(lo) + (float(hi) - float(lo)) * torch.sigmoid(x_cont[:, :, i])
|
x_cont[:, :, i] = lo + (hi - lo) * torch.sigmoid(x_cont[:, :, i])
|
||||||
|
elif cont_bound_mode == "soft_tanh":
|
||||||
|
# Soft bound without hard piling at edges
|
||||||
|
mid = 0.5 * (lo + hi)
|
||||||
|
half = 0.5 * (hi - lo)
|
||||||
|
denom = cont_bound_strength if cont_bound_strength > 0 else 1.0
|
||||||
|
x_cont[:, :, i] = mid + half * torch.tanh(x_cont[:, :, i] / denom)
|
||||||
else:
|
else:
|
||||||
x_cont[:, :, i] = torch.clamp(x_cont[:, :, i], float(lo), float(hi))
|
x_cont[:, :, i] = torch.clamp(x_cont[:, :, i], lo, hi)
|
||||||
|
|
||||||
|
# optional post-scaling for problematic features
|
||||||
|
if cont_post_scale:
|
||||||
|
for i, c in enumerate(cont_cols):
|
||||||
|
if c in cont_post_scale:
|
||||||
|
try:
|
||||||
|
scale = float(cont_post_scale[c])
|
||||||
|
except Exception:
|
||||||
|
scale = 1.0
|
||||||
|
x_cont[:, :, i] = x_cont[:, :, i] * scale
|
||||||
|
|
||||||
header = read_header(data_path)
|
header = read_header(data_path)
|
||||||
out_cols = [c for c in header if c != time_col or args.include_time]
|
out_cols = [c for c in header if c != time_col or args.include_time]
|
||||||
|
|||||||
107
example/run_all_full.py
Normal file
107
example/run_all_full.py
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Run full pipeline + diagnostics in one command."""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from platform_utils import safe_path, is_windows, resolve_path
|
||||||
|
|
||||||
|
|
||||||
|
def run(cmd):
    """Echo a command line, then execute it and fail loudly on error.

    Args:
        cmd: Sequence of argument strings; the first element is the
            executable to launch.

    Raises:
        subprocess.CalledProcessError: If the child process exits with a
            non-zero status (``check=True``).
    """
    print("running:", " ".join(cmd))
    # Every argument goes through the project's safe_path helper before launch.
    cmd = [safe_path(arg) for arg in cmd]
    # The command is an argument list and shell=False is the default, so the
    # same call is correct on every platform; no per-OS branch is required.
    subprocess.run(cmd, check=True, shell=False)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args(argv=None):
    """Parse command-line options for the full pipeline runner.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), argparse reads ``sys.argv[1:]``, which keeps the
            original zero-argument call working unchanged.

    Returns:
        argparse.Namespace with ``config``, ``device`` and the boolean
        ``skip_prepare`` / ``skip_train`` / ``skip_export`` / ``skip_eval`` /
        ``skip_diagnose`` flags.
    """
    base_dir = Path(__file__).resolve().parent
    parser = argparse.ArgumentParser(description="Run prepare -> train -> export -> eval -> diagnostics.")
    # The default config is the config.json sitting next to this script.
    parser.add_argument("--config", default=str(base_dir / "config.json"))
    parser.add_argument("--device", default="auto", help="cpu, cuda, or auto")
    # One opt-out switch per pipeline stage; all default to False.
    for stage in ("prepare", "train", "export", "eval", "diagnose"):
        parser.add_argument(
            f"--skip-{stage}",
            action="store_true",
            help=f"skip the {stage} stage",
        )
    return parser.parse_args(argv)
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_config(base_dir: Path, cfg_arg: str) -> Path:
    """Turn the ``--config`` argument into a resolved config path.

    Selection order:
      1. An absolute path is used as given, anchored at its own parent.
      2. Otherwise ``base_dir / cfg_arg`` is used if it exists.
      3. Otherwise ``cfg_arg`` itself is used if it exists (a relative path
         is checked against the current working directory).
      4. Otherwise ``cfg_arg`` is handed to ``resolve_path`` anchored at
         ``base_dir``.

    Args:
        base_dir: Directory used as the first lookup location and as the
            fallback anchor.
        cfg_arg: Raw path string supplied by the caller.

    Returns:
        Path built from whatever ``resolve_path(anchor, path)`` returns.
    """
    requested = Path(cfg_arg)
    beside_base = base_dir / requested

    if requested.is_absolute():
        anchor, chosen = requested.parent, requested
    elif beside_base.exists():
        anchor, chosen = beside_base.parent, beside_base
    elif requested.exists():
        anchor, chosen = requested.parent, requested
    else:
        # Nothing found on disk: defer to resolve_path relative to base_dir.
        anchor, chosen = base_dir, requested

    return Path(resolve_path(anchor, chosen))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run prepare -> train -> export -> eval -> diagnostics as subprocesses.

    Reads the JSON config selected on the command line, then launches each
    stage script located next to this file unless its ``--skip-*`` flag was
    given. Any failing stage aborts the run because ``run`` raises on a
    non-zero exit status.
    """
    args = parse_args()
    base_dir = Path(__file__).resolve().parent
    config_path = resolve_config(base_dir, args.config)
    with open(config_path, "r", encoding="utf-8") as handle:
        cfg = json.load(handle)

    # Sampling parameters forwarded to the export stage; sample_* keys take
    # precedence over their training counterparts.
    timesteps = cfg.get("timesteps", 200)
    seq_len = cfg.get("sample_seq_len", cfg.get("seq_len", 64))
    batch_size = cfg.get("sample_batch_size", cfg.get("batch_size", 2))
    clip_k = cfg.get("clip_k", 5.0)

    python = sys.executable
    config_str = str(config_path)

    def script(name):
        # Absolute-ish path of a stage script living beside this file.
        return str(base_dir / name)

    if not args.skip_prepare:
        run([python, script("prepare_data.py")])

    if not args.skip_train:
        run([python, script("train.py"), "--config", config_str, "--device", args.device])

    if not args.skip_export:
        export_cmd = [python, script("export_samples.py"), "--include-time"]
        export_cmd += ["--device", args.device]
        export_cmd += ["--config", config_str]
        export_cmd += ["--timesteps", str(timesteps)]
        export_cmd += ["--seq-len", str(seq_len)]
        export_cmd += ["--batch-size", str(batch_size)]
        export_cmd += ["--clip-k", str(clip_k)]
        export_cmd.append("--use-ema")
        run(export_cmd)

    if not args.skip_eval:
        eval_cmd = [python, script("evaluate_generated.py")]
        # Only pass --reference when the config names a data source.
        ref = cfg.get("data_glob") or cfg.get("data_path") or ""
        if ref:
            eval_cmd += ["--reference", str(ref)]
        run(eval_cmd)
        run([python, script("summary_metrics.py")])

    if not args.skip_diagnose:
        # NOTE(review): the config path is what gets passed as --reference
        # here; confirm diagnose_ks.py accepts a config file for that option.
        diagnose_cmd = [python, script("diagnose_ks.py")]
        diagnose_cmd += ["--generated", str(base_dir / "results" / "generated.csv")]
        diagnose_cmd += ["--reference", config_str]
        diagnose_cmd += ["--top-k", "10"]
        run(diagnose_cmd)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -194,6 +194,7 @@ Recent run (user-reported, Windows):
|
|||||||
|
|
||||||
## 10. Automation / 自动化
|
## 10. Automation / 自动化
|
||||||
`example/run_all.py` runs all stages with config-driven paths.
|
`example/run_all.py` runs all stages with config-driven paths.
|
||||||
|
`example/run_all_full.py` runs prepare/train/export/eval + KS diagnostics in one command.
|
||||||
`example/run_compare.py` can run a baseline vs temporal config and compute metric deltas.
|
`example/run_compare.py` can run a baseline vs temporal config and compute metric deltas.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|||||||
Reference in New Issue
Block a user