From c3fb07ef11d16ab3687391fe12bcadbaec143331 Mon Sep 17 00:00:00 2001
From: MingzheYang <zhepro88@gmail.com>
Date: Wed, 28 Jan 2026 00:35:00 +0800
Subject: [PATCH] Add soft bounds, per-feature scaling, and full pipeline
 runner

---
 docs/README.md                      |   1 +
 docs/decisions.md                   |   7 ++
 example/config.json                 |  16 ++++-
 example/config_no_temporal.json     |  16 ++++-
 example/config_temporal_strong.json |  16 ++++-
 example/export_samples.py           |  24 ++++++-
 example/run_all_full.py             | 107 ++++++++++++++++++++++++++++
 report.md                           |   1 +
 8 files changed, 183 insertions(+), 5 deletions(-)
 create mode 100644 example/run_all_full.py

diff --git a/docs/README.md b/docs/README.md
index dc6b8d4..15d8cd2 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -13,3 +13,4 @@ Conventions:
 
 Tools:
 - `example/diagnose_ks.py` for per-feature KS + CDF plots.
+- `example/run_all_full.py` for one-command full pipeline + diagnostics.
diff --git a/docs/decisions.md b/docs/decisions.md
index ca8858f..cd3930b 100644
--- a/docs/decisions.md
+++ b/docs/decisions.md
@@ -55,3 +55,10 @@
 - **Files**:
   - `example/data_utils.py`
   - `example/export_samples.py`
+
+## 2026-01-27 — Soft bounds + post-scale for boundary pile-up
+- **Decision**: Replace hard sigmoid bounds with soft tanh bounds and allow per-feature post-scaling.
+- **Why**: With sigmoid bounds, many continuous features piled up at their upper bound (KS = 1.0).
+- **Files**:
+  - `example/export_samples.py`
+  - `example/config.json` (same change in `config_no_temporal.json` and `config_temporal_strong.json`)
diff --git a/example/config.json b/example/config.json
index 0e730fe..30d31b5 100644
--- a/example/config.json
+++ b/example/config.json
@@ -44,7 +44,21 @@
   "cont_clamp_x0": 5.0,
   "use_quantile_transform": true,
   "quantile_bins": 1001,
-  "cont_bound_mode": "sigmoid",
+  "cont_bound_mode": "soft_tanh",
+  "cont_bound_strength": 2.0,
+  "cont_post_scale": {
+    "P1_B4002": 0.8,
+    "P1_B400B": 0.8,
+    "P1_FT02Z": 0.8,
+    "P1_PCV01D": 0.8,
+    "P1_PCV01Z": 0.8,
+    "P1_PCV02Z": 0.8,
+    "P2_24Vdc": 0.8,
+    "P2_MSD": 0.8,
+    "P3_LCP01D": 0.8,
+    "P4_ST_PT01": 0.8,
+    "P4_ST_TT01": 0.8
+  },
   "shuffle_buffer": 256,
   "use_temporal_stage1": true,
   "temporal_hidden_dim": 256,
diff --git a/example/config_no_temporal.json b/example/config_no_temporal.json
index a061ba4..1e6a9a8 100644
--- a/example/config_no_temporal.json
+++ b/example/config_no_temporal.json
@@ -44,7 +44,21 @@
   "cont_clamp_x0": 5.0,
   "use_quantile_transform": true,
   "quantile_bins": 1001,
-  "cont_bound_mode": "sigmoid",
+  "cont_bound_mode": "soft_tanh",
+  "cont_bound_strength": 2.0,
+  "cont_post_scale": {
+    "P1_B4002": 0.8,
+    "P1_B400B": 0.8,
+    "P1_FT02Z": 0.8,
+    "P1_PCV01D": 0.8,
+    "P1_PCV01Z": 0.8,
+    "P1_PCV02Z": 0.8,
+    "P2_24Vdc": 0.8,
+    "P2_MSD": 0.8,
+    "P3_LCP01D": 0.8,
+    "P4_ST_PT01": 0.8,
+    "P4_ST_TT01": 0.8
+  },
   "shuffle_buffer": 1024,
   "use_temporal_stage1": false,
   "sample_batch_size": 4,
diff --git a/example/config_temporal_strong.json b/example/config_temporal_strong.json
index eb7c960..7bea30e 100644
--- a/example/config_temporal_strong.json
+++ b/example/config_temporal_strong.json
@@ -44,7 +44,21 @@
   "cont_clamp_x0": 5.0,
   "use_quantile_transform": true,
   "quantile_bins": 1001,
-  "cont_bound_mode": "sigmoid",
+  "cont_bound_mode": "soft_tanh",
+  "cont_bound_strength": 2.0,
+  "cont_post_scale": {
+    "P1_B4002": 0.8,
+    "P1_B400B": 0.8,
+    "P1_FT02Z": 0.8,
+    "P1_PCV01D": 0.8,
+    "P1_PCV01Z": 0.8,
+    "P1_PCV02Z": 0.8,
+    "P2_24Vdc": 0.8,
+    "P2_MSD": 0.8,
+    "P3_LCP01D": 0.8,
+    "P4_ST_PT01": 0.8,
+    "P4_ST_TT01": 0.8
+  },
   "shuffle_buffer": 1024,
   "use_temporal_stage1": true,
   "temporal_hidden_dim": 512,
diff --git a/example/export_samples.py b/example/export_samples.py
index 680dec2..ab59fc3 100644
--- a/example/export_samples.py
+++ b/example/export_samples.py
@@ -144,6 +144,8 @@ def main():
     cont_clamp_x0 = float(cfg.get("cont_clamp_x0", 0.0))
     use_quantile = bool(cfg.get("use_quantile_transform", False))
     cont_bound_mode = str(cfg.get("cont_bound_mode", "clamp"))
+    cont_bound_strength = float(cfg.get("cont_bound_strength", 1.0))
+    cont_post_scale = cfg.get("cont_post_scale", {}) if isinstance(cfg.get("cont_post_scale", {}), dict) else {}
     use_temporal_stage1 = bool(cfg.get("use_temporal_stage1", False))
     temporal_hidden_dim = int(cfg.get("temporal_hidden_dim", 256))
     temporal_num_layers = int(cfg.get("temporal_num_layers", 1))
@@ -287,10 +289,28 @@ def main():
             hi = vmax.get(c, None)
             if lo is None or hi is None:
                 continue
+            lo = float(lo)
+            hi = float(hi)
             if cont_bound_mode == "sigmoid":
-                x_cont[:, :, i] = float(lo) + (float(hi) - float(lo)) * torch.sigmoid(x_cont[:, :, i])
+                x_cont[:, :, i] = lo + (hi - lo) * torch.sigmoid(x_cont[:, :, i])
+            elif cont_bound_mode == "soft_tanh":
+                # Soft bound without hard piling at edges
+                mid = 0.5 * (lo + hi)
+                half = 0.5 * (hi - lo)
+                denom = cont_bound_strength if cont_bound_strength > 0 else 1.0
+                x_cont[:, :, i] = mid + half * torch.tanh(x_cont[:, :, i] / denom)
             else:
-                x_cont[:, :, i] = torch.clamp(x_cont[:, :, i], float(lo), float(hi))
+                x_cont[:, :, i] = torch.clamp(x_cont[:, :, i], lo, hi)
+
+    # Optional per-feature post-scaling; runs after bounding, so scaled values may leave [lo, hi].
+    if cont_post_scale:
+        for i, c in enumerate(cont_cols):
+            if c in cont_post_scale:
+                try:
+                    scale = float(cont_post_scale[c])
+                except Exception:
+                    scale = 1.0
+                x_cont[:, :, i] = x_cont[:, :, i] * scale
 
     header = read_header(data_path)
     out_cols = [c for c in header if c != time_col or args.include_time]
diff --git a/example/run_all_full.py b/example/run_all_full.py
new file mode 100644
index 0000000..c04920c
--- /dev/null
+++ b/example/run_all_full.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+"""Run full pipeline + diagnostics in one command."""
+
+import argparse
+import json
+import subprocess
+import sys
+from pathlib import Path
+
+from platform_utils import safe_path, is_windows, resolve_path
+
+
+def run(cmd):
+    """Print *cmd*, then execute it; a non-zero exit raises CalledProcessError."""
+    print("running:", " ".join(cmd))
+    # safe_path is a platform_utils helper applied to every argument before launch.
+    cmd = [safe_path(arg) for arg in cmd]
+    # shell=False is already subprocess.run's default, so one call serves all platforms.
+    subprocess.run(cmd, check=True, shell=False)
+
+
+def parse_args():
+    """Parse CLI options: --config, --device and one --skip-<stage> flag per stage."""
+    parser = argparse.ArgumentParser(description="Run prepare -> train -> export -> eval -> diagnostics.")
+    # Default to the config.json that sits next to this script.
+    default_config = Path(__file__).resolve().parent / "config.json"
+    parser.add_argument("--config", default=str(default_config))
+    parser.add_argument("--device", default="auto", help="cpu, cuda, or auto")
+    # One store_true flag per stage, registered in pipeline order.
+    for stage in ("prepare", "train", "export", "eval", "diagnose"):
+        parser.add_argument(f"--skip-{stage}", action="store_true")
+    return parser.parse_args()
+
+
+def resolve_config(base_dir: Path, cfg_arg: str) -> Path:
+    """Locate the config: absolute path, else under base_dir, else relative to cwd."""
+    given = Path(cfg_arg)
+    # Relative paths prefer the copy under base_dir, then the cwd-relative one.
+    options = [given] if given.is_absolute() else [p for p in (base_dir / given, given) if p.exists()]
+    if not options:
+        # Nothing exists on disk: hand base_dir and the raw argument to resolve_path.
+        return Path(resolve_path(base_dir, given))
+    found = options[0]
+    return Path(resolve_path(found.parent, found))
+
+
+def main():
+    """Run every enabled stage, in order, as a child Python process.
+
+    Order: prepare -> train -> export -> eval (+ summary) -> KS diagnostics.
+    Each ``--skip-*`` flag from ``parse_args`` disables one stage. Sampling
+    options for the export stage are read from the JSON config file.
+    """
+    args = parse_args()
+    base_dir = Path(__file__).resolve().parent
+    config_path = resolve_config(base_dir, args.config)
+    cfg = json.loads(config_path.read_text(encoding="utf-8"))
+
+    def script(name):
+        # Stage scripts are looked up in the same directory as this runner.
+        return str(base_dir / name)
+
+    python = sys.executable
+    cfg_file = str(config_path)
+
+    # Export flags in command-line order; the sample_* keys take precedence
+    # over the plain seq_len / batch_size keys when both are present.
+    export_opts = {
+        "--timesteps": cfg.get("timesteps", 200),
+        "--seq-len": cfg.get("sample_seq_len", cfg.get("seq_len", 64)),
+        "--batch-size": cfg.get("sample_batch_size", cfg.get("batch_size", 2)),
+        "--clip-k": cfg.get("clip_k", 5.0),
+    }
+
+    if not args.skip_prepare:
+        run([python, script("prepare_data.py")])
+    if not args.skip_train:
+        run([python, script("train.py"), "--config", cfg_file, "--device", args.device])
+
+    if not args.skip_export:
+        export_cmd = [python, script("export_samples.py"), "--include-time"]
+        export_cmd += ["--device", args.device, "--config", cfg_file]
+        for flag, value in export_opts.items():
+            export_cmd += [flag, str(value)]
+        export_cmd.append("--use-ema")
+        run(export_cmd)
+
+    if not args.skip_eval:
+        eval_cmd = [python, script("evaluate_generated.py")]
+        # Pass --reference only when the config names a data glob or path.
+        ref = cfg.get("data_glob") or cfg.get("data_path") or ""
+        if ref:
+            eval_cmd += ["--reference", str(ref)]
+        run(eval_cmd)
+        run([python, script("summary_metrics.py")])
+
+    if not args.skip_diagnose:
+        generated_csv = base_dir / "results" / "generated.csv"
+        # NOTE(review): --reference receives the config file here, not the data
+        # path used by the eval stage; confirm diagnose_ks.py expects a config.
+        diagnose_cmd = [python, script("diagnose_ks.py"), "--generated", str(generated_csv)]
+        diagnose_cmd += ["--reference", cfg_file, "--top-k", "10"]
+        run(diagnose_cmd)
+
+
+if __name__ == "__main__":  # run the pipeline only when executed as a script, not on import
+    main()
diff --git a/report.md b/report.md
index 8db76b9..dde9b5e 100644
--- a/report.md
+++ b/report.md
@@ -194,6 +194,7 @@ Recent run (user-reported, Windows):
 
 ## 10. Automation / 自动化
 `example/run_all.py` runs all stages with config-driven paths.
+`example/run_all_full.py` runs prepare/train/export/eval + KS diagnostics in one command.
 `example/run_compare.py` can run a baseline vs temporal config and compute metric deltas.
 
 ---