Add soft bounds, per-feature scaling, and full pipeline runner

This commit is contained in:
2026-01-28 00:35:00 +08:00
parent f0f89abf4f
commit c3fb07ef11
8 changed files with 183 additions and 5 deletions

View File

@@ -44,7 +44,21 @@
"cont_clamp_x0": 5.0,
"use_quantile_transform": true,
"quantile_bins": 1001,
"cont_bound_mode": "sigmoid",
"cont_bound_mode": "soft_tanh",
"cont_bound_strength": 2.0,
"cont_post_scale": {
"P1_B4002": 0.8,
"P1_B400B": 0.8,
"P1_FT02Z": 0.8,
"P1_PCV01D": 0.8,
"P1_PCV01Z": 0.8,
"P1_PCV02Z": 0.8,
"P2_24Vdc": 0.8,
"P2_MSD": 0.8,
"P3_LCP01D": 0.8,
"P4_ST_PT01": 0.8,
"P4_ST_TT01": 0.8
},
"shuffle_buffer": 256,
"use_temporal_stage1": true,
"temporal_hidden_dim": 256,

View File

@@ -44,7 +44,21 @@
"cont_clamp_x0": 5.0,
"use_quantile_transform": true,
"quantile_bins": 1001,
"cont_bound_mode": "sigmoid",
"cont_bound_mode": "soft_tanh",
"cont_bound_strength": 2.0,
"cont_post_scale": {
"P1_B4002": 0.8,
"P1_B400B": 0.8,
"P1_FT02Z": 0.8,
"P1_PCV01D": 0.8,
"P1_PCV01Z": 0.8,
"P1_PCV02Z": 0.8,
"P2_24Vdc": 0.8,
"P2_MSD": 0.8,
"P3_LCP01D": 0.8,
"P4_ST_PT01": 0.8,
"P4_ST_TT01": 0.8
},
"shuffle_buffer": 1024,
"use_temporal_stage1": false,
"sample_batch_size": 4,

View File

@@ -44,7 +44,21 @@
"cont_clamp_x0": 5.0,
"use_quantile_transform": true,
"quantile_bins": 1001,
"cont_bound_mode": "sigmoid",
"cont_bound_mode": "soft_tanh",
"cont_bound_strength": 2.0,
"cont_post_scale": {
"P1_B4002": 0.8,
"P1_B400B": 0.8,
"P1_FT02Z": 0.8,
"P1_PCV01D": 0.8,
"P1_PCV01Z": 0.8,
"P1_PCV02Z": 0.8,
"P2_24Vdc": 0.8,
"P2_MSD": 0.8,
"P3_LCP01D": 0.8,
"P4_ST_PT01": 0.8,
"P4_ST_TT01": 0.8
},
"shuffle_buffer": 1024,
"use_temporal_stage1": true,
"temporal_hidden_dim": 512,

View File

@@ -144,6 +144,8 @@ def main():
cont_clamp_x0 = float(cfg.get("cont_clamp_x0", 0.0))
use_quantile = bool(cfg.get("use_quantile_transform", False))
cont_bound_mode = str(cfg.get("cont_bound_mode", "clamp"))
cont_bound_strength = float(cfg.get("cont_bound_strength", 1.0))
cont_post_scale = cfg.get("cont_post_scale", {}) if isinstance(cfg.get("cont_post_scale", {}), dict) else {}
use_temporal_stage1 = bool(cfg.get("use_temporal_stage1", False))
temporal_hidden_dim = int(cfg.get("temporal_hidden_dim", 256))
temporal_num_layers = int(cfg.get("temporal_num_layers", 1))
@@ -287,10 +289,28 @@ def main():
hi = vmax.get(c, None)
if lo is None or hi is None:
continue
lo = float(lo)
hi = float(hi)
if cont_bound_mode == "sigmoid":
x_cont[:, :, i] = float(lo) + (float(hi) - float(lo)) * torch.sigmoid(x_cont[:, :, i])
x_cont[:, :, i] = lo + (hi - lo) * torch.sigmoid(x_cont[:, :, i])
elif cont_bound_mode == "soft_tanh":
# Soft bound without hard piling at edges
mid = 0.5 * (lo + hi)
half = 0.5 * (hi - lo)
denom = cont_bound_strength if cont_bound_strength > 0 else 1.0
x_cont[:, :, i] = mid + half * torch.tanh(x_cont[:, :, i] / denom)
else:
x_cont[:, :, i] = torch.clamp(x_cont[:, :, i], float(lo), float(hi))
x_cont[:, :, i] = torch.clamp(x_cont[:, :, i], lo, hi)
# optional post-scaling for problematic features
if cont_post_scale:
for i, c in enumerate(cont_cols):
if c in cont_post_scale:
try:
scale = float(cont_post_scale[c])
except Exception:
scale = 1.0
x_cont[:, :, i] = x_cont[:, :, i] * scale
header = read_header(data_path)
out_cols = [c for c in header if c != time_col or args.include_time]

107
example/run_all_full.py Normal file
View File

@@ -0,0 +1,107 @@
#!/usr/bin/env python3
"""Run full pipeline + diagnostics in one command."""
import argparse
import json
import subprocess
import sys
from pathlib import Path
from platform_utils import safe_path, is_windows, resolve_path
def run(cmd):
    """Echo a command line, then execute it as a child process.

    Args:
        cmd: List of string arguments, program first. The list is echoed
            as given; each element is then passed through ``safe_path``
            before the process is started.

    Raises:
        subprocess.CalledProcessError: If the child exits with a non-zero
            status (``check=True``).
    """
    print("running:", " ".join(cmd))
    # NOTE(review): safe_path comes from platform_utils; presumably it
    # normalizes path-like arguments for the host OS -- confirm there.
    prepared = [safe_path(arg) for arg in cmd]
    # One call serves every platform: an argument list with shell=False
    # (subprocess.run's default) is never interpreted by a shell.
    subprocess.run(prepared, check=True, shell=False)
def parse_args(argv=None):
    """Parse the command-line options of the full pipeline runner.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is what
            the script entry point relies on.

    Returns:
        argparse.Namespace with ``config`` (defaults to ``config.json``
        next to this script), ``device`` (defaults to ``"auto"``) and the
        boolean flags ``skip_prepare``, ``skip_train``, ``skip_export``,
        ``skip_eval`` and ``skip_diagnose`` (all ``False`` unless given).
    """
    parser = argparse.ArgumentParser(description="Run prepare -> train -> export -> eval -> diagnostics.")
    # The default config lives in the same directory as this script.
    base_dir = Path(__file__).resolve().parent
    parser.add_argument("--config", default=str(base_dir / "config.json"))
    parser.add_argument("--device", default="auto", help="cpu, cuda, or auto")
    parser.add_argument("--skip-prepare", action="store_true")
    parser.add_argument("--skip-train", action="store_true")
    parser.add_argument("--skip-export", action="store_true")
    parser.add_argument("--skip-eval", action="store_true")
    parser.add_argument("--skip-diagnose", action="store_true")
    return parser.parse_args(argv)
def resolve_config(base_dir: Path, cfg_arg: str) -> Path:
    """Locate the config file named by ``cfg_arg``.

    Candidates are tried in this order:

    1. ``cfg_arg`` itself when it is an absolute path;
    2. ``base_dir / cfg_arg`` when that path exists;
    3. ``cfg_arg`` as given (relative to the working directory) when it
       exists;
    4. otherwise ``cfg_arg`` paired with ``base_dir``, without checking
       that anything exists there.

    The chosen (directory, path) pair is handed to ``resolve_path`` and the
    result is wrapped in ``Path``.
    NOTE(review): resolve_path is imported from platform_utils; its exact
    semantics are not visible here.
    """
    requested = Path(cfg_arg)
    beside_script = base_dir / requested

    if requested.is_absolute():
        anchor, target = requested.parent, requested
    elif beside_script.exists():
        anchor, target = beside_script.parent, beside_script
    elif requested.exists():
        anchor, target = requested.parent, requested
    else:
        anchor, target = base_dir, requested

    return Path(resolve_path(anchor, target))
def main():
# Entry point: load the JSON config, then launch each pipeline stage
# (prepare, train, export, eval, diagnose) as a child process via run(),
# skipping any stage whose --skip-* flag was given.
# NOTE(review): indentation is not visible in this view, so the nesting
# described in these comments is inferred from statement order -- confirm.
args = parse_args()
base_dir = Path(__file__).resolve().parent
config_path = resolve_config(base_dir, args.config)
with open(config_path, "r", encoding="utf-8") as f:
cfg = json.load(f)
# Sampling parameters forwarded to export_samples.py. The sample_* keys
# take precedence over the general seq_len / batch_size keys; the literals
# are the fallbacks used when the config has neither.
timesteps = cfg.get("timesteps", 200)
seq_len = cfg.get("sample_seq_len", cfg.get("seq_len", 64))
batch_size = cfg.get("sample_batch_size", cfg.get("batch_size", 2))
clip_k = cfg.get("clip_k", 5.0)
# Stage 1: data preparation (invoked without any arguments).
if not args.skip_prepare:
run([sys.executable, str(base_dir / "prepare_data.py")])
# Stage 2: training with the resolved config and the requested device.
if not args.skip_train:
run([sys.executable, str(base_dir / "train.py"), "--config", str(config_path), "--device", args.device])
# Stage 3: sample export; every numeric option is stringified for argv.
if not args.skip_export:
run(
[
sys.executable,
str(base_dir / "export_samples.py"),
"--include-time",
"--device",
args.device,
"--config",
str(config_path),
"--timesteps",
str(timesteps),
"--seq-len",
str(seq_len),
"--batch-size",
str(batch_size),
"--clip-k",
str(clip_k),
"--use-ema",
]
)
# Stage 4: evaluation. The reference is the config's data_glob, else its
# data_path; when neither is set evaluate_generated.py runs without
# --reference.
if not args.skip_eval:
ref = cfg.get("data_glob") or cfg.get("data_path") or ""
if ref:
run([sys.executable, str(base_dir / "evaluate_generated.py"), "--reference", str(ref)])
else:
run([sys.executable, str(base_dir / "evaluate_generated.py")])
# NOTE(review): presumably still part of the eval stage (runs after either
# evaluate_generated.py call) -- confirm against the indented file.
run([sys.executable, str(base_dir / "summary_metrics.py")])
# Stage 5: diagnostics on results/generated.csv, top 10 entries.
# NOTE(review): --reference receives the config file path here, not the
# data path used by the eval stage -- confirm diagnose_ks.py expects that.
if not args.skip_diagnose:
run(
[
sys.executable,
str(base_dir / "diagnose_ks.py"),
"--generated",
str(base_dir / "results" / "generated.csv"),
"--reference",
str(config_path),
"--top-k",
"10",
]
)
# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
main()