Add soft bounds, per-feature scaling, and full pipeline runner

2026-01-28 00:35:00 +08:00
parent f0f89abf4f
commit c3fb07ef11
8 changed files with 183 additions and 5 deletions
--- a/example/export_samples.py
+++ b/example/export_samples.py
@@ -144,6 +144,8 @@ def main():
    cont_clamp_x0 = float(cfg.get("cont_clamp_x0", 0.0))
    use_quantile = bool(cfg.get("use_quantile_transform", False))
    cont_bound_mode = str(cfg.get("cont_bound_mode", "clamp"))
+    cont_bound_strength = float(cfg.get("cont_bound_strength", 1.0))
+    cont_post_scale = cfg.get("cont_post_scale", {}) if isinstance(cfg.get("cont_post_scale", {}), dict) else {}
    use_temporal_stage1 = bool(cfg.get("use_temporal_stage1", False))
    temporal_hidden_dim = int(cfg.get("temporal_hidden_dim", 256))
    temporal_num_layers = int(cfg.get("temporal_num_layers", 1))
@@ -287,10 +289,28 @@ def main():
            hi = vmax.get(c, None)
            if lo is None or hi is None:
                continue
+            lo = float(lo)
+            hi = float(hi)
            if cont_bound_mode == "sigmoid":
-                x_cont[:, :, i] = float(lo) + (float(hi) - float(lo)) * torch.sigmoid(x_cont[:, :, i])
+                x_cont[:, :, i] = lo + (hi - lo) * torch.sigmoid(x_cont[:, :, i])
+            elif cont_bound_mode == "soft_tanh":
+                # Soft bound: tanh squashes values smoothly into [lo, hi], avoiding the pile-up at the edges that hard clamping causes
+                mid = 0.5 * (lo + hi)
+                half = 0.5 * (hi - lo)
+                denom = cont_bound_strength if cont_bound_strength > 0 else 1.0
+                x_cont[:, :, i] = mid + half * torch.tanh(x_cont[:, :, i] / denom)
            else:
-                x_cont[:, :, i] = torch.clamp(x_cont[:, :, i], float(lo), float(hi))
+                x_cont[:, :, i] = torch.clamp(x_cont[:, :, i], lo, hi)
+
+    # Optional per-feature post-scaling, applied after bounding (so scaled values may fall outside [lo, hi])
+    if cont_post_scale:
+        for i, c in enumerate(cont_cols):
+            if c in cont_post_scale:
+                try:
+                    scale = float(cont_post_scale[c])
+                except Exception:
+                    scale = 1.0
+                x_cont[:, :, i] = x_cont[:, :, i] * scale

    header = read_header(data_path)
    out_cols = [c for c in header if c != time_col or args.include_time]