update2
@@ -39,3 +39,12 @@
|
|||||||
- **Why**: Avoid blind reweighting and find the specific features causing KS to stay high.
|
- **Why**: Avoid blind reweighting and find the specific features causing KS to stay high.
|
||||||
- **Files**:
|
- **Files**:
|
||||||
- `example/diagnose_ks.py`
|
- `example/diagnose_ks.py`
|
||||||
|
|
||||||
|
## 2026-01-26 — Quantile transform + sigmoid bounds for continuous features
|
||||||
|
- **Decision**: Add optional quantile normalization (TabDDPM-style) and sigmoid-based bounds to reduce KS spikes.
|
||||||
|
- **Why**: KS failures are dominated by boundary pile-up and tail mismatch.
|
||||||
|
- **Files**:
|
||||||
|
- `example/data_utils.py`
|
||||||
|
- `example/prepare_data.py`
|
||||||
|
- `example/export_samples.py`
|
||||||
|
- `example/config.json`
|
||||||
|
|||||||
@@ -42,6 +42,9 @@
|
|||||||
"cont_loss_eps": 1e-6,
|
"cont_loss_eps": 1e-6,
|
||||||
"cont_target": "x0",
|
"cont_target": "x0",
|
||||||
"cont_clamp_x0": 5.0,
|
"cont_clamp_x0": 5.0,
|
||||||
|
"use_quantile_transform": true,
|
||||||
|
"quantile_bins": 1001,
|
||||||
|
"cont_bound_mode": "sigmoid",
|
||||||
"shuffle_buffer": 256,
|
"shuffle_buffer": 256,
|
||||||
"use_temporal_stage1": true,
|
"use_temporal_stage1": true,
|
||||||
"temporal_hidden_dim": 256,
|
"temporal_hidden_dim": 256,
|
||||||
|
|||||||
@@ -42,6 +42,9 @@
|
|||||||
"cont_loss_eps": 1e-6,
|
"cont_loss_eps": 1e-6,
|
||||||
"cont_target": "x0",
|
"cont_target": "x0",
|
||||||
"cont_clamp_x0": 5.0,
|
"cont_clamp_x0": 5.0,
|
||||||
|
"use_quantile_transform": true,
|
||||||
|
"quantile_bins": 1001,
|
||||||
|
"cont_bound_mode": "sigmoid",
|
||||||
"shuffle_buffer": 1024,
|
"shuffle_buffer": 1024,
|
||||||
"use_temporal_stage1": false,
|
"use_temporal_stage1": false,
|
||||||
"sample_batch_size": 4,
|
"sample_batch_size": 4,
|
||||||
|
|||||||
@@ -42,6 +42,9 @@
|
|||||||
"cont_loss_eps": 1e-6,
|
"cont_loss_eps": 1e-6,
|
||||||
"cont_target": "x0",
|
"cont_target": "x0",
|
||||||
"cont_clamp_x0": 5.0,
|
"cont_clamp_x0": 5.0,
|
||||||
|
"use_quantile_transform": true,
|
||||||
|
"quantile_bins": 1001,
|
||||||
|
"cont_bound_mode": "sigmoid",
|
||||||
"shuffle_buffer": 1024,
|
"shuffle_buffer": 1024,
|
||||||
"use_temporal_stage1": true,
|
"use_temporal_stage1": true,
|
||||||
"temporal_hidden_dim": 512,
|
"temporal_hidden_dim": 512,
|
||||||
|
|||||||
@@ -138,6 +138,7 @@ def compute_cont_stats(
|
|||||||
cont_cols: List[str],
|
cont_cols: List[str],
|
||||||
max_rows: Optional[int] = None,
|
max_rows: Optional[int] = None,
|
||||||
transforms: Optional[Dict[str, str]] = None,
|
transforms: Optional[Dict[str, str]] = None,
|
||||||
|
quantile_bins: Optional[int] = None,
|
||||||
):
|
):
|
||||||
"""Compute stats on (optionally transformed) values. Returns raw + transformed stats."""
|
"""Compute stats on (optionally transformed) values. Returns raw + transformed stats."""
|
||||||
# First pass (raw) for metadata and raw mean/std
|
# First pass (raw) for metadata and raw mean/std
|
||||||
@@ -147,10 +148,11 @@ def compute_cont_stats(
|
|||||||
if transforms is None:
|
if transforms is None:
|
||||||
transforms = {c: "none" for c in cont_cols}
|
transforms = {c: "none" for c in cont_cols}
|
||||||
|
|
||||||
# Second pass for transformed mean/std
|
# Second pass for transformed mean/std (and optional quantiles)
|
||||||
count = {c: 0 for c in cont_cols}
|
count = {c: 0 for c in cont_cols}
|
||||||
mean = {c: 0.0 for c in cont_cols}
|
mean = {c: 0.0 for c in cont_cols}
|
||||||
m2 = {c: 0.0 for c in cont_cols}
|
m2 = {c: 0.0 for c in cont_cols}
|
||||||
|
quantile_values = {c: [] for c in cont_cols} if quantile_bins and quantile_bins > 1 else None
|
||||||
for i, row in enumerate(iter_rows(path)):
|
for i, row in enumerate(iter_rows(path)):
|
||||||
for c in cont_cols:
|
for c in cont_cols:
|
||||||
raw_val = row[c]
|
raw_val = row[c]
|
||||||
@@ -161,6 +163,8 @@ def compute_cont_stats(
|
|||||||
if x < 0:
|
if x < 0:
|
||||||
x = 0.0
|
x = 0.0
|
||||||
x = math.log1p(x)
|
x = math.log1p(x)
|
||||||
|
if quantile_values is not None:
|
||||||
|
quantile_values[c].append(x)
|
||||||
n = count[c] + 1
|
n = count[c] + 1
|
||||||
delta = x - mean[c]
|
delta = x - mean[c]
|
||||||
mean[c] += delta / n
|
mean[c] += delta / n
|
||||||
@@ -178,6 +182,25 @@ def compute_cont_stats(
|
|||||||
var = 0.0
|
var = 0.0
|
||||||
std[c] = var ** 0.5 if var > 0 else 1.0
|
std[c] = var ** 0.5 if var > 0 else 1.0
|
||||||
|
|
||||||
|
quantile_probs = None
|
||||||
|
quantile_table = None
|
||||||
|
if quantile_values is not None:
|
||||||
|
quantile_probs = [i / (quantile_bins - 1) for i in range(quantile_bins)]
|
||||||
|
quantile_table = {}
|
||||||
|
for c in cont_cols:
|
||||||
|
vals = quantile_values[c]
|
||||||
|
if not vals:
|
||||||
|
quantile_table[c] = [0.0 for _ in quantile_probs]
|
||||||
|
continue
|
||||||
|
vals.sort()
|
||||||
|
n = len(vals)
|
||||||
|
qvals = []
|
||||||
|
for p in quantile_probs:
|
||||||
|
idx = int(round(p * (n - 1)))
|
||||||
|
idx = max(0, min(n - 1, idx))
|
||||||
|
qvals.append(float(vals[idx]))
|
||||||
|
quantile_table[c] = qvals
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"mean": mean,
|
"mean": mean,
|
||||||
"std": std,
|
"std": std,
|
||||||
@@ -191,6 +214,8 @@ def compute_cont_stats(
|
|||||||
"skew": raw["skew"],
|
"skew": raw["skew"],
|
||||||
"all_pos": raw["all_pos"],
|
"all_pos": raw["all_pos"],
|
||||||
"max_rows": max_rows,
|
"max_rows": max_rows,
|
||||||
|
"quantile_probs": quantile_probs,
|
||||||
|
"quantile_values": quantile_table,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -249,6 +274,9 @@ def normalize_cont(
|
|||||||
mean: Dict[str, float],
|
mean: Dict[str, float],
|
||||||
std: Dict[str, float],
|
std: Dict[str, float],
|
||||||
transforms: Optional[Dict[str, str]] = None,
|
transforms: Optional[Dict[str, str]] = None,
|
||||||
|
quantile_probs: Optional[List[float]] = None,
|
||||||
|
quantile_values: Optional[Dict[str, List[float]]] = None,
|
||||||
|
use_quantile: bool = False,
|
||||||
):
|
):
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
@@ -256,11 +284,64 @@ def normalize_cont(
|
|||||||
for i, c in enumerate(cont_cols):
|
for i, c in enumerate(cont_cols):
|
||||||
if transforms.get(c) == "log1p":
|
if transforms.get(c) == "log1p":
|
||||||
x[:, :, i] = torch.log1p(torch.clamp(x[:, :, i], min=0))
|
x[:, :, i] = torch.log1p(torch.clamp(x[:, :, i], min=0))
|
||||||
|
if use_quantile:
|
||||||
|
if not quantile_probs or not quantile_values:
|
||||||
|
raise ValueError("use_quantile_transform enabled but quantile stats missing")
|
||||||
|
x = apply_quantile_transform(x, cont_cols, quantile_probs, quantile_values)
|
||||||
mean_t = torch.tensor([mean[c] for c in cont_cols], dtype=x.dtype, device=x.device)
|
mean_t = torch.tensor([mean[c] for c in cont_cols], dtype=x.dtype, device=x.device)
|
||||||
std_t = torch.tensor([std[c] for c in cont_cols], dtype=x.dtype, device=x.device)
|
std_t = torch.tensor([std[c] for c in cont_cols], dtype=x.dtype, device=x.device)
|
||||||
return (x - mean_t) / std_t
|
return (x - mean_t) / std_t
|
||||||
|
|
||||||
|
|
||||||
|
def _normal_cdf(x):
    """Return the standard normal CDF of ``x``, evaluated element-wise.

    Computed as ``0.5 * erfc(-x / sqrt(2))``. This is algebraically the same
    as ``0.5 * (1 + erf(x / sqrt(2)))`` but avoids the cancellation that makes
    the ``1 + erf`` form collapse to exactly 0 for strongly negative inputs.

    Args:
        x: Floating-point ``torch.Tensor`` of z-scores.

    Returns:
        Tensor of probabilities with the same shape as ``x``.
    """
    import torch

    return 0.5 * torch.erfc(-x / math.sqrt(2.0))
|
||||||
|
|
||||||
|
|
||||||
|
def _normal_ppf(p, eps=1e-6):
    """Return the standard normal quantile function (inverse CDF) of ``p``.

    Args:
        p: Floating-point ``torch.Tensor`` of probabilities.
        eps: Probabilities are clamped to ``[eps, 1 - eps]`` before inversion
            so that inputs at or beyond 0 and 1 map to finite z-scores rather
            than ``-inf`` / ``inf``. Defaults to ``1e-6``.

    Returns:
        Tensor of z-scores with the same shape as ``p``.
    """
    import torch

    p = torch.clamp(p, eps, 1.0 - eps)
    # Phi^-1(p) = sqrt(2) * erfinv(2p - 1)
    return math.sqrt(2.0) * torch.erfinv(2.0 * p - 1.0)
|
||||||
|
|
||||||
|
|
||||||
|
def apply_quantile_transform(x, cont_cols, quantile_probs, quantile_values):
    """Map each continuous feature to a normal score through its quantile table.

    For every column, each value is located between two neighbouring entries
    of that column's quantile table, its probability is obtained by linear
    interpolation between the matching entries of ``quantile_probs``, and the
    probability is converted to a z-score with ``_normal_ppf``.

    Features are read from the last axis, so any number of leading dimensions
    is accepted (the original code required exactly three dimensions).

    Args:
        x: Floating-point ``torch.Tensor`` whose last axis is indexed in the
            order of ``cont_cols``. It is overwritten in place.
        cont_cols: Column names, one per index of the last axis of ``x``.
        quantile_probs: Probability grid shared by all columns.
        quantile_values: Mapping from column name to its quantile table; each
            table is indexed in parallel with ``quantile_probs``.

    Returns:
        The same tensor object ``x``, now holding the transformed values.
    """
    import torch

    probs_t = torch.tensor(quantile_probs, dtype=x.dtype, device=x.device)
    for i, c in enumerate(cont_cols):
        q_vals = torch.tensor(quantile_values[c], dtype=x.dtype, device=x.device)
        # A slice along the last axis is usually strided; hand bucketize a
        # contiguous tensor instead of leaving the conversion to PyTorch.
        v = x[..., i].contiguous()
        idx = torch.bucketize(v, q_vals)
        # Keep idx - 1 and idx both inside the table; values outside the table
        # are extrapolated from the first or last segment.
        idx = torch.clamp(idx, 1, q_vals.numel() - 1)
        x0 = q_vals[idx - 1]
        x1 = q_vals[idx]
        p0 = probs_t[idx - 1]
        p1 = probs_t[idx]
        span = x1 - x0
        # Tied quantiles give a zero-width segment; divide by 1 there.
        denom = torch.where(span == 0, torch.ones_like(span), span)
        p = p0 + (v - x0) * (p1 - p0) / denom
        x[..., i] = _normal_ppf(p)
    return x
|
||||||
|
|
||||||
|
|
||||||
|
def inverse_quantile_transform(x, cont_cols, quantile_probs, quantile_values):
    """Map normal scores back to feature values through the quantile tables.

    For every column, each z-score is converted to a probability with
    ``_normal_cdf``, located between two neighbouring entries of
    ``quantile_probs``, and turned into a feature value by linear
    interpolation between the matching entries of that column's quantile table.

    Features are read from the last axis, so any number of leading dimensions
    is accepted (the original code required exactly three dimensions).

    Args:
        x: Floating-point ``torch.Tensor`` of z-scores whose last axis is
            indexed in the order of ``cont_cols``. It is overwritten in place.
        cont_cols: Column names, one per index of the last axis of ``x``.
        quantile_probs: Probability grid shared by all columns.
        quantile_values: Mapping from column name to its quantile table; each
            table is indexed in parallel with ``quantile_probs``.

    Returns:
        The same tensor object ``x``, now holding the de-transformed values.
    """
    import torch

    probs_t = torch.tensor(quantile_probs, dtype=x.dtype, device=x.device)
    for i, c in enumerate(cont_cols):
        q_vals = torch.tensor(quantile_values[c], dtype=x.dtype, device=x.device)
        z = x[..., i]
        # contiguous() is a no-op when the CDF result is already contiguous.
        p = _normal_cdf(z).contiguous()
        idx = torch.bucketize(p, probs_t)
        # Keep idx - 1 and idx both inside the probability grid.
        idx = torch.clamp(idx, 1, probs_t.numel() - 1)
        p0 = probs_t[idx - 1]
        p1 = probs_t[idx]
        x0 = q_vals[idx - 1]
        x1 = q_vals[idx]
        width = p1 - p0
        # Guard against a zero-width probability segment; divide by 1 there.
        denom = torch.where(width == 0, torch.ones_like(width), width)
        v = x0 + (p - p0) * (x1 - x0) / denom
        x[..., i] = v
    return x
|
||||||
|
|
||||||
|
|
||||||
def windowed_batches(
|
def windowed_batches(
|
||||||
path: Union[str, List[str]],
|
path: Union[str, List[str]],
|
||||||
cont_cols: List[str],
|
cont_cols: List[str],
|
||||||
@@ -273,6 +354,9 @@ def windowed_batches(
|
|||||||
max_batches: Optional[int] = None,
|
max_batches: Optional[int] = None,
|
||||||
return_file_id: bool = False,
|
return_file_id: bool = False,
|
||||||
transforms: Optional[Dict[str, str]] = None,
|
transforms: Optional[Dict[str, str]] = None,
|
||||||
|
quantile_probs: Optional[List[float]] = None,
|
||||||
|
quantile_values: Optional[Dict[str, List[float]]] = None,
|
||||||
|
use_quantile: bool = False,
|
||||||
shuffle_buffer: int = 0,
|
shuffle_buffer: int = 0,
|
||||||
):
|
):
|
||||||
import torch
|
import torch
|
||||||
@@ -316,7 +400,16 @@ def windowed_batches(
|
|||||||
if len(batch_cont) == batch_size:
|
if len(batch_cont) == batch_size:
|
||||||
x_cont = torch.tensor(batch_cont, dtype=torch.float32)
|
x_cont = torch.tensor(batch_cont, dtype=torch.float32)
|
||||||
x_disc = torch.tensor(batch_disc, dtype=torch.long)
|
x_disc = torch.tensor(batch_disc, dtype=torch.long)
|
||||||
x_cont = normalize_cont(x_cont, cont_cols, mean, std, transforms=transforms)
|
x_cont = normalize_cont(
|
||||||
|
x_cont,
|
||||||
|
cont_cols,
|
||||||
|
mean,
|
||||||
|
std,
|
||||||
|
transforms=transforms,
|
||||||
|
quantile_probs=quantile_probs,
|
||||||
|
quantile_values=quantile_values,
|
||||||
|
use_quantile=use_quantile,
|
||||||
|
)
|
||||||
if return_file_id:
|
if return_file_id:
|
||||||
x_file = torch.tensor(batch_file, dtype=torch.long)
|
x_file = torch.tensor(batch_file, dtype=torch.long)
|
||||||
yield x_cont, x_disc, x_file
|
yield x_cont, x_disc, x_file
|
||||||
@@ -344,7 +437,16 @@ def windowed_batches(
|
|||||||
import torch
|
import torch
|
||||||
x_cont = torch.tensor(batch_cont, dtype=torch.float32)
|
x_cont = torch.tensor(batch_cont, dtype=torch.float32)
|
||||||
x_disc = torch.tensor(batch_disc, dtype=torch.long)
|
x_disc = torch.tensor(batch_disc, dtype=torch.long)
|
||||||
x_cont = normalize_cont(x_cont, cont_cols, mean, std, transforms=transforms)
|
x_cont = normalize_cont(
|
||||||
|
x_cont,
|
||||||
|
cont_cols,
|
||||||
|
mean,
|
||||||
|
std,
|
||||||
|
transforms=transforms,
|
||||||
|
quantile_probs=quantile_probs,
|
||||||
|
quantile_values=quantile_values,
|
||||||
|
use_quantile=use_quantile,
|
||||||
|
)
|
||||||
if return_file_id:
|
if return_file_id:
|
||||||
x_file = torch.tensor(batch_file, dtype=torch.long)
|
x_file = torch.tensor(batch_file, dtype=torch.long)
|
||||||
yield x_cont, x_disc, x_file
|
yield x_cont, x_disc, x_file
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from typing import Dict, List
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
|
||||||
from data_utils import load_split
|
from data_utils import load_split, inverse_quantile_transform
|
||||||
from hybrid_diffusion import HybridDiffusionModel, TemporalGRUGenerator, cosine_beta_schedule
|
from hybrid_diffusion import HybridDiffusionModel, TemporalGRUGenerator, cosine_beta_schedule
|
||||||
from platform_utils import resolve_device, safe_path, ensure_dir, resolve_path
|
from platform_utils import resolve_device, safe_path, ensure_dir, resolve_path
|
||||||
|
|
||||||
@@ -112,6 +112,8 @@ def main():
|
|||||||
int_like = stats.get("int_like", {})
|
int_like = stats.get("int_like", {})
|
||||||
max_decimals = stats.get("max_decimals", {})
|
max_decimals = stats.get("max_decimals", {})
|
||||||
transforms = stats.get("transform", {})
|
transforms = stats.get("transform", {})
|
||||||
|
quantile_probs = stats.get("quantile_probs")
|
||||||
|
quantile_values = stats.get("quantile_values")
|
||||||
|
|
||||||
vocab_json = json.load(open(args.vocab_path, "r", encoding="utf-8"))
|
vocab_json = json.load(open(args.vocab_path, "r", encoding="utf-8"))
|
||||||
vocab = vocab_json["vocab"]
|
vocab = vocab_json["vocab"]
|
||||||
@@ -140,6 +142,8 @@ def main():
|
|||||||
raise SystemExit("use_condition enabled but no files matched data_glob: %s" % cfg_glob)
|
raise SystemExit("use_condition enabled but no files matched data_glob: %s" % cfg_glob)
|
||||||
cont_target = str(cfg.get("cont_target", "eps"))
|
cont_target = str(cfg.get("cont_target", "eps"))
|
||||||
cont_clamp_x0 = float(cfg.get("cont_clamp_x0", 0.0))
|
cont_clamp_x0 = float(cfg.get("cont_clamp_x0", 0.0))
|
||||||
|
use_quantile = bool(cfg.get("use_quantile_transform", False))
|
||||||
|
cont_bound_mode = str(cfg.get("cont_bound_mode", "clamp"))
|
||||||
use_temporal_stage1 = bool(cfg.get("use_temporal_stage1", False))
|
use_temporal_stage1 = bool(cfg.get("use_temporal_stage1", False))
|
||||||
temporal_hidden_dim = int(cfg.get("temporal_hidden_dim", 256))
|
temporal_hidden_dim = int(cfg.get("temporal_hidden_dim", 256))
|
||||||
temporal_num_layers = int(cfg.get("temporal_num_layers", 1))
|
temporal_num_layers = int(cfg.get("temporal_num_layers", 1))
|
||||||
@@ -270,15 +274,21 @@ def main():
|
|||||||
mean_vec = torch.tensor([mean[c] for c in cont_cols], dtype=x_cont.dtype)
|
mean_vec = torch.tensor([mean[c] for c in cont_cols], dtype=x_cont.dtype)
|
||||||
std_vec = torch.tensor([std[c] for c in cont_cols], dtype=x_cont.dtype)
|
std_vec = torch.tensor([std[c] for c in cont_cols], dtype=x_cont.dtype)
|
||||||
x_cont = x_cont * std_vec + mean_vec
|
x_cont = x_cont * std_vec + mean_vec
|
||||||
|
if use_quantile:
|
||||||
|
x_cont = inverse_quantile_transform(x_cont, cont_cols, quantile_probs, quantile_values)
|
||||||
for i, c in enumerate(cont_cols):
|
for i, c in enumerate(cont_cols):
|
||||||
if transforms.get(c) == "log1p":
|
if transforms.get(c) == "log1p":
|
||||||
x_cont[:, :, i] = torch.expm1(x_cont[:, :, i])
|
x_cont[:, :, i] = torch.expm1(x_cont[:, :, i])
|
||||||
# clamp to observed min/max per feature
|
# bound to observed min/max per feature
|
||||||
if vmin and vmax:
|
if vmin and vmax:
|
||||||
for i, c in enumerate(cont_cols):
|
for i, c in enumerate(cont_cols):
|
||||||
lo = vmin.get(c, None)
|
lo = vmin.get(c, None)
|
||||||
hi = vmax.get(c, None)
|
hi = vmax.get(c, None)
|
||||||
if lo is not None and hi is not None:
|
if lo is None or hi is None:
|
||||||
|
continue
|
||||||
|
if cont_bound_mode == "sigmoid":
|
||||||
|
x_cont[:, :, i] = float(lo) + (float(hi) - float(lo)) * torch.sigmoid(x_cont[:, :, i])
|
||||||
|
else:
|
||||||
x_cont[:, :, i] = torch.clamp(x_cont[:, :, i], float(lo), float(hi))
|
x_cont[:, :, i] = torch.clamp(x_cont[:, :, i], float(lo), float(hi))
|
||||||
|
|
||||||
header = read_header(data_path)
|
header = read_header(data_path)
|
||||||
|
|||||||
@@ -17,6 +17,14 @@ OUT_VOCAB = BASE_DIR / "results" / "disc_vocab.json"
|
|||||||
|
|
||||||
|
|
||||||
def main(max_rows: Optional[int] = None):
|
def main(max_rows: Optional[int] = None):
|
||||||
|
config_path = BASE_DIR / "config.json"
|
||||||
|
use_quantile = False
|
||||||
|
quantile_bins = None
|
||||||
|
if config_path.exists():
|
||||||
|
cfg = json.loads(config_path.read_text(encoding="utf-8"))
|
||||||
|
use_quantile = bool(cfg.get("use_quantile_transform", False))
|
||||||
|
quantile_bins = int(cfg.get("quantile_bins", 0)) if use_quantile else None
|
||||||
|
|
||||||
split = load_split(safe_path(SPLIT_PATH))
|
split = load_split(safe_path(SPLIT_PATH))
|
||||||
time_col = split.get("time_column", "time")
|
time_col = split.get("time_column", "time")
|
||||||
cont_cols = [c for c in split["continuous"] if c != time_col]
|
cont_cols = [c for c in split["continuous"] if c != time_col]
|
||||||
@@ -28,7 +36,13 @@ def main(max_rows: Optional[int] = None):
|
|||||||
data_paths = [safe_path(p) for p in data_paths]
|
data_paths = [safe_path(p) for p in data_paths]
|
||||||
|
|
||||||
transforms, _ = choose_cont_transforms(data_paths, cont_cols, max_rows=max_rows)
|
transforms, _ = choose_cont_transforms(data_paths, cont_cols, max_rows=max_rows)
|
||||||
cont_stats = compute_cont_stats(data_paths, cont_cols, max_rows=max_rows, transforms=transforms)
|
cont_stats = compute_cont_stats(
|
||||||
|
data_paths,
|
||||||
|
cont_cols,
|
||||||
|
max_rows=max_rows,
|
||||||
|
transforms=transforms,
|
||||||
|
quantile_bins=quantile_bins,
|
||||||
|
)
|
||||||
vocab, top_token = build_disc_stats(data_paths, disc_cols, max_rows=max_rows)
|
vocab, top_token = build_disc_stats(data_paths, disc_cols, max_rows=max_rows)
|
||||||
|
|
||||||
ensure_dir(OUT_STATS.parent)
|
ensure_dir(OUT_STATS.parent)
|
||||||
@@ -46,6 +60,8 @@ def main(max_rows: Optional[int] = None):
|
|||||||
"transform": cont_stats["transform"],
|
"transform": cont_stats["transform"],
|
||||||
"skew": cont_stats["skew"],
|
"skew": cont_stats["skew"],
|
||||||
"max_rows": cont_stats["max_rows"],
|
"max_rows": cont_stats["max_rows"],
|
||||||
|
"quantile_probs": cont_stats["quantile_probs"],
|
||||||
|
"quantile_values": cont_stats["quantile_values"],
|
||||||
},
|
},
|
||||||
f,
|
f,
|
||||||
indent=2,
|
indent=2,
|
||||||
|
|||||||
12
example/results/cdf_P1_B3004.svg
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="900" height="420">
|
||||||
|
<style>text{font-family:Arial,sans-serif;font-size:12px}</style>
|
||||||
|
<text x="50" y="30">CDF 비교: P1_B3004</text>
|
||||||
|
<line x1="50" y1="50" x2="50" y2="370" stroke="#333"/>
|
||||||
|
<line x1="50" y1="370" x2="850" y2="370" stroke="#333"/>
|
||||||
|
<polyline fill="none" stroke="#1f77b4" stroke-width="2" points="50,367 86,362 97,360 107,356 112,354 121,354 136,354 138,352 162,349 162,349 166,349 173,344 175,341 176,339 177,336 187,333 187,333 195,333 202,333 220,333 222,332 223,332 226,332 234,332 234,332 235,332 236,332 238,332 241,332 254,332 256,329 257,329 265,329 281,329 281,329 282,323 288,319 289,317 290,317 292,317 293,314 293,312 294,310 295,309 295,309 295,309 296,309 298,305 301,305 301,304 303,304 303,300 304,300 307,300 307,300 315,300 316,300 317,300 318,297 319,291 324,288 326,286 326,286 327,286 327,281 332,281 334,281 334,278 334,278 334,271 334,270 335,264 336,261 336,261 337,258 340,256 342,256 347,251 348,251 349,251 349,251 349,251 352,251 357,246 358,244 359,244 361,244 363,244 364,244 364,241 365,241 367,241 368,235 369,233 371,233 371,233 371,233 372,233 373,231 373,228 374,228 374,228 378,228 382,228 382,228 382,228 389,222 391,222 391,222 391,222 393,220 396,220 399,220 400,220 400,220 401,220 401,218 403,218 403,212 404,202 404,202 406,202 407,202 407,199 407,197 408,194 409,194 410,194 411,158 411,158 412,158 412,155 414,153 414,148 419,142 421,142 422,142 422,136 423,133 423,129 424,129 425,129 425,126 426,120 427,120 428,118 428,118 428,118 430,118 431,115 431,115 431,112 434,112 434,106 435,103 440,103 478,103 480,103 481,100 487,100 494,96 497,96 504,96 508,96 520,96 522,96 527,95 529,92 536,92 539,89 542,89 546,89 551,89 554,89 562,89 566,89 573,89 577,89 583,89 588,89 595,86 598,86 604,86 613,86 626,86 628,86 629,86 634,82 643,79 646,79 663,76 669,76 710,76 710,76 718,76 719,72 723,70 730,70 732,69 741,65 746,64 752,64 752,62 792,62 797,60 803,58 820,58 824,56 847,54 850,50"/>
|
||||||
|
<polyline fill="none" stroke="#d62728" stroke-width="2" points="332,364 378,358 429,352 472,346 506,340 511,334 517,327 517,321 529,50"/>
|
||||||
|
<text x="730" y="65" fill="#1f77b4">real</text>
|
||||||
|
<text x="730" y="80" fill="#d62728">generated</text>
|
||||||
|
<line x1="50" y1="50" x2="50" y2="370" stroke="#999" stroke-dasharray="4 3"/>
|
||||||
|
<line x1="850" y1="50" x2="850" y2="370" stroke="#999" stroke-dasharray="4 3"/>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 2.3 KiB |
12
example/results/cdf_P1_LIT01.svg
Normal file
|
After Width: | Height: | Size: 22 KiB |
12
example/results/cdf_P1_PCV02Z.svg
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="900" height="420">
|
||||||
|
<style>text{font-family:Arial,sans-serif;font-size:12px}</style>
|
||||||
|
<text x="50" y="30">CDF 비교: P1_PCV02Z</text>
|
||||||
|
<line x1="50" y1="50" x2="50" y2="370" stroke="#333"/>
|
||||||
|
<line x1="50" y1="370" x2="850" y2="370" stroke="#333"/>
|
||||||
|
<polyline fill="none" stroke="#1f77b4" stroke-width="2" points="50,370 72,370 83,370 87,370 94,328 100,328 105,328 105,328 109,328 111,328 112,328 116,177 122,177 125,177 126,177 127,177 133,177 134,177 138,126 139,126 146,126 149,126 161,126 183,126 716,125 738,125 750,125 755,125 761,108 764,108 765,108 766,108 772,107 777,107 783,64 788,64 794,64 805,53 811,53 816,53 827,51 850,50"/>
|
||||||
|
<polyline fill="none" stroke="#d62728" stroke-width="2" points="738,340 739,334 739,327 740,321 741,315 741,309 744,303 745,297 745,290 746,284 746,278 747,272 748,266 748,260 748,254 748,247 748,241 749,235 750,229 750,223 751,217 751,210 751,204 751,198 751,192 752,186 752,180 753,174 753,167 754,161 754,155 754,149 754,143 755,137 756,130 758,124 761,118 761,112 764,106 764,100 765,94 772,87 774,81 776,75 779,69 781,63 784,57 793,50"/>
|
||||||
|
<text x="730" y="65" fill="#1f77b4">real</text>
|
||||||
|
<text x="730" y="80" fill="#d62728">generated</text>
|
||||||
|
<line x1="50" y1="50" x2="50" y2="370" stroke="#999" stroke-dasharray="4 3"/>
|
||||||
|
<line x1="850" y1="50" x2="850" y2="370" stroke="#999" stroke-dasharray="4 3"/>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 1.4 KiB |
12
example/results/cdf_P2_MSD.svg
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="900" height="420">
|
||||||
|
<style>text{font-family:Arial,sans-serif;font-size:12px}</style>
|
||||||
|
<text x="50" y="30">CDF 비교: P2_MSD</text>
|
||||||
|
<line x1="50" y1="50" x2="50" y2="370" stroke="#333"/>
|
||||||
|
<line x1="50" y1="370" x2="850" y2="370" stroke="#333"/>
|
||||||
|
<polyline fill="none" stroke="#1f77b4" stroke-width="2" points="50,50"/>
|
||||||
|
<polyline fill="none" stroke="#d62728" stroke-width="2" points="50,50"/>
|
||||||
|
<text x="730" y="65" fill="#1f77b4">real</text>
|
||||||
|
<text x="730" y="80" fill="#d62728">generated</text>
|
||||||
|
<line x1="50" y1="50" x2="50" y2="370" stroke="#999" stroke-dasharray="4 3"/>
|
||||||
|
<line x1="50" y1="50" x2="50" y2="370" stroke="#999" stroke-dasharray="4 3"/>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 698 B |
12
example/results/cdf_P2_SIT01.svg
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="900" height="420">
|
||||||
|
<style>text{font-family:Arial,sans-serif;font-size:12px}</style>
|
||||||
|
<text x="50" y="30">CDF 비교: P2_SIT01</text>
|
||||||
|
<line x1="50" y1="50" x2="50" y2="370" stroke="#333"/>
|
||||||
|
<line x1="50" y1="370" x2="850" y2="370" stroke="#333"/>
|
||||||
|
<polyline fill="none" stroke="#1f77b4" stroke-width="2" points="50,370 61,370 73,370 84,370 96,370 107,370 119,370 131,370 142,370 154,370 165,370 177,369 189,368 200,366 212,364 218,364 223,360 235,356 247,351 252,351 258,343 270,332 276,332 281,319 287,319 293,304 299,304 305,289 310,289 316,273 322,273 328,256 339,236 345,236 351,212 357,212 363,184 365,184 368,184 374,156 380,156 386,129 392,129 394,129 397,107 403,107 409,90 415,90 418,90 421,77 426,77 432,68 438,68 444,61 450,61 455,56 461,56 467,54 478,52 490,51 502,51 513,51 525,51 536,51 548,51 560,51 571,51 583,51 594,51 606,51 618,51 629,51 641,51 652,51 664,51 676,51 687,51 699,51 710,51 722,51 734,51 745,51 757,51 768,51 780,51 792,51 803,51 850,50"/>
|
||||||
|
<polyline fill="none" stroke="#d62728" stroke-width="2" points="111,364 117,358 119,352 122,346 122,340 123,327 124,321 124,315 126,309 126,297 127,290 132,284 134,278 135,272 135,266 136,260 137,254 138,247 141,241 142,235 143,229 143,223 143,217 149,210 159,204 166,198 184,192 184,186 186,180 190,174 192,167 200,161 202,155 211,149 218,143 222,137 230,130 233,124 242,118 243,112 244,106 246,100 250,94 250,87 251,81 254,75 268,69 270,63 279,57 311,50"/>
|
||||||
|
<text x="730" y="65" fill="#1f77b4">real</text>
|
||||||
|
<text x="730" y="80" fill="#d62728">generated</text>
|
||||||
|
<line x1="50" y1="50" x2="50" y2="370" stroke="#999" stroke-dasharray="4 3"/>
|
||||||
|
<line x1="850" y1="50" x2="850" y2="370" stroke="#999" stroke-dasharray="4 3"/>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 1.7 KiB |
12
example/results/cdf_P2_SIT02.svg
Normal file
|
After Width: | Height: | Size: 226 KiB |
12
example/results/cdf_P3_LCP01D.svg
Normal file
|
After Width: | Height: | Size: 14 KiB |
12
example/results/cdf_P3_PIT01.svg
Normal file
|
After Width: | Height: | Size: 27 KiB |
12
example/results/cdf_P4_HT_FD.svg
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="900" height="420">
|
||||||
|
<style>text{font-family:Arial,sans-serif;font-size:12px}</style>
|
||||||
|
<text x="50" y="30">CDF 비교: P4_HT_FD</text>
|
||||||
|
<line x1="50" y1="50" x2="50" y2="370" stroke="#333"/>
|
||||||
|
<line x1="50" y1="370" x2="850" y2="370" stroke="#333"/>
|
||||||
|
<polyline fill="none" stroke="#1f77b4" stroke-width="2" points="50,370 60,370 113,370 115,370 132,370 133,370 134,370 135,370 137,370 138,370 139,370 140,370 141,370 142,370 144,370 145,370 146,370 147,370 150,370 151,370 152,370 153,370 154,370 156,370 157,370 158,370 159,370 160,370 163,370 164,370 165,370 166,370 168,370 169,370 170,370 171,370 172,370 173,370 175,370 176,370 177,370 178,370 179,370 181,370 182,370 183,370 184,370 185,370 187,370 188,370 189,370 190,370 191,370 193,370 194,370 195,370 196,370 197,370 198,370 200,370 201,370 202,370 203,370 204,370 206,370 207,370 208,370 209,370 210,370 212,370 213,370 214,370 215,370 216,370 218,370 219,370 220,370 221,370 222,370 224,370 225,370 226,370 227,370 228,370 229,370 231,370 232,370 233,370 234,369 235,369 237,369 238,369 239,369 240,369 242,369 243,369 244,369 245,369 246,369 247,369 249,369 251,369 252,369 253,369 255,369 256,369 257,369 258,369 259,369 260,369 262,369 263,369 264,369 265,369 266,369 268,369 269,369 270,369 271,369 272,369 274,369 275,368 276,368 277,368 278,368 280,368 281,368 282,368 283,368 284,368 286,368 287,368 288,368 289,368 290,368 291,368 293,368 294,368 295,368 296,367 297,367 299,367 300,367 301,367 302,367 303,367 305,367 306,367 307,367 308,367 309,367 311,367 312,366 313,366 314,366 315,366 316,366 318,366 319,366 320,366 321,366 322,366 324,366 325,366 325,366 326,366 327,366 329,365 330,365 331,365 332,365 333,365 334,365 336,365 337,365 338,365 339,365 340,365 342,365 343,365 344,364 345,364 346,364 347,364 349,364 350,364 351,364 352,364 353,364 355,364 356,364 357,364 358,364 360,363 361,363 362,363 363,363 364,363 365,363 367,363 368,363 369,363 370,363 371,362 373,362 374,362 374,362 375,362 376,361 377,361 378,361 380,356 381,354 382,354 383,353 385,346 386,346 386,346 387,346 388,346 388,340 389,339 390,339 390,339 391,339 392,332 392,332 393,331 394,331 395,325 396,322 398,322 399,315 399,315 400,307 401,305 402,305 402,276 403,276 403,276 404,227 404,227 
405,207 405,207 405,207 406,207 406,193 406,193 407,139 408,139 408,122 409,122 410,122 411,115 411,115 412,104 413,104 414,104 414,104 415,104 416,90 417,90 417,90 418,90 419,80 420,79 421,79 423,78 424,69 425,69 425,69 426,69 426,69 427,59 428,59 429,59 430,58 431,57 432,56 433,56 435,56 436,55 437,55 438,55 439,55 440,55 442,55 443,55 444,55 445,55 447,55 448,55 449,55 450,55 451,55 452,55 454,55 455,54 456,54 457,54 458,54 460,54 461,54 462,54 463,54 464,54 465,54 467,54 468,54 469,54 470,54 472,54 473,54 474,54 475,54 476,54 478,54 479,54 480,54 481,54 482,54 483,54 485,54 486,54 487,54 488,54 489,54 491,54 492,54 493,54 494,54 495,54 497,54 498,54 499,54 500,54 501,54 503,53 504,53 505,53 506,53 507,53 509,53 510,53 511,53 512,53 513,53 514,53 516,53 517,53 518,53 519,53 520,53 522,53 523,53 524,53 525,53 526,53 528,53 529,53 530,53 531,53 532,53 534,53 535,53 536,53 537,53 538,53 539,53 541,53 542,53 543,53 544,53 545,52 547,52 548,52 549,52 550,52 551,52 553,52 554,52 555,52 556,52 557,52 559,52 561,52 562,52 565,52 566,52 572,52 573,52 574,52 575,52 576,52 578,52 579,52 581,52 582,52 587,52 588,52 590,52 592,52 593,52 594,52 596,52 597,52 601,52 604,52 606,52 610,52 611,52 612,52 616,52 616,52 618,52 619,52 621,52 623,52 624,52 625,52 627,52 629,52 631,52 632,52 635,52 637,52 643,52 644,52 646,52 647,52 648,52 649,52 650,52 652,52 653,52 655,52 656,52 657,52 659,52 662,52 663,52 665,52 666,52 668,52 671,52 672,52 674,52 675,52 677,52 678,52 683,52 684,52 685,52 686,52 688,52 690,52 692,52 696,52 697,52 699,52 700,52 703,52 704,52 705,52 708,52 709,52 711,52 712,52 714,52 715,52 717,52 718,52 719,52 722,52 723,52 725,52 727,52 728,52 729,52 730,52 731,52 733,52 734,52 735,52 737,52 739,51 740,51 741,51 742,51 743,51 745,51 746,51 747,51 748,51 749,51 750,51 752,51 753,51 754,51 756,51 758,51 759,51 760,51 761,51 762,51 765,51 766,51 767,51 768,51 770,51 771,51 772,51 773,51 774,51 775,51 778,51 779,51 780,51 781,51 783,51 784,51 785,51 786,51 788,51 789,51 
790,51 791,51 792,51 793,51 795,51 796,51 797,51 798,51 801,51 802,51 803,51 804,51 806,51 808,51 809,51 810,51 811,51 812,51 814,51 815,51 816,51 817,51 819,51 820,51 821,51 822,51 823,51 824,51 826,51 827,51 828,51 829,51 830,51 832,51 833,51 834,51 835,51 836,51 837,51 839,51 840,51 842,51 844,51 846,51 847,51 850,50"/>
|
||||||
|
<polyline fill="none" stroke="#d62728" stroke-width="2" points="409,364 410,358 412,352 413,346 416,340 417,334 418,327 418,321 420,315 420,309 422,303 423,297 425,284 425,278 427,272 427,266 429,260 430,254 431,247 431,241 432,235 433,229 434,223 434,217 434,210 437,198 437,192 439,186 439,180 440,174 440,167 440,161 443,155 443,149 444,143 445,137 449,130 451,124 451,118 452,112 455,100 455,94 458,87 459,81 460,75 462,69 463,63 471,57 472,50"/>
|
||||||
|
<text x="730" y="65" fill="#1f77b4">real</text>
|
||||||
|
<text x="730" y="80" fill="#d62728">generated</text>
|
||||||
|
<line x1="50" y1="50" x2="50" y2="370" stroke="#999" stroke-dasharray="4 3"/>
|
||||||
|
<line x1="850" y1="50" x2="850" y2="370" stroke="#999" stroke-dasharray="4 3"/>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 5.2 KiB |
12
example/results/cdf_P4_ST_PT01.svg
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
<svg xmlns="http://www.w3.org/2000/svg" width="900" height="420">
|
||||||
|
<style>text{font-family:Arial,sans-serif;font-size:12px}</style>
|
||||||
|
<text x="50" y="30">CDF 비교: P4_ST_PT01</text>
|
||||||
|
<line x1="50" y1="50" x2="50" y2="370" stroke="#333"/>
|
||||||
|
<line x1="50" y1="370" x2="850" y2="370" stroke="#333"/>
|
||||||
|
<polyline fill="none" stroke="#1f77b4" stroke-width="2" points="50,370 53,370 107,370 111,370 113,370 115,370 130,370 140,370 146,370 148,370 150,370 153,370 155,370 157,370 161,370 163,370 165,370 167,370 171,370 173,370 175,370 178,370 180,370 182,370 186,370 190,370 192,370 196,370 198,370 200,370 203,370 205,369 207,369 211,369 213,366 215,365 217,365 221,365 223,365 225,365 228,364 230,364 232,364 236,364 238,364 240,364 242,364 246,364 248,364 250,363 253,363 255,363 257,363 261,362 263,362 264,362 265,334 266,334 267,330 268,330 270,330 271,327 273,325 275,323 278,321 280,319 282,318 283,318 286,316 287,316 288,314 290,312 292,311 294,311 296,309 298,307 300,305 303,303 304,303 305,300 307,296 308,296 309,296 310,296 311,291 312,291 313,281 314,281 315,230 315,230 316,230 317,88 319,88 321,85 322,85 323,84 325,83 328,82 330,82 332,81 336,81 338,80 340,80 342,80 346,79 348,79 350,79 350,79 353,78 355,78 357,77 361,77 363,76 365,76 367,75 371,74 373,72 375,58 378,57 380,57 382,57 386,57 387,57 388,56 390,56 392,56 396,56 398,56 400,56 403,56 405,56 407,56 411,55 413,55 415,55 417,55 421,55 423,55 425,54 428,52 430,52 432,52 436,52 438,52 440,52 442,51 446,51 448,51 450,51 453,51 455,51 457,51 461,51 463,51 465,51 467,51 471,51 473,51 475,51 478,51 480,51 482,51 486,51 488,51 492,51 503,51 505,51 507,51 515,51 517,51 521,51 523,51 525,51 528,51 530,51 532,51 536,51 538,51 540,51 561,51 571,51 573,51 578,51 580,51 582,51 588,51 598,51 603,51 640,51 642,51 646,51 655,51 657,51 692,51 746,51 798,51 803,51 828,51 838,51 850,50"/>
|
||||||
|
<polyline fill="none" stroke="#d62728" stroke-width="2" points="296,364 299,358 300,352 300,346 313,340 314,334 318,327 320,321 322,315 325,309 326,303 326,297 330,290 334,284 336,278 336,272 336,266 337,260 337,254 338,247 339,241 339,235 342,229 343,223 344,217 345,210 348,204 351,198 352,192 356,186 356,180 360,174 360,167 364,161 365,155 366,149 366,143 366,137 367,130 367,124 368,118 375,112 378,106 381,100 384,94 392,87 393,81 395,75 397,69 400,63 405,57 407,50"/>
|
||||||
|
<text x="730" y="65" fill="#1f77b4">real</text>
|
||||||
|
<text x="730" y="80" fill="#d62728">generated</text>
|
||||||
|
<line x1="50" y1="50" x2="50" y2="370" stroke="#999" stroke-dasharray="4 3"/>
|
||||||
|
<line x1="850" y1="50" x2="850" y2="370" stroke="#999" stroke-dasharray="4 3"/>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 2.5 KiB |
1
example/results/ks_diagnosis.csv
Normal file
@@ -0,0 +1 @@
|
|||||||
|
feature,ks,boundary_frac,mean_shift,std_ratio,diagnosis,gen_frac_at_min,gen_frac_at_max
|
||||||
|
54
example/results/ks_per_feature.csv
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
feature,ks,gen_frac_at_min,gen_frac_at_max,real_n,gen_n,real_min,real_max
|
||||||
|
P2_MSD,1.0,1.0,1.0,92163,52,763.19324,763.19324
|
||||||
|
P3_PIT01,0.9141619071227483,0.0,0.0,92163,52,-24.0,3847.0
|
||||||
|
P2_SIT02,0.8628397930422604,0.0,0.0,92163,52,757.68005,826.50775
|
||||||
|
P2_SIT01,0.8617182433464456,0.0,0.0,92163,52,758.0,827.0
|
||||||
|
P3_LCP01D,0.8261961040597803,0.8269230769230769,0.0,92163,52,-8.0,13816.0
|
||||||
|
P4_HT_FD,0.7983631008272134,0.0,0.0,92163,52,-0.0217,0.02684
|
||||||
|
P1_B3004,0.7794726567227461,0.0,0.0,92163,52,369.75601,447.83438
|
||||||
|
P1_LIT01,0.7761347161675927,0.0,0.0,92163,52,356.09085,459.24484
|
||||||
|
P4_ST_PT01,0.7676921073783155,0.0,0.0,92163,52,9914.0,10330.0
|
||||||
|
P1_PCV02Z,0.7670214728253204,0.0,0.0,92163,52,11.76605,12.04071
|
||||||
|
P1_PIT02,0.7347702941026726,0.0,0.0,92163,52,0.17105,2.34161
|
||||||
|
P4_ST_PO,0.7212397099119537,0.019230769230769232,0.0,92163,52,233.66968,498.60754
|
||||||
|
P1_B2016,0.6999296397102459,0.0,0.0,92163,52,0.9508,2.0523
|
||||||
|
P4_ST_LD,0.6933532896148046,0.0,0.0,92163,52,230.55914,499.62018
|
||||||
|
P4_LD,0.6897361614330463,0.0,0.0,92163,52,231.33685,498.58942
|
||||||
|
P3_LIT01,0.6615471835435378,0.0,0.0,92163,52,5047.0,19680.0
|
||||||
|
P1_PCV01D,0.6231695265662259,0.0,0.4807692307692308,92163,52,24.95222,100.0
|
||||||
|
P1_B2004,0.617741226038482,0.0,0.0,92163,52,0.02978,0.10196
|
||||||
|
P2_CO_rpm,0.6100514640031582,0.0,0.0,92163,52,53993.0,54183.0
|
||||||
|
P4_ST_GOV,0.6084888062037244,0.0,0.0,92163,52,12665.0,26898.0
|
||||||
|
P1_FCV02Z,0.5961538461538461,0.5961538461538461,0.0,92163,52,-1.89057,97.38312
|
||||||
|
P1_B4002,0.5783991406529735,0.0,0.0,92163,52,31.41343,33.6555
|
||||||
|
P1_FT01Z,0.5633543078775981,0.0,0.0,92163,52,0.0,1365.69287
|
||||||
|
P1_PCV01Z,0.547708324465266,0.0,0.5,92163,52,25.57526,100.0
|
||||||
|
P1_B3005,0.5359248538751159,0.0,0.0,92163,52,890.07843,1121.94116
|
||||||
|
P1_B4005,0.5101396438918004,0.019230769230769232,0.0,92163,52,0.0,100.0
|
||||||
|
P1_FT02,0.5049748814600219,0.0,0.0,92163,52,4.99723,2005.23364
|
||||||
|
P3_FIT01,0.497898998346575,0.0,0.0,92163,52,-27.0,5421.0
|
||||||
|
P2_24Vdc,0.4871763572733593,0.0,0.0,92163,52,28.01351,28.04294
|
||||||
|
P4_HT_LD,0.48082202185258727,0.6346153846153846,0.0,92163,52,-0.00723,83.04398
|
||||||
|
P1_B400B,0.4544694642184959,0.0,0.0,92163,52,25.02598,2855.56567
|
||||||
|
P2_VXT03,0.45055916816276176,0.0,0.0,92163,52,-2.135,0.1491
|
||||||
|
P2_VYT03,0.4479521650186668,0.0,0.0,92163,52,4.6083,7.2547
|
||||||
|
P2_VXT02,0.44536394131133883,0.0,0.0,92163,52,-4.3925,-1.8818
|
||||||
|
P4_HT_PO,0.42936573912941867,0.019230769230769232,0.0,92163,52,0.05423,83.04401
|
||||||
|
P3_LCV01D,0.4154990030205681,0.25,0.0,92163,52,-288.0,17776.0
|
||||||
|
P1_FT03,0.41513384730565156,0.0,0.0,92163,52,187.91197,331.15381
|
||||||
|
P1_FT02Z,0.40720829900869615,0.0,0.0,92163,52,25.02598,2856.88574
|
||||||
|
P2_VT01,0.36856126144398005,0.0,0.0,92163,52,11.76163,12.06125
|
||||||
|
P1_TIT02,0.3579625646534276,0.0,0.0,92163,52,34.99451,40.4419
|
||||||
|
P1_FCV03Z,0.35665363791075844,0.0,0.0,92163,52,46.20513,75.3189
|
||||||
|
P1_LCV01Z,0.3512624789357317,0.0,0.0,92163,52,0.29907,28.52783
|
||||||
|
P1_FCV03D,0.30470616858592514,0.0,0.0,92163,52,45.78336,74.1622
|
||||||
|
P4_ST_TT01,0.30430700122441934,0.0,0.21153846153846154,92163,52,27539.0,27629.0
|
||||||
|
P2_HILout,0.3041348563873872,0.0,0.0,92163,52,673.80371,768.76831
|
||||||
|
P4_ST_FD,0.30162947086224323,0.0,0.0,92163,52,-0.05244,0.05035
|
||||||
|
P1_B4022,0.2862201083531769,0.0,0.0,92163,52,34.21529,38.63682
|
||||||
|
P1_TIT01,0.2807849220319517,0.0,0.0,92163,52,34.68933,36.94763
|
||||||
|
P1_LCV01D,0.28024261363019864,0.0,0.0,92163,52,3.17127,28.23791
|
||||||
|
P1_FT03Z,0.24018503170386246,0.0,0.0,92163,52,867.43927,1146.92163
|
||||||
|
P1_PIT01,0.21846515245981413,0.0,0.0,92163,52,0.88211,2.38739
|
||||||
|
P1_FT01,0.21452397466361856,0.0,0.0,92163,52,-9.88007,462.57019
|
||||||
|
P2_VYT02,0.18998029411101902,0.0,0.0,92163,52,2.4459,5.1248
|
||||||
|
17
example/results/ks_summary.json
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
{
|
||||||
|
"generated_rows": 52,
|
||||||
|
"reference_rows_per_file": 50000,
|
||||||
|
"stride": 10,
|
||||||
|
"top_k_features": [
|
||||||
|
"P2_MSD",
|
||||||
|
"P3_PIT01",
|
||||||
|
"P2_SIT02",
|
||||||
|
"P2_SIT01",
|
||||||
|
"P3_LCP01D",
|
||||||
|
"P4_HT_FD",
|
||||||
|
"P1_B3004",
|
||||||
|
"P1_LIT01",
|
||||||
|
"P4_ST_PT01",
|
||||||
|
"P1_PCV02Z"
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -173,6 +173,9 @@ def main():
|
|||||||
std = stats["std"]
|
std = stats["std"]
|
||||||
transforms = stats.get("transform", {})
|
transforms = stats.get("transform", {})
|
||||||
raw_std = stats.get("raw_std", std)
|
raw_std = stats.get("raw_std", std)
|
||||||
|
quantile_probs = stats.get("quantile_probs")
|
||||||
|
quantile_values = stats.get("quantile_values")
|
||||||
|
use_quantile = bool(config.get("use_quantile_transform", False))
|
||||||
|
|
||||||
vocab = load_json(config["vocab_path"])["vocab"]
|
vocab = load_json(config["vocab_path"])["vocab"]
|
||||||
vocab_sizes = [len(vocab[c]) for c in disc_cols]
|
vocab_sizes = [len(vocab[c]) for c in disc_cols]
|
||||||
@@ -253,6 +256,9 @@ def main():
|
|||||||
max_batches=int(config["max_batches"]),
|
max_batches=int(config["max_batches"]),
|
||||||
return_file_id=False,
|
return_file_id=False,
|
||||||
transforms=transforms,
|
transforms=transforms,
|
||||||
|
quantile_probs=quantile_probs,
|
||||||
|
quantile_values=quantile_values,
|
||||||
|
use_quantile=use_quantile,
|
||||||
shuffle_buffer=int(config.get("shuffle_buffer", 0)),
|
shuffle_buffer=int(config.get("shuffle_buffer", 0)),
|
||||||
)
|
)
|
||||||
):
|
):
|
||||||
@@ -284,6 +290,9 @@ def main():
|
|||||||
max_batches=int(config["max_batches"]),
|
max_batches=int(config["max_batches"]),
|
||||||
return_file_id=use_condition,
|
return_file_id=use_condition,
|
||||||
transforms=transforms,
|
transforms=transforms,
|
||||||
|
quantile_probs=quantile_probs,
|
||||||
|
quantile_values=quantile_values,
|
||||||
|
use_quantile=use_quantile,
|
||||||
shuffle_buffer=int(config.get("shuffle_buffer", 0)),
|
shuffle_buffer=int(config.get("shuffle_buffer", 0)),
|
||||||
)
|
)
|
||||||
):
|
):
|
||||||
|
|||||||
@@ -144,6 +144,7 @@ Defined in `example/data_utils.py` + `example/prepare_data.py`.
|
|||||||
Key steps:
|
Key steps:
|
||||||
- Streaming mean/std/min/max + int-like detection
|
- Streaming mean/std/min/max + int-like detection
|
||||||
- Optional **log1p transform** for heavy-tailed continuous columns
|
- Optional **log1p transform** for heavy-tailed continuous columns
|
||||||
|
- Optional **quantile transform** (TabDDPM-style) for continuous columns
|
||||||
- Discrete vocab + most frequent token
|
- Discrete vocab + most frequent token
|
||||||
- Windowed batching with **shuffle buffer**
|
- Windowed batching with **shuffle buffer**
|
||||||
|
|
||||||
@@ -159,7 +160,8 @@ Export process:
|
|||||||
- Diffusion generates residuals
|
- Diffusion generates residuals
|
||||||
- Output: `trend + residual`
|
- Output: `trend + residual`
|
||||||
- De-normalize continuous values
|
- De-normalize continuous values
|
||||||
- Clamp to observed min/max
|
- Inverse quantile transform (if enabled)
|
||||||
|
- Bound to observed min/max (clamp or sigmoid mapping)
|
||||||
- Restore discrete tokens from vocab
|
- Restore discrete tokens from vocab
|
||||||
- Write to CSV
|
- Write to CSV
|
||||||
|
|
||||||
|
|||||||