Clean artifacts and update example pipeline

This commit is contained in:
2026-01-22 16:32:51 +08:00
parent c0639386be
commit c3f750cd9d
20 changed files with 651 additions and 30826 deletions

View File

@@ -66,6 +66,8 @@ def build_vocab(
vocab = {}
for c in disc_cols:
tokens = sorted(values[c])
if "<UNK>" not in tokens:
tokens.append("<UNK>")
vocab[c] = {tok: idx for idx, tok in enumerate(tokens)}
return vocab
@@ -105,7 +107,7 @@ def windowed_batches(
batches_yielded = 0
for row in iter_rows(path):
cont_row = [float(row[c]) for c in cont_cols]
disc_row = [vocab[c][row[c]] for c in disc_cols]
disc_row = [vocab[c].get(row[c], vocab[c]["<UNK>"]) for c in disc_cols]
seq_cont.append(cont_row)
seq_disc.append(disc_row)
if len(seq_cont) == seq_len: