Clean artifacts and update example pipeline
This commit is contained in:
@@ -66,6 +66,8 @@ def build_vocab(
|
||||
vocab = {}
|
||||
for c in disc_cols:
|
||||
tokens = sorted(values[c])
|
||||
+ if "<UNK>" not in tokens:
|
||||
+ tokens.append("<UNK>")
|
||||
vocab[c] = {tok: idx for idx, tok in enumerate(tokens)}
|
||||
return vocab
|
||||
|
||||
@@ -105,7 +107,7 @@ def windowed_batches(
|
||||
batches_yielded = 0
|
||||
for row in iter_rows(path):
|
||||
cont_row = [float(row[c]) for c in cont_cols]
|
||||
- disc_row = [vocab[c][row[c]] for c in disc_cols]
|
||||
+ disc_row = [vocab[c].get(row[c], vocab[c]["<UNK>"]) for c in disc_cols]
|
||||
seq_cont.append(cont_row)
|
||||
seq_disc.append(disc_row)
|
||||
if len(seq_cont) == seq_len:
|
||||
|
||||
Reference in New Issue
Block a user