# Copyright 2021 NVIDIA Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
NVTabular demo on Rossmann data - PyTorch
Overview
NVTabular is a feature engineering and preprocessing library for tabular data designed to quickly and easily manipulate terabyte scale datasets used to train deep learning based recommender systems. It provides a high level abstraction to simplify code and accelerates computation on the GPU using the RAPIDS cuDF library.
Learning objectives
In the previous notebooks (01-Download-Convert.ipynb and 02-ETL-with-NVTabular.ipynb), we downloaded, preprocessed and created features for the dataset. Now, we are ready to train our deep learning model on the dataset. In this notebook, we use PyTorch with the NVTabular data loader for PyTorch to accelerate the training pipeline.
import os
import math
import json
import nvtabular as nvt
import glob
Loading NVTabular workflow
This time, we only need to define our data directories. We can load the data schema from the NVTabular workflow.
# Root data directory; the OUTPUT_DATA_DIR environment variable overrides the default.
DATA_DIR = os.getenv("OUTPUT_DATA_DIR", os.path.expanduser("~/nvt-examples/data/"))

# Directory with the preprocessed data, containing one sub-directory per split.
PREPROCESS_DIR = os.path.join(DATA_DIR, "ross_pre/")
PREPROCESS_DIR_TRAIN, PREPROCESS_DIR_VALID = (
    os.path.join(PREPROCESS_DIR, split_name) for split_name in ("train", "valid")
)
What files are available to train on in our directories?
!ls $PREPROCESS_DIR
stats.json train valid
!ls $PREPROCESS_DIR_TRAIN
0.1136d38916184bd39bf3d0cc6af8aecc.parquet _metadata
_file_list.txt _metadata.json
!ls $PREPROCESS_DIR_VALID
0.bcd2404e802640f29b1427feaacbd24a.parquet _metadata
_file_list.txt _metadata.json
We load the data schema and statistics from stats.json. We created the file in the previous notebook, rossmann-store-sales-feature-engineering.
# Load the schema and statistics stored in stats.json inside the preprocessing
# directory. The context manager closes the file handle as soon as parsing is
# done, and os.path.join avoids the doubled separator that plain concatenation
# produced (PREPROCESS_DIR already ends with "/").
with open(os.path.join(PREPROCESS_DIR, "stats.json"), "r") as stats_file:
    stats = json.load(stats_file)

# Column names grouped by role, exactly as recorded in stats.json.
CATEGORICAL_COLUMNS = stats["CATEGORICAL_COLUMNS"]
CONTINUOUS_COLUMNS = stats["CONTINUOUS_COLUMNS"]
LABEL_COLUMNS = stats["LABEL_COLUMNS"]

# Every column, ordered as categorical, then continuous, then label.
COLUMNS = CATEGORICAL_COLUMNS + CONTINUOUS_COLUMNS + LABEL_COLUMNS
The embedding table shows the cardinality of each categorical variable along with its associated embedding size. Each entry is of the form (cardinality, embedding_size).
# Embedding table shapes per categorical column, taken from the loaded stats.
EMBEDDING_TABLE_SHAPES = stats["EMBEDDING_TABLE_SHAPES"]
# Bare expression: as the last statement of a notebook cell it displays the mapping.
EMBEDDING_TABLE_SHAPES
{'Assortment': [4, 16],
'CompetitionMonthsOpen': [26, 16],
'CompetitionOpenSinceYear': [24, 16],
'Day': [32, 16],
'DayOfWeek': [8, 16],
'Events': [22, 16],
'Month': [13, 16],
'Promo2SinceYear': [9, 16],
'Promo2Weeks': [27, 16],
'PromoInterval': [4, 16],
'Promo_bw': [7, 16],
'Promo_fw': [7, 16],
'SchoolHoliday_bw': [9, 16],
'SchoolHoliday_fw': [9, 16],
'State': [13, 16],
'StateHoliday': [3, 16],
'StateHoliday_bw': [4, 16],
'StateHoliday_fw': [4, 16],
'Store': [1116, 81],
'StoreType': [5, 16],
'Week': [53, 16],
'Year': [4, 16]}
Training a Network
Now that our data is preprocessed and saved out, we can leverage datasets to read through the preprocessed parquet files in an online fashion to train neural networks.
We’ll start by setting some universal hyperparameters for our model and optimizer. These settings will be the same across all of the frameworks that we explore in the different notebooks.
# --- Model hyperparameters -------------------------------------------------
HIDDEN_DIMS = [1000, 500]
DROPOUT_RATES = [1e-3, 1e-2]
EMBEDDING_DROPOUT_RATE = 4e-2

# --- Optimisation hyperparameters -------------------------------------------
BATCH_SIZE = 2 ** 16
LEARNING_RATE = 1e-3
EPOCHS = 25

# --- Output scaling -----------------------------------------------------------
# TODO: Calculate on the fly rather than recalling from previous analysis.
MAX_SALES_IN_TRAINING_SET = 38722.0
# Upper bound for predictions in log space: 20% above log(max sales + 1).
MAX_LOG_SALES_PREDICTION = math.log(MAX_SALES_IN_TRAINING_SET + 1.0) * 1.2
# Collect the parquet files of each split; sorting gives a stable file order.
TRAIN_PATHS, VALID_PATHS = (
    sorted(glob.glob(os.path.join(split_dir, "*.parquet")))
    for split_dir in (PREPROCESS_DIR_TRAIN, PREPROCESS_DIR_VALID)
)
PyTorch
PyTorch: Preparing Datasets
import torch
from nvtabular.loader.torch import TorchAsyncItr, DLDataLoader
from nvtabular.framework_utils.torch.models import Model
from nvtabular.framework_utils.torch.utils import process_epoch
def _build_dataset_and_loader(parquet_paths):
    """Create the NVTabular iterator and its DLDataLoader for one data split.

    The iterator is a TorchAsyncItr over an nvt.Dataset built from
    ``parquet_paths``, configured with the categorical, continuous and label
    column lists and BATCH_SIZE. Returns ``(dataset, loader)``.
    """
    dataset = TorchAsyncItr(
        nvt.Dataset(parquet_paths),
        batch_size=BATCH_SIZE,
        cats=CATEGORICAL_COLUMNS,
        conts=CONTINUOUS_COLUMNS,
        labels=LABEL_COLUMNS,
    )
    # batch_size=None plus an identity collate_fn: presumably the iterator
    # already yields whole batches and the loader only forwards them -
    # NOTE(review): confirm against the DLDataLoader documentation.
    loader = DLDataLoader(
        dataset, batch_size=None, collate_fn=lambda x: x, pin_memory=False, num_workers=0
    )
    return dataset, loader


train_dataset, train_loader = _build_dataset_and_loader(TRAIN_PATHS)
valid_dataset, valid_loader = _build_dataset_and_loader(VALID_PATHS)
PyTorch: Defining a Model
# Build the network from the hyperparameters and the embedding shapes found
# in the stats file, then move it to the GPU.
model = Model(
    embedding_table_shapes=EMBEDDING_TABLE_SHAPES,
    num_continuous=len(CONTINUOUS_COLUMNS),
    layer_hidden_dims=HIDDEN_DIMS,
    layer_dropout_rates=DROPOUT_RATES,
    emb_dropout=EMBEDDING_DROPOUT_RATE,
    max_output=MAX_LOG_SALES_PREDICTION,
)
model = model.to("cuda")
PyTorch: Training
# Adam over all model parameters, using the shared learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
def rmspe_func(y_pred, y):
    """Return the root mean squared percentage error (RMSPE) in non-log space.

    Both inputs are mapped back from log space with ``exp(x) - 1`` before the
    relative error ``(y_pred - y) / y`` is computed. Entries whose true value
    is zero are excluded: dividing by them turns the whole metric into ``inf``
    (or ``nan``), which hides the error on every other row.

    Args:
        y_pred: Tensor of predictions in log space.
        y: Tensor of true values in log space, broadcastable against ``y_pred``.

    Returns:
        A scalar tensor holding the RMSPE over the entries with a non-zero
        true value. The result is ``nan`` when no such entry exists.
    """
    y_pred, y = torch.exp(y_pred) - 1, torch.exp(y) - 1
    # Align shapes so one boolean mask can index both tensors.
    y_pred, y = torch.broadcast_tensors(y_pred, y)
    nonzero = y != 0
    pct_var = (y_pred[nonzero] - y[nonzero]) / y[nonzero]
    return (pct_var ** 2).mean().pow(0.5)
%%time
from time import time

# Run EPOCHS passes; each pass trains on the training loader, evaluates on the
# validation loader, and reports loss, RMSPE and rows-per-second throughput.
for epoch in range(0, EPOCHS):
    start = time()

    # Training pass.
    train_loss, y_pred, y = process_epoch(train_loader, model, optimizer=optimizer, train=True)
    train_rmspe = rmspe_func(y_pred, y)

    # Validation pass (no optimizer is passed).
    valid_loss, y_pred, y = process_epoch(valid_loader, model, train=False)
    valid_rmspe = rmspe_func(y_pred, y)

    print(
        f"Epoch {epoch:02d}. Train loss: {train_loss:.4f}. Train RMSPE: {train_rmspe:.4f}. Valid loss: {valid_loss:.4f}. Valid RMSPE: {valid_rmspe:.4f}."
    )

    # Wall-clock time of this epoch, including the metric computation and print above.
    t_final = time() - start
    total_rows = sum(
        dataset.num_rows_processed for dataset in (train_dataset, valid_dataset)
    )
    print(
        f"run_time: {t_final} - rows: {total_rows} - epochs: {EPOCHS} - dl_thru: {total_rows / t_final}"
    )
Total batches: 12
Total batches: 0
Epoch 00. Train loss: 8.2353. Train RMSPE: inf. Valid loss: 4.6918. Valid RMSPE: 0.8720.
run_time: 2.8103976249694824 - rows: 844338 - epochs: 25 - dl_thru: 300433.6441570856
Total batches: 12
Total batches: 0
Epoch 01. Train loss: 3.9518. Train RMSPE: 0.8210. Valid loss: 3.5244. Valid RMSPE: 0.8308.
run_time: 1.7620582580566406 - rows: 844338 - epochs: 25 - dl_thru: 479177.1192237499
Total batches: 12
Total batches: 0
Epoch 02. Train loss: 2.5495. Train RMSPE: 0.7655. Valid loss: 2.0343. Valid RMSPE: 0.7342.
run_time: 1.744666576385498 - rows: 844338 - epochs: 25 - dl_thru: 483953.78889486834
Total batches: 12
Total batches: 0
Epoch 03. Train loss: 1.2480. Train RMSPE: 0.6372. Valid loss: 0.7330. Valid RMSPE: 0.5311.
run_time: 1.74592924118042 - rows: 844338 - epochs: 25 - dl_thru: 483603.79108442244
Total batches: 12
Total batches: 0
Epoch 04. Train loss: 0.4000. Train RMSPE: 0.4666. Valid loss: 0.2494. Valid RMSPE: 0.3945.
run_time: 1.74570631980896 - rows: 844338 - epochs: 25 - dl_thru: 483665.5458132268
Total batches: 12
Total batches: 0
Epoch 05. Train loss: 0.1837. Train RMSPE: inf. Valid loss: 0.1510. Valid RMSPE: 0.4358.
run_time: 1.7406132221221924 - rows: 844338 - epochs: 25 - dl_thru: 485080.76881696057
Total batches: 12
Total batches: 0
Epoch 06. Train loss: 0.1635. Train RMSPE: inf. Valid loss: 0.1332. Valid RMSPE: 0.3840.
run_time: 1.7466964721679688 - rows: 844338 - epochs: 25 - dl_thru: 483391.3696247538
Total batches: 12
Total batches: 0
Epoch 07. Train loss: 0.1364. Train RMSPE: inf. Valid loss: 0.1120. Valid RMSPE: 0.3286.
run_time: 1.7637887001037598 - rows: 844338 - epochs: 25 - dl_thru: 478707.0015531506
Total batches: 12
Total batches: 0
Epoch 08. Train loss: 0.1204. Train RMSPE: 0.4360. Valid loss: 0.0997. Valid RMSPE: 0.3041.
run_time: 1.7595922946929932 - rows: 844338 - epochs: 25 - dl_thru: 479848.65729780705
Total batches: 12
Total batches: 0
Epoch 09. Train loss: 0.1117. Train RMSPE: 0.3887. Valid loss: 0.0952. Valid RMSPE: 0.2937.
run_time: 1.7458832263946533 - rows: 844338 - epochs: 25 - dl_thru: 483616.5370255634
Total batches: 12
Total batches: 0
Epoch 10. Train loss: 0.1035. Train RMSPE: inf. Valid loss: 0.0812. Valid RMSPE: 0.2911.
run_time: 1.758488655090332 - rows: 844338 - epochs: 25 - dl_thru: 480149.8136231235
Total batches: 12
Total batches: 0
Epoch 11. Train loss: 0.0978. Train RMSPE: 0.3773. Valid loss: 0.0739. Valid RMSPE: 0.2733.
run_time: 1.7456979751586914 - rows: 844338 - epochs: 25 - dl_thru: 483667.8577938123
Total batches: 12
Total batches: 0
Epoch 12. Train loss: 0.0925. Train RMSPE: 0.3671. Valid loss: 0.0693. Valid RMSPE: 0.2753.
run_time: 1.7409119606018066 - rows: 844338 - epochs: 25 - dl_thru: 484997.5295178771
Total batches: 12
Total batches: 0
Epoch 13. Train loss: 0.0902. Train RMSPE: 0.3725. Valid loss: 0.0722. Valid RMSPE: 0.2833.
run_time: 1.7457911968231201 - rows: 844338 - epochs: 25 - dl_thru: 483642.0309235564
Total batches: 12
Total batches: 0
Epoch 14. Train loss: 0.0869. Train RMSPE: 0.3538. Valid loss: 0.0663. Valid RMSPE: 0.2507.
run_time: 1.7432737350463867 - rows: 844338 - epochs: 25 - dl_thru: 484340.4584292283
Total batches: 12
Total batches: 0
Epoch 15. Train loss: 0.0923. Train RMSPE: inf. Valid loss: 0.0786. Valid RMSPE: 0.2623.
run_time: 1.748652458190918 - rows: 844338 - epochs: 25 - dl_thru: 482850.66368963703
Total batches: 12
Total batches: 0
Epoch 16. Train loss: 0.0858. Train RMSPE: inf. Valid loss: 0.0583. Valid RMSPE: 0.2369.
run_time: 1.7407851219177246 - rows: 844338 - epochs: 25 - dl_thru: 485032.86785323656
Total batches: 12
Total batches: 0
Epoch 17. Train loss: 0.0765. Train RMSPE: inf. Valid loss: 0.0522. Valid RMSPE: 0.2330.
run_time: 1.7414436340332031 - rows: 844338 - epochs: 25 - dl_thru: 484849.45679493726
Total batches: 12
Total batches: 0
Epoch 18. Train loss: 0.0725. Train RMSPE: 0.3219. Valid loss: 0.0564. Valid RMSPE: 0.2332.
run_time: 1.742950439453125 - rows: 844338 - epochs: 25 - dl_thru: 484430.2975504701