import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from blitz.modules import BayesianLinear, BayesianConv2d
from blitz.losses import kl_divergence_from_nn
from blitz.utils import variational_estimator
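The excerpt below keeps only the forward pass of the Bayesian classifier; the constructor it relies on is not shown. A minimal sketch of what it might look like, assuming the LeNet-5-style layout for 28x28 MNIST inputs used in BLiTZ's own example (the class name and layer sizes are assumptions, not taken from the original):

@variational_estimator  # adds sample_elbo() and nn_kl_divergence() to the module
class BayesianCNN(nn.Module):  # hypothetical name
    def __init__(self):
        super().__init__()
        # Bayesian layers sample fresh weights on every forward pass
        self.conv1 = BayesianConv2d(1, 6, (5, 5))
        self.conv2 = BayesianConv2d(6, 16, (5, 5))
        self.fc1 = BayesianLinear(256, 120)  # 16 * 4 * 4 = 256 after two conv/pool stages
        self.fc2 = BayesianLinear(120, 84)
        self.fc3 = BayesianLinear(84, 10)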
    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out
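The deterministic LeNet baseline shares the exact same forward pass; only its constructor swaps the Bayesian layers for standard ones. Again, a hedged sketch of the omitted constructor (class name and layer sizes assumed to mirror the Bayesian model):

class LeNet(nn.Module):  # hypothetical name for the frequentist baseline
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(256, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)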
    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out
    if stop:  # early-stopping flag set by the evaluation code below
        break
    for i, (datapoints, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        loss = classifier.sample_elbo(
            inputs=datapoints.to(device),
            labels=labels.to(device),
            criterion=criterion,
            sample_nbr=3,
            complexity_cost_weight=1 / 50000,
        )
        # print(loss)
        loss.backward()
        optimizer.step()
        iteration += 1
        if iteration % 250 == 0:
            print(loss)
            correct = 0
            total = 0
            with torch.no_grad():
                for data in test_loader:
                    images, labels = data
                    outputs = classifier(images.to(device))
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels.to(device)).sum().item()
            accuracy = 100 * correct / total
            print(
                "Iteration: {} | Accuracy of the network on the 10000 test images: {} %".format(
                    iteration, accuracy
                )
            )
""" Samples the ELBO Loss for a batch of data, consisting of inputs and corresponding-by-index labels The ELBO Loss consists of the sum of the KL Divergence of the model (explained above, interpreted as a "complexity part" of the loss) with the actual criterion - (loss function) of optimization of our model (the performance part of the loss). As we are using variational inference, it takes several (quantified by the parameter sample_nbr) Monte-Carlo samples of the weights in order to gather a better approximation for the loss. Parameters: inputs: torch.tensor -> the input data to the model labels: torch.tensor -> label data for the performance-part of the loss calculation The shape of the labels must match the label-parameter shape of the criterion (one hot encoded or as index, if needed) criterion: torch.nn.Module, custom criterion (loss) function, torch.nn.functional function -> criterion to gather the performance cost for the model sample_nbr: int -> The number of times of the weight-sampling and predictions done in our Monte-Carlo approach to gather the loss to be .backwarded in the optimization of the model. """
loss = 0
for _ in range(sample_nbr):
    outputs = self(inputs)
    loss += criterion(outputs, labels)
    loss += self.nn_kl_divergence() * complexity_cost_weight
return loss / sample_nbr
tensor(4.0596, device='cuda:0', grad_fn=<DivBackward0>)
Iteration: 250 | Accuracy of the network on the 10000 test images: 94.26 %
......
The best accuracy is 98.75 for bayesian nn.
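One practical payoff of the Bayesian model: because every forward pass samples fresh weights, repeated passes over the same input yield a predictive distribution rather than a point estimate. A minimal sketch of how one might surface that uncertainty (the helper name and sample count are illustrative, not part of the original code):

# Hypothetical helper: average several stochastic forward passes to get a
# mean prediction plus a per-class spread that signals model uncertainty.
def predict_with_uncertainty(model, x, n_samples=10):
    with torch.no_grad():
        probs = torch.stack(
            [F.softmax(model(x), dim=1) for _ in range(n_samples)]
        )
    return probs.mean(dim=0), probs.std(dim=0)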
        optimizer.zero_grad()
        pred = classifier(datapoints.to(device))
        loss = criterion(pred, labels.to(device))
        # print(loss)
        loss.backward()
        optimizer.step()
        iteration += 1
        if iteration % 250 == 0:
            print(loss)
            correct = 0
            total = 0
            with torch.no_grad():
                for data in test_loader:
                    images, labels = data
                    outputs = classifier(images.to(device))
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels.to(device)).sum().item()
            accuracy = 100 * correct / total
            print(
                "Iteration: {} | Accuracy of the network on the 10000 test images: {} %".format(
                    iteration, accuracy
                )
            )
            if waiting >= 4:
                print(f"The best accuracy is {best_accuracy} for lenet nn.")
                stop = True
                break
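The waiting and best_accuracy variables above belong to an early-stopping scheme whose update step is not shown in the excerpt. A plausible sketch of that bookkeeping, run right after each accuracy evaluation (the patience threshold of 4 matches the check above; the rest is assumed):

            # Hypothetical update step: reset patience on improvement,
            # otherwise count evaluations without progress.
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                waiting = 0
            else:
                waiting += 1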
tensor(0.2020, device='cuda:0', grad_fn=<NllLossBackward0>)
Iteration: 250 | Accuracy of the network on the 10000 test images: 91.49 %
......
The best accuracy is 99.01 for lenet nn.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from blitz.modules import BayesianLinear
from blitz.utils import variational_estimator
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
X, y = fetch_california_housing(return_X_y=True)
X = StandardScaler().fit_transform(X)
y = StandardScaler().fit_transform(np.expand_dims(y, -1))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.1, random_state=42)
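The training loop below refers to regressor, criterion, optimizer, and dataloader_train, none of which appear in the excerpt. A minimal sketch of that missing setup, modeled on BLiTZ's regression example (the class name, layer widths, batch size, and learning rate are assumptions):

@variational_estimator
class BayesianRegressor(nn.Module):  # hypothetical name
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.blinear1 = BayesianLinear(input_dim, 512)
        self.blinear2 = BayesianLinear(512, output_dim)

    def forward(self, x):
        x = F.relu(self.blinear1(x))
        return self.blinear2(x)

regressor = BayesianRegressor(X.shape[1], 1)  # California housing has 8 features
optimizer = optim.Adam(regressor.parameters(), lr=0.01)
criterion = nn.MSELoss()

# Wrap the numpy splits in tensors and a DataLoader for mini-batching.
ds_train = torch.utils.data.TensorDataset(
    torch.tensor(X_train).float(), torch.tensor(y_train).float()
)
dataloader_train = torch.utils.data.DataLoader(ds_train, batch_size=16, shuffle=True)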
for epoch in range(10):
    for i, (datapoints, labels) in enumerate(dataloader_train):
        optimizer.zero_grad()
        loss = regressor.sample_elbo(
            inputs=datapoints,
            labels=labels,
            criterion=criterion,
            sample_nbr=3,
            complexity_cost_weight=0,  # KL term disabled: the ELBO reduces to the plain MSE
        )
        # loss = criterion(regressor(datapoints), labels)
        loss.backward()
        optimizer.step()
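After training, the same sampling trick used for the classifier gives credible intervals on the regression targets. A hedged sketch of how one might evaluate that (the sample count and the +/- 2 std interval are illustrative choices):

# Hypothetical evaluation: sample the regressor repeatedly on the test set
# and report a mean prediction with a +/- 2 std credible interval.
X_test_t = torch.tensor(X_test).float()
with torch.no_grad():
    preds = torch.stack([regressor(X_test_t) for _ in range(100)])
mean, std = preds.mean(dim=0), preds.std(dim=0)
lower, upper = mean - 2 * std, mean + 2 * std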