Logistic regression

In a previous notebook, I showed how to fit a psychometric curve using PyTorch. Here, I would like to review some nice properties of the logistic regression model (WORK IN PROGRESS).

Let's first initialize the notebook:

In [1]:
import time
import os
import datetime
import numpy as np
import matplotlib.pyplot as plt
In [2]:
N = 256 # number of factors
n_classes = 10 # number of classes
N_batch = 4
seed = 1973 # release year of https://en.wikipedia.org/wiki/Ring_Ring_(ABBA_song)

np.random.seed(seed)
W = np.random.randn(N+1, n_classes) # fixed ground-truth weights, with the bias stored in the last row

def psychometric_function(W, factors):
    print(W.shape, factors.shape)
    # affine transform (weights in W[:-1, :], bias in the last row) followed by a sigmoid
    logit = (factors @ W[:-1, :]) + W[-1, :]
    return 1 / (1 + np.exp(-logit))

def get_data(W, seed, N_batch):
    N, n_classes = W.shape[0]-1, W.shape[1]
    np.random.seed(seed)
    factors = np.random.randn(N_batch, N)
    p = psychometric_function(W, factors)
    y = p > np.random.rand(N_batch, n_classes)  # Bernoulli draws with success probability p
    
    return factors, p, y
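
As a quick aside (a minimal sketch, not part of the original notebook): the generative model above is p = sigmoid(factors @ W[:-1, :] + W[-1, :]), with the last row of W playing the role of the bias. The sigmoid is invertible, and its inverse, the logit, is worth keeping in mind for the inversion below.

# Minimal sketch (not in the original notebook): the sigmoid used above and its
# inverse, the logit, which maps a probability back to the linear score.
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def logit(p):
    return np.log(p / (1 - p))

z = np.linspace(-5, 5, 11)
assert np.allclose(logit(sigmoid(z)), z)  # round-trip check
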
In [3]:
factors = np.random.randn(N_batch, N)
factors.shape
Out[3]:
(4, 256)
In [4]:
W.shape
Out[4]:
(257, 10)
In [5]:
(factors@W[:-1, :]).shape
Out[5]:
(4, 10)
In [6]:
((factors@W[:-1, :]) + W[-1, :]).shape
Out[6]:
(4, 10)
In [7]:
psychometric_function(W, factors)
(257, 10) (4, 256)
Out[7]:
array([[9.99953315e-01, 3.45805742e-04, 9.99999999e-01, 4.75663615e-10,
        1.00000000e+00, 9.82830237e-01, 2.55754635e-09, 4.08280280e-01,
        4.28969143e-08, 9.99999994e-01],
       [1.10737661e-07, 8.51910182e-01, 2.73556933e-01, 9.99999999e-01,
        5.29446551e-02, 1.00000000e+00, 6.10242975e-03, 9.99336308e-01,
        1.00000000e+00, 9.29815468e-01],
       [9.99999496e-01, 9.99936173e-01, 3.46135826e-07, 1.00000000e+00,
        2.80007212e-04, 1.00000000e+00, 1.09061508e-07, 9.49790432e-04,
        7.03708243e-07, 1.09295139e-04],
       [1.00000000e+00, 9.99999846e-01, 1.00000000e+00, 9.99999968e-01,
        1.33090647e-04, 1.00000000e+00, 5.67447527e-02, 9.81359607e-01,
        9.99998653e-01, 9.99882309e-01]])
In [8]:
factors, p, y = get_data(W, seed, N_batch)
factors.shape, p.shape, y.shape
(257, 10) (4, 256)
Out[8]:
((4, 256), (4, 10), (4, 10))
In [9]:
plt.hist(p.ravel(), bins=10)
Out[9]:
(array([17.,  1.,  1.,  0.,  0.,  2.,  0.,  0.,  0., 19.]),
 array([1.10640081e-12, 1.00000000e-01, 2.00000000e-01, 3.00000000e-01,
        4.00000000e-01, 5.00000000e-01, 6.00000000e-01, 7.00000000e-01,
        8.00000000e-01, 9.00000000e-01, 1.00000000e+00]),
 <BarContainer object of 10 artists>)
(Figure: histogram of the simulated probabilities p; most values pile up in the first and last bins.)
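
Most of the probability mass sits near 0 and 1. This is expected: each logit is a sum of N = 256 products of independent standard normals (plus a unit-variance bias term), so its standard deviation is roughly sqrt(257) ≈ 16 and the sigmoid saturates for most draws. A rough check (a sketch, not in the original notebook):

# Rough check (sketch): the logits have a standard deviation of about sqrt(N + 1) = 16,
# which pushes the sigmoid into its saturated regime for most samples.
logits = factors @ W[:-1, :] + W[-1, :]
print(logits.std())  # roughly 16
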

Inversion: recovering the weights with PyTorch

In [10]:
import torch
from torch.utils.data import TensorDataset, DataLoader
torch.set_default_tensor_type("torch.DoubleTensor") # -> torch.tensor([1.2, 3]).dtype = torch.float64
# see https://sebastianraschka.com/faq/docs/pytorch-crossentropy.html#pytorch-loss-input-confusion-cheatsheet
criterion = torch.nn.BCELoss(reduction="mean") # loss divided by output size
#criterion = torch.nn.NLLLoss(reduction="mean") # loss divided by output size

class LogisticRegressionModel(torch.nn.Module):
    #torch.nn.Module -> Base class for all neural network modules
    def __init__(self, N, n_classes, bias=True):
        super(LogisticRegressionModel, self).__init__() 
        self.linear = torch.nn.Linear(N, n_classes, bias=bias)
        # self.nl = torch.nn.LogSoftmax(n_classes)
        self.nl = torch.nn.Sigmoid()

    def forward(self, factors):
        return self.nl(self.linear(factors))
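
A note on the loss: applying torch.nn.Sigmoid in the model and then BCELoss works, but PyTorch also offers BCEWithLogitsLoss, which fuses the sigmoid and the binary cross-entropy in a numerically more stable way. A sketch of that variant (not used in this notebook):

# Alternative sketch (not used below): output raw logits and let
# BCEWithLogitsLoss apply the sigmoid internally (numerically more stable).
class LogisticRegressionLogits(torch.nn.Module):
    def __init__(self, N, n_classes, bias=True):
        super().__init__()
        self.linear = torch.nn.Linear(N, n_classes, bias=bias)

    def forward(self, factors):
        return self.linear(factors)  # raw logits, no sigmoid here

criterion_logits = torch.nn.BCEWithLogitsLoss(reduction="mean")
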
In [11]:
learning_rate = 0.005
beta1, beta2 = 0.9, 0.999
betas = (beta1, beta2)
num_epochs = 2 ** 9 + 1
batch_size = 256
n_classes = 10
amsgrad = True  # amsgrad = False gives similar results
In [12]:
logistic_model = LogisticRegressionModel(N, n_classes)
In [13]:
factors = torch.randn(N_batch, N)
outputs = logistic_model(factors)
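
A quick sanity check (a sketch, not an original cell): the forward pass maps a (N_batch, N) input to (N_batch, n_classes) probabilities strictly between 0 and 1.

# Sanity check sketch: output shape and range of the untrained model.
print(outputs.shape)                               # torch.Size([4, 10])
print(outputs.min().item(), outputs.max().item())  # both strictly between 0 and 1
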
In [14]:
def fit_data(factors, y, 
            learning_rate=learning_rate,
            batch_size=batch_size,  # gamma=gamma,
            num_epochs=num_epochs,
            betas=betas,
            verbose=False, **kwargs
        ):

    X, labels = torch.Tensor(factors[:, None]), torch.Tensor(y[:, None])
    loader = DataLoader(
        TensorDataset(X, labels), batch_size=batch_size, shuffle=True
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    N_batch = factors.shape[0]
    N = factors.shape[1]
    n_classes = y.shape[1]
    logistic_model = LogisticRegressionModel(N, n_classes)
    logistic_model = logistic_model.to(device)
    logistic_model.train()
    optimizer = torch.optim.Adam(
        logistic_model.parameters(), lr=learning_rate, betas=betas, amsgrad=amsgrad
    )
    for epoch in range(int(num_epochs)):
        logistic_model.train()
        losses = []
        for X_, labels_ in loader:
            X_, labels_ = X_.to(device), labels_.to(device)
            outputs = logistic_model(X_)
            loss = criterion(outputs, labels_)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())

        if verbose and (epoch % (num_epochs // 32) == 0):
            print(f"Iteration: {epoch} - Loss: {np.mean(losses):.5f}")

    logistic_model.eval()
    # final evaluation on the full dataset; move tensors to the model's device
    X, labels = torch.Tensor(factors[:, None]).to(device), torch.Tensor(y[:, None]).to(device)
    outputs = logistic_model(X)
    loss = criterion(outputs, labels).item()
    return logistic_model, loss
In [15]:
factors, p, y = get_data(W, seed=seed, N_batch=10000)
logistic_model, loss = fit_data(factors, y, verbose=True)
print("Final loss =", loss)
(257, 10) (10000, 256)
Iteration: 0 - Loss: 0.51191
Iteration: 16 - Loss: 0.11679
Iteration: 32 - Loss: 0.09595
Iteration: 48 - Loss: 0.08673
Iteration: 64 - Loss: 0.08175
Iteration: 80 - Loss: 0.07856
Iteration: 96 - Loss: 0.07641
Iteration: 112 - Loss: 0.07492
Iteration: 128 - Loss: 0.07407
Iteration: 144 - Loss: 0.07360
Iteration: 160 - Loss: 0.07150
Iteration: 176 - Loss: 0.07222
Iteration: 192 - Loss: 0.07206
Iteration: 208 - Loss: 0.07195
Iteration: 224 - Loss: 0.07036
Iteration: 240 - Loss: 0.07067
Iteration: 256 - Loss: 0.07029
Iteration: 272 - Loss: 0.07079
Iteration: 288 - Loss: 0.06933
Iteration: 304 - Loss: 0.07033
Iteration: 320 - Loss: 0.06982
Iteration: 336 - Loss: 0.06929
Iteration: 352 - Loss: 0.06922
Iteration: 368 - Loss: 0.06944
Iteration: 384 - Loss: 0.06908
Iteration: 400 - Loss: 0.07040
Iteration: 416 - Loss: 0.06889
Iteration: 432 - Loss: 0.06974
Iteration: 448 - Loss: 0.06888
Iteration: 464 - Loss: 0.06813
Iteration: 480 - Loss: 0.06891
Iteration: 496 - Loss: 0.06770
Iteration: 512 - Loss: 0.06920
Final loss = 0.06601844977709632
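
Since the labels are Bernoulli draws from the true probabilities p, the best achievable mean binary cross-entropy is the mean Bernoulli entropy of p. A quick comparison (a sketch, not in the original notebook):

# Sketch: the irreducible part of the BCE is the mean Bernoulli entropy of the
# true probabilities p; the fitted loss should approach this floor.
eps = 1e-12  # avoid log(0) for saturated probabilities
entropy = -(p * np.log(p + eps) + (1 - p) * np.log(1 - p + eps))
print("entropy floor =", entropy.mean())
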
In [16]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(13, 5))
ax.plot(logistic_model.linear.bias.detach().numpy(), 'r')  # learned bias
ax.plot(W[-1, :], 'r--')  # true bias (last row of W)
Out[16]:
[<matplotlib.lines.Line2D at 0x11ed6e730>]
(Figure: learned bias (solid red) compared with the true bias W[-1, :] (dashed red).)
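
The learned bias should track the true bias. To quantify the agreement (a sketch, not an original cell):

# Sketch: correlation between the learned bias and the true bias (last row of W).
bias_learned = logistic_model.linear.bias.detach().numpy()
print(np.corrcoef(bias_learned, W[-1, :])[0, 1])
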
In [17]:
logistic_model.linear.weight.shape, W.shape
Out[17]:
(torch.Size([10, 256]), (257, 10))
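
torch.nn.Linear stores its weight as (out_features, in_features), so the learned counterpart of W[:-1, :] is the transpose of linear.weight; the next cell therefore compares weight[i_digit, :] against column i_digit of W. A shape check (a sketch, not an original cell):

# Shape note sketch: the learned counterpart of W[:-1, :] is linear.weight transposed.
W_learned = logistic_model.linear.weight.detach().numpy().T
print(W_learned.shape, W[:-1, :].shape)  # (256, 10) (256, 10)
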
In [18]:
i_digit = 2
fig, ax = plt.subplots(figsize=(13, 5))
ax.plot(W[:-1, i_digit], 'g--', lw=3)  # true weights for this class (bias row dropped)
ax.plot(logistic_model.linear.weight[i_digit, :].detach().numpy(), 'r')  # learned weights
Out[18]:
[<matplotlib.lines.Line2D at 0x11edfd490>]
(Figure: true weights W[:-1, i_digit] (dashed green, thick) compared with the learned weights (red) for class i_digit = 2.)
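
Finally, a global check on how well the weights are recovered across all classes (a sketch, not part of the original notebook):

# Sketch: correlation between all learned weights and the corresponding true weights.
W_learned = logistic_model.linear.weight.detach().numpy().T  # (256, 10), same layout as W[:-1, :]
print(np.corrcoef(W_learned.ravel(), W[:-1, :].ravel())[0, 1])
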