Logistic regression
In a previous notebook, I showed how to fit a psychometric curve using PyTorch. Here, I would like to review some nice properties of the logistic regression model (WORK IN PROGRESS).
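To fix notation: given a vector of factors $x \in \mathbb{R}^N$, the model assigns to each class $k$ a probability through the sigmoid (logistic) function applied to an affine transform of $x$:
$$p_k = \sigma(w_k^\top x + b_k), \quad \text{with} \quad \sigma(z) = \frac{1}{1+e^{-z}},$$
and each class label is then drawn as an independent Bernoulli variable with probability $p_k$. The generative code below stacks the weights $w_k$ and biases $b_k$ into a single matrix $W$ of shape $(N+1) \times n_{classes}$.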
Let's first initialize the notebook:
In [1]:
import time
import os
import datetime
import numpy as np
import matplotlib.pyplot as plt
In [2]:
N = 256 # number of factors
n_classes = 10 # number of classes
N_batch = 4
seed = 1973 # release year of https://en.wikipedia.org/wiki/Ring_Ring_(ABBA_song)
np.random.seed(seed)
W = np.random.randn(N+1, n_classes) # FIXED ground-truth weights (the last row holds the biases)

def psychometric_function(W, factors):
    logit = (factors @ W[:-1, :]) + W[-1, :] # affine transform: weights, then bias
    return 1 / (1 + np.exp(-logit)) # elementwise sigmoid

def get_data(W, seed, N_batch):
    N, n_classes = W.shape[0]-1, W.shape[1]
    np.random.seed(seed)
    factors = np.random.randn(N_batch, N)
    p = psychometric_function(W, factors)
    y = p > np.random.rand(N_batch, n_classes) # draw binary observations from the Bernoulli probabilities
    return factors, p, y
In [3]:
factors = np.random.randn(N_batch, N)
factors.shape
Out[3]:
(4, 256)
In [4]:
W.shape
Out[4]:
(257, 10)
In [5]:
(factors@W[:-1, :]).shape
Out[5]:
(4, 10)
In [6]:
((factors@W[:-1, :]) + W[-1, :]).shape
Out[6]:
(4, 10)
In [7]:
psychometric_function(W, factors)
Out[7]:
In [8]:
factors, p, y = get_data(W, seed, N_batch)
factors.shape, p.shape, y.shape
Out[8]:
((4, 256), (4, 10), (4, 10))
In [9]:
plt.hist(p.ravel(), bins=10)
Out[9]:
Note that the probabilities pile up near $0$ and $1$: each logit is a sum of $N+1=257$ independent standard normal terms (standard deviation $\approx 16$), so the sigmoid is almost always saturated.
Inversion
Can we recover the ground-truth weights from the data alone? Let's fit a logistic regression model to the generated observations using PyTorch.
In [10]:
import torch
from torch.utils.data import TensorDataset, DataLoader
torch.set_default_tensor_type("torch.DoubleTensor") # -> torch.tensor([1.2, 3]).dtype = torch.float64
# see https://sebastianraschka.com/faq/docs/pytorch-crossentropy.html#pytorch-loss-input-confusion-cheatsheet
criterion = torch.nn.BCELoss(reduction="mean") # loss divided by output size
#criterion = torch.nn.NLLLoss(reduction="mean") # loss divided by output size
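As a quick sanity check (a minimal sketch, with `p_hat` and `y_true` as purely illustrative tensors), `BCELoss` with `reduction="mean"` averages the elementwise binary cross-entropy $-[y \log p + (1-y)\log(1-p)]$ over all entries:
# illustrative check: BCELoss equals the hand-written binary cross-entropy
p_hat = torch.rand(4, 10) # hypothetical predicted probabilities
y_true = (torch.rand(4, 10) > 0.5).double() # hypothetical binary targets
by_hand = -(y_true * torch.log(p_hat) + (1 - y_true) * torch.log(1 - p_hat)).mean()
assert torch.isclose(criterion(p_hat, y_true), by_hand)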
class LogisticRegressionModel(torch.nn.Module):
    # torch.nn.Module -> base class for all neural network modules
    def __init__(self, N, n_classes, bias=True):
        super(LogisticRegressionModel, self).__init__()
        self.linear = torch.nn.Linear(N, n_classes, bias=bias)
        # self.nl = torch.nn.LogSoftmax(dim=1) # alternative non-linearity, to pair with NLLLoss
        self.nl = torch.nn.Sigmoid()

    def forward(self, factors):
        return self.nl(self.linear(factors))
In [11]:
learning_rate = 0.005
beta1, beta2 = 0.9, 0.999
betas = (beta1, beta2)
num_epochs = 2 ** 9 + 1
batch_size = 256
n_classes = 10
# amsgrad = False # gives similar results
amsgrad = True
In [12]:
logistic_model = LogisticRegressionModel(N, n_classes)
In [13]:
# smoke test: the untrained model maps a batch of factors to per-class probabilities
factors = torch.randn(N_batch, N)
outputs = logistic_model(factors)
In [14]:
def fit_data(factors, y,
             learning_rate=learning_rate,
             batch_size=batch_size,
             num_epochs=num_epochs,
             betas=betas,
             amsgrad=amsgrad,
             verbose=False, **kwargs
             ):
    # convert to double-precision tensors (y is boolean; BCELoss needs float targets)
    X, labels = torch.Tensor(factors), torch.Tensor(y.astype(float))
    loader = DataLoader(
        TensorDataset(X, labels), batch_size=batch_size, shuffle=True
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    N_batch = factors.shape[0]
    N = factors.shape[1]
    n_classes = y.shape[1]
    logistic_model = LogisticRegressionModel(N, n_classes)
    logistic_model = logistic_model.to(device)
    logistic_model.train()
    optimizer = torch.optim.Adam(
        logistic_model.parameters(), lr=learning_rate, betas=betas, amsgrad=amsgrad
    )
    for epoch in range(int(num_epochs)):
        logistic_model.train()
        losses = []
        for X_, labels_ in loader:
            X_, labels_ = X_.to(device), labels_.to(device)
            outputs = logistic_model(X_)
            loss = criterion(outputs, labels_)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        if verbose and (epoch % (num_epochs // 32) == 0):
            print(f"Iteration: {epoch} - Loss: {np.mean(losses):.5f}")
    # final loss on the full dataset
    logistic_model.eval()
    outputs = logistic_model(X.to(device))
    loss = criterion(outputs, labels.to(device)).item()
    return logistic_model, loss
In [15]:
factors, p, y = get_data(W, seed=seed, N_batch=10000)
logistic_model, loss = fit_data(factors, y, verbose=True)
print("Final loss =", loss)
First, compare the fitted biases to the true ones (the last row of W):
In [16]:
fig, ax = plt.subplots(figsize=(13, 5))
ax.plot(logistic_model.linear.bias.detach().numpy(), 'r', label='fitted')
ax.plot(W[-1, :], 'r--', label='true')
ax.legend()
Out[16]:
In [17]:
logistic_model.linear.weight.shape, W.shape
Out[17]:
(torch.Size([10, 256]), (257, 10))
Now compare the fitted weights to the true ones for a single class:
In [18]:
i_digit = 2
fig, ax = plt.subplots(figsize=(13, 5))
ax.plot(W[:-1, i_digit], 'g--', lw=3, label='true') # exclude the bias row, already compared above
ax.plot(logistic_model.linear.weight[i_digit, :].detach().numpy(), 'r', label='fitted')
ax.legend()
Out[18]:
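To put a single number on the recovery (a small illustrative check; `W_hat` is just a hypothetical name for the fitted weight matrix), we can correlate the fitted weights with the ground truth:
W_hat = logistic_model.linear.weight.detach().numpy().T # shape (N, n_classes), matching W[:-1, :]
corr = np.corrcoef(W[:-1, :].ravel(), W_hat.ravel())[0, 1]
print(f"correlation between true and fitted weights: {corr:.3f}")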