Benchmarking CNNs
Hi! I am Jean-Nicolas Jérémie, and the goal of this benchmark is to compare different pre-trained image-recognition networks trained on the ImageNet dataset, which provides natural images labelled over $1000$ classes. The networks tested here are taken from the torchvision.models library: AlexNet, VGG16, MobileNetV2 and ResNet101.
Our use case is to measure the performance of a system which receives a sequence of images and has to make a decision as soon as possible, hence with batch_size=1. Specifically, we also wish to compare different computing architectures such as CPUs, desktop GPUs or more exotic platforms such as the Jetson TX2 (experiment 1). Additionally, we will apply some image transformations, such as up/down-sampling (experiment 2) or conversion to grayscale (experiment 3), to quantify their influence on the accuracy and computation time of each network.
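To give a first intuition of the measurement we are after, here is a minimal sketch of timing a single forward pass with batch_size=1 on whatever device is available (the dummy random input and the choice of AlexNet are placeholders for this example only; the full benchmark below runs on the actual dataset):
import time
import torch, torchvision
# pick whatever device is available (CPU or CUDA)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = torchvision.models.alexnet(pretrained=True).to(device).eval()
x = torch.randn(1, 3, 224, 224, device=device)  # one dummy RGB image, batch_size=1
with torch.no_grad():
    tic = time.time()
    out = model(x)  # a single forward pass
    dt = time.time() - tic
print(f'{dt:.3f} s per image, i.e. {1/dt:.1f} fps')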
In this notebook, I will use the PyTorch library to run the networks and the pandas library to collect and display the results. This notebook was written during a Master 1 internship at the Neurosciences Institute of Timone (INT), under the supervision of Laurent PERRINET. It is curated in the following github repo.
Initialization of the benchmark¶
Our coding strategy is to build up a small library as a package of scripts in the DCNN_benchmark folder and to run all calls to that library from this notebook. This organization is useful for running the benchmark on specific hardware such as the Jetson card on the one hand, and for visualizing results in the notebook on the other hand.
%matplotlib inline
%mkdir -p DCNN_benchmark
Importing libraries; definition of the dataset¶
%%writefile DCNN_benchmark/init.py
# Importing libraries
import os
import time
from time import strftime,gmtime
import json
import numpy as np
import imageio
from numpy import random
from torchvision.datasets import ImageFolder
# to plot
import matplotlib.pyplot as plt
# to store results
import pandas as pd
# figure's variables
fig_width = 20
phi = (np.sqrt(5)+1)/2 # golden ratio
phi = phi**2
colors = ['b', 'r', 'k','g']
# host & date's variables
# HOST = os.uname()[1]
HOST = 'jnjer-HP-Pavilion-Notebook'
HOST = 'fortytwo'
#datetag = strftime("%Y-%m-%d", gmtime())
datetag = '2020-08-27'
#dataset configuration
image_size = 256 # default image resolution
image_sizes = 2**np.arange(6, 10) # resolutions explored in experiment 2
N_images_per_class = 10
#i_labels = random.randint(1000, size=(N_labels)) # Random choice
i_labels = [409, 530, 892, 487, 920, 704, 879, 963, 646, 620 ] # Pre-selected classes
N_labels = len(i_labels)
id_dl = ''
root = 'data'
folder = 'imagenet_classes_100'
path = os.path.join(root, folder) # data path
with open('ImageNet-Datasets-Downloader/imagenet_classes.txt') as f:
labels = [line.strip() for line in f.readlines()]
labels = [label.split(', ')[1].lower().replace('_', ' ') for label in labels]
class_loader = 'ImageNet-Datasets-Downloader/imagenet_class_info.json'
with open(class_loader, 'r') as fp: # load the list of classes known to the data downloader
name = json.load(fp)
# a reverse look-up-table giving the index of a given label (within the whole set of imagenet labels)
reverse_labels = {}
for i_label, label in enumerate(labels):
reverse_labels[label] = i_label
# a reverse look-up-table giving the index of a given i_label (within the sub-set of classes)
reverse_i_labels = {}
for i_label, label in enumerate(i_labels):
reverse_i_labels[label] = i_label
def pprint(message):
print('-'*len(message))
print(message)
print('-'*len(message))
pprint('List of Pre-selected classes')
# choosing the selected classes for recognition
for i_label in i_labels:
print('label', i_label, '=', labels[i_label])
for key in name:
if name[key]['class_name'] == labels[i_label]:
id_dl += key + ' '
pprint('label IDs = ' + str(id_dl) )
%run DCNN_benchmark/init.py
if HOST == 'fortytwo':
do_local = False
python_exec = "KMP_DUPLICATE_LIB_OK=TRUE python3"
else :
    do_local = True
Download of example images from ImageNet¶
We use an ImageNet dataloader to populate a dataset based on the pre-selected or random classes listed in the DCNN_benchmark/init.py file.
scriptname = 'DCNN_benchmark/dataset.py'
%%writefile {scriptname}
from DCNN_benchmark.init import *
# check if the folder exists
if os.path.isdir(path):
list_dir = os.listdir(path)
print("The folder " , folder, " already exists, it includes: ", list_dir)
# no folder, creating one
else :
    print(f"No existing path matches this folder, creating one at {path}")
    os.makedirs(path)
    list_dir = []  # freshly created folder: no classes downloaded yet
# if the folder is empty, download the images using the ImageNet-Datasets-Downloader
if len(list_dir) < N_labels :
    print('This folder does not have enough classes, downloading some more')
cmd =f"python3 ImageNet-Datasets-Downloader/downloader.py -data_root {root} -data_folder {folder} -images_per_class {N_images_per_class} -use_class_list True -class_list {id_dl} -multiprocessing_workers 0"
print('Command to run : ', cmd)
os.system(cmd) # running it
list_dir = os.listdir(path)
elif len(os.listdir(path)) == N_labels :
print(f'The folder already contains : {len(list_dir)} classes')
else : # if there are too many classes, delete some
    print('The folder has too many classes, deleting some')
for elem in os.listdir(path):
contenu = os.listdir(f'{path}/{elem}')
if len(os.listdir(path)) > N_labels :
for x in contenu:
os.remove(f'{path}/{elem}/{x}') # delete exces folders
try:
os.rmdir(f'{path}/{elem}')
except:
os.remove(f'{path}/{elem}')
list_dir = os.listdir(path)
print("Now the folder " , folder, f" contains :", os.listdir(path))
if do_local:
%run {scriptname}
else:
!python3 {scriptname}
Import of the pre-trained networks¶
Here we work with four different pre-trained networks: AlexNet, MobileNetV2, ResNet101 and VGG16:
scriptname = 'DCNN_benchmark/models.py'
%%writefile {scriptname}
from DCNN_benchmark.init import *
import torch
import torchvision
import torchvision.transforms as transforms
# transform function for input's image processing
transform = transforms.Compose([
    transforms.Resize(int(image_size)), # Resize the image so that its smaller edge is image_size pixels.
transforms.CenterCrop(int(image_size-20)), # Crop the image to (image_size-20) x (image_size-20) pixels around the center.
transforms.ToTensor(), # Convert the image to PyTorch Tensor data type.
transforms.Normalize( # Normalize the image by adjusting
mean=[0.485, 0.456, 0.406], # its average and
std=[0.229, 0.224, 0.225] # its standard deviation at the specified values.
)])
image_dataset = ImageFolder(path, transform=transform) # save the dataset
# imports networks with weights
models = {} # dictionary holding the pre-trained models
models['alex'] = torchvision.models.alexnet(pretrained=True)
models['vgg'] = torchvision.models.vgg16(pretrained=True)
models['mob'] = torchvision.models.mobilenet_v2(pretrained=True)
models['res'] = torchvision.models.resnext101_32x8d(pretrained=True)
# Select a device (CPU or CUDA)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
for name in models.keys():
models[name].to(device)
%run {scriptname}
Experiment 1: Image processing and recognition for different labels¶
To recover the classification confidence of the models over the classes on which they have been trained, i.e. the $1000$ classes of the ImageNet library, the softmax mathematical function is added at the last layer of the networks. The softmax function takes a vector of real values (here represented by a 1-D tensor) of dimension $K$ (here $K=1000$ trained classes) and returns, for each of these values, a normalized probability between $0$ and $1$, with a sum equal to $1$. Thus, all the classes are represented in the final vector, and a low probability would then indicate the absence of that class, for instance. A careful reading of the original ImageNet paper shows that this probability reflects the response of users to questions such as "Is there a Burmese cat in this image?" when presented with an image (retrieved on the internet) which is likely to include a "Burmese cat".
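(For reference, for a vector $z \in \mathbb{R}^K$, the softmax is defined as $\mathrm{softmax}(z)_i = e^{z_i} / \sum_{j=1}^{K} e^{z_j}$, which indeed yields values between $0$ and $1$ that sum to $1$.)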
Here, we are interested in a sub-set of such classes. Nevertheless, since recognition is carried out on so-called "natural" images, the irrelevant classes could "mask" the recognition of those of interest. To reduce this effect, we apply a slight modification to the output softmax function, by assuming that we know a priori that the image belongs to one (and only one) category from the sub-set, but that we do not know which one. As a consequence, the softmax is not computed over a vector of $K = 1000$ values but over $K = N_{labels}$ values. The probabilities obtained then correspond to a classification confidence discriminating only the classes of interest, and can be compared to a chance level of $1 / N_{labels}$.
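To make this concrete, here is a minimal sketch of this restricted softmax on a toy output vector (only i_labels matches the pre-selected classes defined above; the random out tensor merely stands in for a network's output):
import torch
# toy logits standing in for the 1000-dimensional output of a network
out = torch.randn(1000)
i_labels = [409, 530, 892, 487, 920, 704, 879, 963, 646, 620]  # pre-selected classes
# softmax restricted to the sub-set: probabilities over the N_labels classes only
percentage = torch.nn.functional.softmax(out[i_labels], dim=0) * 100
print(percentage.sum())  # sums to 100 (%) over the sub-set
print(percentage.max())  # confidence (in %) of the best class within the sub-set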
For further statistical analyses, we collect these different factors (such as the accuracy and the processing time, for different datasets and resolutions) into a pandas DataFrame.
scriptname = 'experiment_basic.py'
%%writefile {scriptname}
#import model's script and set the output file
from DCNN_benchmark.models import *
filename = f'results/{datetag}_results_1_{HOST}.json'
try:
df = pd.read_json(filename)
except:
df = pd.DataFrame([], columns=['model', 'perf', 'fps', 'time', 'label', 'i_label', 'i_image', 'filename', 'device'])
i_trial = 0
# image preprocessing
for i_image, (data, label) in enumerate(image_dataset):
for name in models.keys():
model = models[name]
model.eval()
tic = time.time()
out = model(data.unsqueeze(0).to(device)).squeeze(0)
percentage = torch.nn.functional.softmax(out[i_labels], dim=0) * 100
_, indices = torch.sort(percentage, descending=True)
dt = time.time() - tic
i_label_top = reverse_labels[image_dataset.classes[label]]
perf_ = percentage[reverse_i_labels[i_label_top]].item()
df.loc[i_trial] = {'model':name, 'perf':perf_, 'time':dt, 'fps': 1/dt,
'label':labels[i_label_top], 'i_label':i_label_top,
'i_image':i_image, 'filename':image_dataset.imgs[i_image][0], 'device':str(device)}
        print(f'The {name} model gets {labels[i_label_top]} at {perf_:.2f} % confidence in {dt:.3f} seconds')
i_trial += 1
df.to_json(filename)
if do_local:
%run {scriptname}
else:
!{python_exec} {scriptname}
Display of image recognition for different labels¶
Here we collect our results; we can already display all the data in a table:
filename = f'results/{datetag}_results_1_{HOST}.json'
df = pd.read_json(filename)
df
A display of the computation times of each model on the same dataset, for the sequence of trials:
fig, axs = plt.subplots(figsize=(30, fig_width/phi))
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
for color, name in zip(colors, models.keys()):
axs.set_ylabel('Computation time (s)', size= 18)
axs.set_xlabel('Trial', size= 18)
#axs.set_ylim(0, 1)
df[df['model']==name]['time'].plot(label=name, color=color, marker='s', lw=0)
axs.legend(loc=0, fontsize = 20)
axs.set_title('Processed on : ' + HOST + '_' + str(df['device'][0]), size = 20)
This graph shows the distribution of the classification performance of our four models.
fig, axs = plt.subplots(len(models), 1, figsize=(30, fig_width/phi))
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
for ax, color, name in zip(axs, colors, models.keys()):
ax.set_ylabel('Frequency', fontsize=14)
df[df['model']==name]['perf'].plot.hist(bins=np.linspace(0, 100, 100), lw=1, label=name,ax=ax, color=color, density=True)
ax.legend(loc='upper left', fontsize = 20)
ax.set_xlim(0, 100)
ax.set_ylim(0, 1)
ax.tick_params(axis='x', labelsize=14)
ax.tick_params(axis='y', labelsize=14)
axs[-1].set_xlabel('Classification performance (%)', size= 18)
axs[0].set_title('Processed on : ' + HOST + '_' + str(df['device'][0]), size = 20);
Here we display the 64 worst classification performances, all models combined:
N_image_i = 8
N_image_j = 8
fig, axs = plt.subplots(N_image_i, N_image_j, figsize=(21, 21))
for i, idx in enumerate(df["perf"].argsort()[:(N_image_i*N_image_j)]):
ax = axs[i%N_image_i][i//N_image_i]
ax.imshow(imageio.imread(image_dataset.imgs[df.loc[idx]['i_image']][0]))
ax.set_xticks([])
ax.set_yticks([])
ax.set_xlabel(df.loc[idx]['label'] + ' | ' + df.loc[idx]['model'], color='g')
perf_ = df.loc[idx]['perf']
ax.set_ylabel(f'{perf_:2.1f}', color='g')
To make it even clearer, we extract the median for each model:
Median accuracy
for name in models.keys():
med_perf = np.median(df[df['model']==name]["perf"])
    print(f'For the {name} model, the median classification performance = {med_perf:.2f} %' )
Median computation time
for name in models.keys():
med_perf = np.median(df[df['model']==name]["time"])
print(f'For the {name} model, the median computation time = {med_perf:.3f} s')
Median frames per second
for name in models.keys():
med_perf = np.median(df[df['model']==name]["fps"])
print(f'For the {name} model, the median fps = {med_perf:.3f} Hz' )
To summarize, the model which displays the best accuracy is the ResNet101 network. However, the cost of such a high accuracy is reflected in the computation time, as ResNet101 also presents the longest computation time to process an image. Note that the MobileNet network shows a good accuracy (>95%) while keeping the computation time reasonable. This results in a higher frame rate (images processed per second), which allows near real-time recognition on a standard camera such as a webcam.
Experiment 2: Image processing and recognition for different resolutions¶
Let's now study the same performance indicators at different image resolutions.
scriptname = 'experiment_downsample.py'
%%writefile {scriptname}
#import model's script and set the output file
from DCNN_benchmark.models import *
filename = f'results/{datetag}_results_2_{HOST}.json'
# Output's set up
try:
df_downsample = pd.read_json(filename)
except:
df_downsample = pd.DataFrame([], columns=['model', 'perf', 'fps', 'time', 'label', 'i_label', 'i_image', 'image_size', 'filename', 'device'])
i_trial = 0
# image preprocessing
for image_size in image_sizes:
image_size = int(image_size)
transform = transforms.Compose([ # Downsampling function on the input
        transforms.Resize(image_size), # Resize the image so that its smaller edge is image_size pixels.
transforms.CenterCrop(image_size), # Crop the image to image_size x image_size pixels around the center.
transforms.ToTensor(), # Convert the image to PyTorch Tensor data type.
transforms.Normalize( # Normalize the image by adjusting its average and
# its standard deviation at the specified values.
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)])
image_dataset_downsample = ImageFolder(path, transform=transform) # Get the downsample dataset
    print(f'Resolution: {image_size}')
    # loop over the dataset and run inference for each model
for i_image, (data, label) in enumerate(image_dataset_downsample):
for name in models.keys():
model = models[name]
model.eval()
tic = time.time()
out = model(data.unsqueeze(0).to(device)).squeeze(0)
percentage = torch.nn.functional.softmax(out[i_labels], dim=0) * 100
_, indices = torch.sort(percentage, descending=True)
dt = time.time() - tic
i_label_top = reverse_labels[image_dataset_downsample.classes[label]]
perf_ = percentage[reverse_i_labels[i_label_top]].item()
df_downsample.loc[i_trial] = {'model':name, 'perf':perf_, 'time':dt, 'fps': 1/dt,
'label':labels[i_label_top], 'i_label':i_label_top,
'i_image':i_image, 'filename':image_dataset.imgs[i_image][0], 'image_size': image_size, 'device':str(device)}
            print(f'The {name} model gets {labels[i_label_top]} at {perf_:.2f} % confidence in {dt:.3f} seconds')
i_trial += 1
df_downsample.to_json(filename)
if do_local:
%run {scriptname}
else:
!{python_exec} {scriptname}
Display of image recognition for different resolutions¶
Here, again, we collect our results, and display all the data in a table
filename = f'results/{datetag}_results_2_{HOST}.json'
df_downsample = pd.read_json(filename)
df_downsample
Let's display the accuracy of each model on the same dataset for different resolutions. Accuracies are shown as violin plots to better represent the distribution for each model.
import seaborn as sns
fig, axs = plt.subplots(figsize=(30, fig_width/phi))
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
for color, name in zip(colors, models.keys()):
axs = sns.violinplot(x="image_size", y="perf", data=df_downsample, inner="quartile", hue='model', cut = 0)
axs.set_title('Processed on : ' + HOST + '_' + str(df_downsample['device'][0]), size=20)
axs.set_ylabel('Classification performance (%)', size=18)
axs.set_xlabel('Image size', size=18)
h, l = axs.get_legend_handles_labels()
axs.legend(h[:4], l[:4], loc ='center', fontsize=16);
The 64 worst classification performances, all models and sizes combined:
N_image_i = 8
N_image_j = 8
fig, axs = plt.subplots(N_image_i, N_image_j, figsize=(21, 21))
for i, idx in enumerate(df_downsample["perf"].argsort()[:(N_image_i*N_image_j)]):
ax = axs[i%N_image_i][i//N_image_i]
ax.imshow(imageio.imread(image_dataset.imgs[df_downsample.loc[idx]['i_image']][0]))
ax.set_xticks([])
ax.set_yticks([])
ax.set_xlabel(df_downsample.loc[idx]['label'] + ' | ' + df_downsample.loc[idx]['model']+ ' | ' + str(df_downsample.loc[idx]['image_size']), color='g')
perf_ = df_downsample.loc[idx]['perf']
ax.set_ylabel(f'{perf_:2.1f}', color='g')
A display of the computation times of each model on the same dataset, for different resolutions:
import seaborn as sns
fig, axs = plt.subplots(figsize=(30, fig_width/phi))
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
for color, name in zip(colors, models.keys()):
axs = sns.violinplot(x="image_size", y="time", data=df_downsample, inner="quartile", hue='model')
axs.set_title('Processed on : ' + HOST + '_' + str(df_downsample['device'][0]), size = 20)
axs.set_ylabel('Computation time (s)', size= 18)
axs.set_xlabel('Trial', size= 18)
axs.set_yscale('log')
h, l = axs.get_legend_handles_labels()
axs.legend(h[:4], l[:4], loc='upper center', fontsize=16);
Again, we extract the median for each model:
for name in models.keys():
pprint(f'Benchmarking model {name}')
for image_size in image_sizes:
med_perf = np.median(df_downsample[(df_downsample['model']==name) & (df_downsample['image_size']==image_size)]["perf"])
        print(f'For size {image_size}, the median classification performance = {med_perf:.2f} %' )
The classification performance does not depend on the host (a priori :-) ), but the timing does (see also the synthesis below):
for name in models.keys():
pprint(f'Benchmarking model {name}')
for image_size in image_sizes:
med_perf = np.median(df_downsample[(df_downsample['model']==name) & (df_downsample['image_size']==image_size)]["time"])
print(f'For size {image_size}, the median computation time = {med_perf:.2f} s' )
for name in models.keys():
pprint(f'Benchmarking model {name}')
for image_size in image_sizes:
med_perf = np.median(df_downsample[(df_downsample['model']==name) & (df_downsample['image_size']==image_size)]["fps"])
print(f'For size {image_size}, the median fps = {med_perf:.3f} Hz' )
The classification performance drops when the resolution is too low or, surprisingly, when it is higher than the regular input image size (these networks are usually trained with 128 x 128 pixel inputs). Also, the computation time seems proportional to the resolution: a higher resolution requires a longer delay to process an image on a CPU. A size of 128 x 128 pixels clearly stands out as an optimal compromise for these models.
Experiment 3: Image processing and recognition on grayscale images¶
scriptname = 'experiment_grayscale.py'
%%writefile {scriptname}
#import model's script and set the output file
from DCNN_benchmark.models import *
filename = f'results/{datetag}_results_3_{HOST}.json'
# Output's set up
try:
df_gray = pd.read_json(filename)
except:
df_gray = pd.DataFrame([], columns=['model', 'perf', 'fps', 'time', 'label', 'i_label', 'i_image', 'filename', 'device'])
i_trial = 0
# image preprocessing
transform = transforms.Compose([
transforms.Grayscale(3), # convert the image in grayscale
transforms.Resize(int(image_size)), # Resize the image.
transforms.CenterCrop(int(image_size-20)), # Crop the image with a 20 pixels border.
transforms.ToTensor(), # Convert the image to PyTorch Tensor data type.
transforms.Normalize( # Normalize the image by adjusting its average and
# its standard deviation at the specified values.
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)])
image_dataset_grayscale = ImageFolder(path, transform=transform) # Build the grayscale dataset
# loop over the dataset and run inference for each model
for i_image, (data, label) in enumerate(image_dataset_grayscale):
for name in models.keys():
model = models[name]
model.eval()
tic = time.time()
out = model(data.unsqueeze(0).to(device)).squeeze(0)
percentage = torch.nn.functional.softmax(out[i_labels], dim=0) * 100
_, indices = torch.sort(percentage, descending=True)
dt = time.time() - tic
i_label_top = reverse_labels[image_dataset_grayscale.classes[label]]
perf_ = percentage[reverse_i_labels[i_label_top]].item()
df_gray.loc[i_trial] = {'model':name, 'perf':perf_, 'time':dt, 'fps': 1/dt,
'label':labels[i_label_top], 'i_label':i_label_top,
'i_image':i_image, 'filename':image_dataset.imgs[i_image][0], 'device':str(device)}
        print(f'The {name} model gets {labels[i_label_top]} at {perf_:.2f} % confidence in {dt:.3f} seconds')
i_trial += 1
df_gray.to_json(filename)
if do_local:
%run {scriptname}
else:
!{python_exec} {scriptname}
Display of image recognition for different labels in grayscale¶
Collecting all the results and displaying all the data in a table:
filename = f'results/{datetag}_results_3_{HOST}.json'
df_gray = pd.read_json(filename)
df_gray
A display of the computation times of each model on the same dataset, for a single resolution:
fig, axs = plt.subplots(figsize=(30, fig_width/phi))
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
for color, name in zip(colors, models.keys()):
axs.set_ylabel('Computation time (s)', size= 18)
axs.set_xlabel('Trial', size= 18)
df_gray[df_gray['model']==name]['time'].plot(label=name, color=color, marker='s', lw=0)
axs.legend(loc=0, fontsize = 20)
axs.set_title('Processed on : ' + HOST + '_' + str(df_gray['device'][0]), size = 20)
A display of the accuracy of each model on the same dataset, for a single resolution:
fig, axs = plt.subplots(len(models), 1, figsize=(30, fig_width/phi))
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
for ax, color, name in zip(axs, colors, models.keys()):
ax.set_ylabel('Frequency', fontsize=14)
df_gray[df_gray['model']==name]['perf'].plot.hist(bins=np.linspace(0, 100, 100), lw=0, alpha=0.6, label=name + '_gray', ax=ax, color='k', density=True)
df[df['model']==name]['perf'].plot.hist(bins=np.linspace(0, 100, 100), lw=0, alpha=0.3, label=name + '_color', ax=ax, color=color, density=True)
ax.legend(loc='upper left', fontsize = 20)
ax.set_xlim(0, 100)
ax.set_ylim(0, 1)
ax.tick_params(axis='x', labelsize=14)
ax.tick_params(axis='y', labelsize=14)
axs[-1].set_xlabel('Classification performance (%)', size= 18)
axs[0].set_title('Processed on : ' + HOST + '_' + str(df['device'][0]), size = 20);
The 36 worst classification performances, all models combined:
N_image_i = 6
N_image_j = 6
fig, axs = plt.subplots(N_image_i, N_image_j, figsize=(21, 21))
for i, idx in enumerate(df_gray["perf"].argsort()[:(N_image_i*N_image_j)]):
ax = axs[i%N_image_i][i//N_image_i]
ax.imshow(imageio.imread(image_dataset.imgs[df_gray.loc[idx]['i_image']][0]))
ax.set_xticks([])
ax.set_yticks([])
ax.set_xlabel(df_gray.loc[idx]['label'] + ' | ' + df_gray.loc[idx]['model'], color='g')
perf_ = df_gray.loc[idx]['perf']
ax.set_ylabel(f'{perf_:2.1f}', color='g')
Let's analyze the accuracy of each model on the same dataset for color versus grayscale images. Accuracies are shown as violin plots to better represent the distribution for each model.
import seaborn as sns
fig, axs = plt.subplots(figsize=(30, fig_width/phi))
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
for color, df_, label in zip(['gray', 'red'], [df_gray, df], ['black', 'color']):
axs = sns.violinplot(x="model", y="perf", data=df_, inner="quartile", cut=0, color=color, alpha=.5)
axs.set_title('Processed on : ' + HOST + '_' + str(df_['device'][0]), size=20)
axs.set_ylabel('Classification performance (%)', size=18)
axs.legend(['Grayscale', 'Regular'], fontsize=18)
axs.set_xlabel('Model', size=18)
In summary, we have:
for name in models.keys():
med_perf_orig = np.median(df[df['model']==name]["perf"])
med_perf = np.median(df_gray[df_gray['model']==name]["perf"])
    print(f'For the {name} model, the median classification performance = {med_perf:.2f} % (color = {med_perf_orig:.2f} % )' )
for name in models.keys():
med_perf_orig = np.median(df[df['model']==name]["time"])
med_perf = np.median(df_gray[df_gray['model']==name]["time"])
print(f'For the {name} model, the median computation time = {med_perf:.3f} s (color = {med_perf_orig:.3f} s )' )
for name in models.keys():
med_perf_orig = np.median(df[df['model']==name]["fps"])
med_perf = np.median(df_gray[df_gray['model']==name]["fps"])
print(f'For the {name} model, the median fps = {med_perf:.3f} Hz (color = {med_perf_orig:.3f} Hz )' )
The grayscale transformation of the input seems to degrade the recognition accuracy of all the models, even though they run on the same dataset. There is only a modest gain in processing speed.
Final synthesis¶
We have run the benchmark on various platforms, with or without GPU. Let's summarize the main message.
HOSTS = {'fortytwo': 'iMac pro 36 cores ',
'ai-int-desktop': 'NVIDIA Jetson TX2 ',
'jnjer-HP-Pavilion-Notebook' : 'Intel core i5 7th gen'
# 'inv-ope-de06': 'Dell station with GTX Tegra',
}
for HOST in HOSTS:
print('HOST:', HOST, ', device:', HOSTS[HOST])
We verify that the classification performance is similar on different machines (as these algorithms are deterministic):
for i in [1,3]:
print('> For experiment ', i)
for name in models.keys():
print('>>> For model ', name)
for HOST in HOSTS:
filename = f'results/{datetag}_results_{i}_{HOST}.json'
#print(filename)
df = pd.read_json(filename)
med_perf = np.median(df[df['model']==name]["perf"])
            print(f'On host {HOSTS[HOST]}, for the {name} model, the median classification performance = {med_perf:.2f} %' )
However, the computation time varies greatly depending on the platform:
for i in [1,3]:
print('> For experiment ', i)
for name in models.keys():
print('>>> For model ', name)
for HOST in HOSTS:
filename = f'results/{datetag}_results_{i}_{HOST}.json'
df = pd.read_json(filename)
med_perf = np.median(df[df['model']==name]["time"])
print(f'On host {HOSTS[HOST]}, for the {name} model, the median computation time = {med_perf:.3f} s' )
From experiment 2, we check on the different hosts that the same trend of classification performance holds for different image sizes (as a note, the run on the Jetson failed for resnet at an image size of 64):
for name in models.keys():
print('> For model ', name)
for size in image_sizes :
print(f'>> Image size : {size}')
for HOST in HOSTS:
filename = f'results/{datetag}_results_2_{HOST}.json'
df = pd.read_json(filename)
            med_perf = np.median(df[(df['model']==name) & (df['image_size']==size)]["perf"])
            print(f'On host {HOSTS[HOST]}, for the {name} model, the median classification performance = {med_perf:.2f} %' )
Note that performance may vary due to differences between the datasets automatically extracted on the various machines.