Skip to content
Snippets Groups Projects
Commit 9a8f8e62 authored by Devin Routh
Browse files

Deleted unneeded files

parent 96ce2eeb
Branches lightning
No related tags found
No related merge requests found
#!/usr/bin/env python
# coding: utf-8
# Import all libraries
import itertools
import os
import sys
import time

import tensorflow as tf
import numpy as np
from sklearn.model_selection import StratifiedKFold
import pandas as pd
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
from pynvml import *
import cpuinfo
# ---------------------------------------------------------------------------
# Benchmark configuration, read from positional command-line arguments:
#   argv[1]  number of CPU cores to use for the multiprocessing run
#   argv[2]  k, the number of folds for stratified K-fold cross-validation
#   argv[3]  number of images to keep from the full training set
#   argv[4]  free-form label used when naming the output files
# ---------------------------------------------------------------------------
number_of_cores = int(sys.argv[1])
k = int(sys.argv[2])
number_of_images = int(sys.argv[3])
output_string = sys.argv[4]

# Count the GPUs TensorFlow can see; as soon as one is present the CPU core
# count is pinned to 1.
number_of_gpus = len(tf.config.list_physical_devices('GPU'))
if number_of_gpus >= 1:
    number_of_cores = 1

# Load the Fashion-MNIST data set.
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

# Rescale the training images by 1/255 and keep only the first
# `number_of_images` samples (test_images is left untouched).
train_images = train_images / 255.0
x = train_images[:number_of_images]
y = train_labels[:number_of_images]

# One row per image: the pixel data as a nested list plus its class label.
df = pd.DataFrame({'arrays': x.tolist(), 'classes': y})

# Tag every row with the index of the stratified fold in which it is held out.
skf = StratifiedKFold(n_splits=k)
for fold, (train, validate) in enumerate(skf.split(X=df, y=df['classes'])):
    df.loc[validate, 'kfold'] = fold
# The column is created through partial assignment, so cast it to int only
# once every row has received its fold index.
df['kfold'] = df['kfold'].astype(int)

# Every ordered pair of distinct folds acts as one (test, validation) choice.
folds = df['kfold'].unique()
test_train_validate_permutations = list(itertools.permutations(folds, 2))
# Train a list of models on all folds except one (test, validation) pair and
# report each model's accuracy on the two held-out folds.
def _fold_data(fold_ids):
    """Return ``(images, labels)`` for the rows of ``df`` whose fold is in *fold_ids*.

    The per-row image lists are stacked into a single NumPy array.
    """
    subset = df[df['kfold'].isin(list(fold_ids))]
    return np.stack(subset['arrays'].to_numpy()), subset['classes'].to_numpy()


def compute_accuracy_on_test_validation_fold(tv_tuple, fold_list, model_list):
    """Fit every model on the training folds and score it on the held-out folds.

    Args:
        tv_tuple: ``(test_fold, validation_fold)`` fold ids to hold out.
        fold_list: iterable of all fold ids present in ``df['kfold']``.
        model_list: Keras models; each one is compiled and fitted in place
            (Adam optimizer, sparse categorical cross-entropy on logits,
            10 epochs).

    Returns:
        A list of ``[model.name, test_accuracy, validation_accuracy]`` rows,
        sorted by test accuracy in descending order.

    Raises:
        ValueError: if no fold is left for training once the held-out folds
            are removed (e.g. only two folds exist).
    """
    # Set difference, not symmetric difference: a held-out id that is missing
    # from fold_list must never be pulled into the training set.
    training_folds = set(fold_list) - set(tv_tuple)
    if not training_folds:
        raise ValueError(
            "no training folds remain after holding out the test and "
            "validation folds; at least 3 folds are required"
        )

    arrays_for_training, labels_for_training = _fold_data(training_folds)
    arrays_for_testing, labels_for_testing = _fold_data([tv_tuple[0]])
    arrays_for_validation, labels_for_validation = _fold_data([tv_tuple[1]])

    results_list = []
    for model in model_list:
        # Compile and fit the model, then record the held-out accuracies.
        model.compile(
            optimizer='adam',
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['accuracy'],
        )
        model.fit(arrays_for_training, labels_for_training, epochs=10, verbose=0)
        _, test_accuracy = model.evaluate(
            arrays_for_testing, labels_for_testing, verbose=0)
        _, validation_accuracy = model.evaluate(
            arrays_for_validation, labels_for_validation, verbose=0)
        results_list.append([model.name, test_accuracy, validation_accuracy])

    # Best test accuracy first.
    return sorted(results_list, key=lambda row: row[1], reverse=True)
# Single-core baseline: evaluate every (test, validation) fold permutation
# sequentially in this process and time the whole run.
single_results = list()
# perf_counter() is the monotonic, high-resolution clock intended for
# measuring intervals; time.time() can jump when the system clock is adjusted.
time_start_single_core = time.perf_counter()
for t in test_train_validate_permutations:
    # Reset the seed on every iteration.
    tf.random.set_seed(42)
    # ReLU-activated model
    model_relu = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10)
    ], name="relu")
    # Linear-activated model
    model_linear = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='linear'),
        tf.keras.layers.Dense(10)
    ], name="linear")
    model_list = [model_relu, model_linear]
    # Train and score both models for this fold assignment.
    single_results.append(compute_accuracy_on_test_validation_fold(t, folds, model_list))
    # Drop Keras/TensorFlow state before the next iteration builds new models.
    tf.keras.backend.clear_session()
    tf.compat.v1.reset_default_graph()
    del model_list
time_end_single_core = time.perf_counter()
# Elapsed single-core time in seconds
chunk_single_core = time_end_single_core - time_start_single_core
# Self-contained unit of work for one fold permutation, so that it can be
# dispatched through a multiprocessing helper.
def computeAccuracyScores(testValidateTuple):
    """Build the two benchmark models and score them for one fold assignment.

    Args:
        testValidateTuple: ``(test_fold, validation_fold)`` pair, forwarded to
            ``compute_accuracy_on_test_validation_fold``.

    Returns:
        Whatever ``compute_accuracy_on_test_validation_fold`` returns for the
        freshly built "relu" and "linear" models.
    """
    # Same seed on every call.
    tf.random.set_seed(42)

    # Two models that differ only in the hidden layer's activation; each model
    # is named after its activation. "relu" is built first, then "linear".
    models = [
        tf.keras.Sequential(
            [
                tf.keras.layers.Flatten(input_shape=(28, 28)),
                tf.keras.layers.Dense(128, activation=activation),
                tf.keras.layers.Dense(10),
            ],
            name=activation,
        )
        for activation in ("relu", "linear")
    ]

    scores = compute_accuracy_on_test_validation_fold(testValidateTuple, folds, models)

    # Release Keras/TensorFlow state before the next call.
    tf.keras.backend.clear_session()
    tf.compat.v1.reset_default_graph()
    del models
    return scores
# Pick the "multi" implementation from the available hardware:
#   * more than one GPU -> train under tf.distribute.MirroredStrategy
#   * exactly one GPU   -> no separate run; reuse the single-core timing
#   * no GPU            -> fan the fold permutations out over CPU cores (joblib)
# Intervals are measured with time.perf_counter(), the monotonic clock meant
# for timing; time.time() can jump when the system clock is adjusted.
if number_of_gpus > 1:
    # Multi-GPU implementation
    multi_results = list()
    time_start_multi_gpu = time.perf_counter()
    strategy = tf.distribute.MirroredStrategy()
    for t in test_train_validate_permutations:
        # Reset the seed on every iteration.
        tf.random.set_seed(42)
        # NOTE(review): the original indentation was lost, so the exact extent
        # of this scope is an assumption. Training is kept inside it so that
        # compile() also runs under the strategy -- confirm against upstream.
        with strategy.scope():
            # ReLU-activated model
            model_relu = tf.keras.Sequential([
                tf.keras.layers.Flatten(input_shape=(28, 28)),
                tf.keras.layers.Dense(128, activation='relu'),
                tf.keras.layers.Dense(10)
            ], name="relu")
            # Linear-activated model
            model_linear = tf.keras.Sequential([
                tf.keras.layers.Flatten(input_shape=(28, 28)),
                tf.keras.layers.Dense(128, activation='linear'),
                tf.keras.layers.Dense(10)
            ], name="linear")
            model_list = [model_relu, model_linear]
            # Train and score both models for this fold assignment.
            multi_results.append(compute_accuracy_on_test_validation_fold(t, folds, model_list))
        # Drop Keras/TensorFlow state before the next iteration.
        tf.keras.backend.clear_session()
        tf.compat.v1.reset_default_graph()
        del model_list
    time_end_multi_gpu = time.perf_counter()
    # Elapsed multi-GPU time in seconds
    chunk_multi_gpu = time_end_multi_gpu - time_start_multi_gpu
    # Save chunk time for final recording
    multitime = chunk_multi_gpu
elif number_of_gpus == 1:
    # With a single GPU there is no separate multi run: the single-core time
    # is copied as "multitime" to keep the results table uniform.
    # multi_results is deliberately left undefined in this case.
    multitime = chunk_single_core
else:
    # Multicore (CPU) implementation: one joblib task per fold permutation.
    time_start_multicore = time.perf_counter()
    multi_results = Parallel(n_jobs=number_of_cores)(
        delayed(computeAccuracyScores)(t) for t in test_train_validate_permutations
    )
    time_end_multicore = time.perf_counter()
    # Elapsed multicore time in seconds
    chunk_multicore = time_end_multicore - time_start_multicore
    # Save chunk time for final recording
    multitime = chunk_multicore
# Record the GPU type (if one is present)
if number_of_gpus == 0:
    gpu_type = "None"
else:
    nvmlInit()
    try:
        gpu_name = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(0))
    finally:
        # Always release NVML, even if the query above fails.
        nvmlShutdown()
    # Depending on the pynvml release the name comes back as bytes or as str;
    # only decode when it is actually bytes.
    gpu_type = gpu_name.decode("utf-8") if isinstance(gpu_name, bytes) else gpu_name

# Record the processor platform (query cpuinfo once and reuse the result).
cpu_info = cpuinfo.get_cpu_info()
cpu_type = cpu_info.get("brand_raw")
cpu_speed = cpu_info.get("hz_advertised_friendly")

# One-row table describing this benchmark run.
df = pd.DataFrame(
    columns=["NumCores", "NumGPUs", "GPUType", "CPUType", "CPUSpeed", "k",
             "NumImages", "SingleCoreTime", "MultiTime", "Metadata"],
    data=[[number_of_cores, number_of_gpus, gpu_type, cpu_type, cpu_speed, k,
           number_of_images, chunk_single_core, multitime, output_string]],
)
timestamp = time.strftime("%Y%m%d%H%M%S")
file_name = (
    f"{output_string}_{number_of_cores}cores_{number_of_gpus}gpus_"
    f"{k}k_{number_of_images}images_{timestamp}"
)

# Make sure the output directories exist so a finished benchmark cannot lose
# its results at write time.
os.makedirs("Time_Results", exist_ok=True)
os.makedirs("Accuracy_Results", exist_ok=True)

df.to_csv("Time_Results/" + file_name + ".csv", index=False)
pd.DataFrame(single_results).to_csv(
    "Accuracy_Results/" + file_name + "_single_accuracy_outputs" + ".csv", index=False)

# multi_results only exists when a multi-GPU or multicore run took place.
# Catch exactly that case; any other failure (e.g. a write error) propagates.
try:
    multi_frame = pd.DataFrame(multi_results)
except NameError:
    print("No multi-implementation")
else:
    multi_frame.to_csv(
        "Accuracy_Results/" + file_name + "_multi_accuracy_outputs" + ".csv", index=False)
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment