Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Scientific Workflows
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Model registry
Analyze
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Devin Routh
Scientific Workflows
Commits
9a8f8e62
Commit
9a8f8e62
authored
5 months ago
by
Devin Routh
Browse files
Options
Downloads
Patches
Plain Diff
Deleted unneeded files
parent
96ce2eeb
Branches
lightning
No related tags found
No related merge requests found
Changes
2
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
TensorFlow_Tutorial_CommandLine.py
+0
-236
0 additions, 236 deletions
TensorFlow_Tutorial_CommandLine.py
TensorFlow_Tutorial_Jupyter.ipynb
+0
-640
0 additions, 640 deletions
TensorFlow_Tutorial_Jupyter.ipynb
with
0 additions
and
876 deletions
TensorFlow_Tutorial_CommandLine.py
deleted
100644 → 0
+
0
−
236
View file @
96ce2eeb
#!/usr/bin/env python
# coding: utf-8
# Import all libraries
import itertools
import os
import sys
import time

# Wildcard import kept for the nvml* helpers used when recording the GPU type.
# It is placed before the explicit third-party imports so that those win on
# any name clash.
from pynvml import *

import cpuinfo
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from joblib import Parallel, delayed
from sklearn.model_selection import StratifiedKFold
# Benchmark parameters, read from the command line in this order:
#   <number_of_cores> <k> <number_of_images> <output_string>
if len(sys.argv) < 5:
    # Fail with a usage message instead of a bare IndexError on sys.argv.
    sys.exit(
        "usage: " + sys.argv[0]
        + " <number_of_cores> <k> <number_of_images> <output_string>"
    )

# Number of cores to use for the multicore run
number_of_cores = int(sys.argv[1])

# k for K-Fold CV
k = int(sys.argv[2])
if k < 3:
    # Every run reserves one fold for testing and one for validation, so at
    # least one more fold is needed to train on.
    sys.exit("k must be at least 3 (one test fold, one validation fold, "
             "and at least one training fold)")

# Number of images to keep from the full dataset
number_of_images = int(sys.argv[3])

# Free-form string used in the output file names and recorded as metadata
output_string = sys.argv[4]

# Obtain the number of available GPUs; when at least one GPU is present the
# core count is forced to 1 for the recorded results.
number_of_gpus = len(tf.config.list_physical_devices('GPU'))
number_of_cores = 1 if number_of_gpus >= 1 else number_of_cores
# Load Fashion-MNIST through the Keras dataset helper
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Names of the ten clothing classes
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

# Rescale the pixel values by 1/255
train_images = train_images / 255.0

# Keep only the first `number_of_images` training samples
x = train_images[:number_of_images]
y = train_labels[:number_of_images]

# One row per image: the image as a nested list plus its class label
df = pd.DataFrame({'arrays': x.tolist(), 'classes': y})
# Split the rows into k stratified folds and record, for every row, the index
# of the fold in which it is held out.
skf = StratifiedKFold(n_splits=k)
fold_splits = skf.split(X=df, y=df.classes)
for fold, (train, validate) in enumerate(fold_splits):
    df.loc[validate, 'kfold'] = fold

# Store the fold ids as integers
df['kfold'] = df.kfold.astype(int)

# Every ordered pair of distinct folds is one (test, validation) assignment
folds = df['kfold'].unique()
fold_pairs = itertools.permutations(folds, 2)
test_train_validate_permutations = list(fold_pairs)
def _arrays_and_labels_for_folds(fold_ids):
    """Return (stacked image arrays, labels) for the rows of ``df`` in *fold_ids*.

    Reads the module-level ``df`` and keeps the rows whose 'kfold' value is one
    of *fold_ids*.
    """
    subset = df[df['kfold'].isin(fold_ids)]
    return np.stack(subset['arrays'].to_numpy()), subset['classes'].to_numpy()


def compute_accuracy_on_test_validation_fold(tv_tuple, fold_list, model_list):
    """Train each model on the remaining folds and score it on two held-out folds.

    Args:
        tv_tuple: pair of fold ids; element 0 is the test fold and element 1
            is the validation fold.
        fold_list: all fold ids present in the module-level ``df``.
        model_list: Keras models; each one is compiled and fitted in place
            (10 epochs, Adam optimizer).

    Returns:
        A list of ``[model.name, test_accuracy, validation_accuracy]`` entries,
        sorted by test accuracy with the highest first.

    Raises:
        ValueError: if no fold is left for training once the test and
            validation folds are removed.
    """
    # Training folds are all folds except the two held out.  A set difference
    # is used (not a symmetric difference) so that an id in tv_tuple that is
    # missing from fold_list can never end up in the training set.
    training_folds = list(set(fold_list) - set(tv_tuple))
    if not training_folds:
        raise ValueError(
            "no folds left for training after removing the test and validation folds"
        )

    arrays_for_training, labels_for_training = _arrays_and_labels_for_folds(training_folds)
    arrays_for_testing, labels_for_testing = _arrays_and_labels_for_folds([tv_tuple[0]])
    arrays_for_validation, labels_for_validation = _arrays_and_labels_for_folds([tv_tuple[1]])

    # Train the models with the training data and compute the accuracies
    results_list = []
    for model in model_list:
        # from_logits=True: the models built in this script end in a Dense(10)
        # layer without an activation.
        model.compile(optimizer='adam',
                      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                      metrics=['accuracy'])
        model.fit(arrays_for_training, labels_for_training, epochs=10, verbose=0)

        test_loss, test_accuracy = model.evaluate(
            arrays_for_testing, labels_for_testing, verbose=0)
        validation_loss, validation_accuracy = model.evaluate(
            arrays_for_validation, labels_for_validation, verbose=0)

        results_list.append([model.name, test_accuracy, validation_accuracy])

    # Best test accuracy first
    return sorted(results_list, key=lambda row: row[1], reverse=True)
# Sequential baseline: evaluate every (test, validation) fold pair one after
# the other in this process.  The elapsed time is reported as the
# "single core" time.
single_results = list()

# perf_counter() is monotonic, so the measured interval cannot be distorted by
# system clock adjustments during the run.
time_start_single_core = time.perf_counter()

for t in test_train_validate_permutations:

    # Set the seed on every iteration
    tf.random.set_seed(42)

    # Two models that differ only in the hidden-layer activation
    # ('relu' and 'linear'); each model is named after its activation.
    model_list = [
        tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(128, activation=activation),
            tf.keras.layers.Dense(10)
        ], name=activation)
        for activation in ('relu', 'linear')
    ]

    # Apply the models to the data, using the test-train-validate folds
    single_results.append(compute_accuracy_on_test_validation_fold(t, folds, model_list))

    # Reset Keras/TensorFlow state before the next pair of models is built
    tf.keras.backend.clear_session()
    tf.compat.v1.reset_default_graph()
    del model_list

time_end_single_core = time.perf_counter()

# Elapsed single core time, in seconds
chunk_single_core = time_end_single_core - time_start_single_core
def computeAccuracyScores(testValidateTuple):
    """Build fresh models and score them for one (test, validation) fold pair.

    Written as a one-argument function so that it can be handed to a
    multiprocessing helper.  Uses the module-level ``folds``.

    Args:
        testValidateTuple: pair of fold ids, passed on unchanged to
            ``compute_accuracy_on_test_validation_fold``.

    Returns:
        Whatever ``compute_accuracy_on_test_validation_fold`` returns for the
        two freshly built models.
    """
    # Same seed on every call
    tf.random.set_seed(42)

    # One model per hidden-layer activation; the model is named after it
    candidate_models = []
    for hidden_activation in ("relu", "linear"):
        layer_stack = [
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(128, activation=hidden_activation),
            tf.keras.layers.Dense(10),
        ]
        candidate_models.append(tf.keras.Sequential(layer_stack, name=hidden_activation))

    scores = compute_accuracy_on_test_validation_fold(
        testValidateTuple, folds, candidate_models)

    # Reset Keras/TensorFlow state before the next call
    tf.keras.backend.clear_session()
    tf.compat.v1.reset_default_graph()
    del candidate_models

    return scores
# Run the workload a second time using whatever parallel hardware is present:
#   - more than one GPU: models are built under tf.distribute.MirroredStrategy
#   - exactly one GPU:   nothing extra is run; the single core time is reused
#   - no GPU:            the fold pairs are spread over `number_of_cores`
#                        joblib workers
# Intervals are measured with time.perf_counter(), which is monotonic and
# therefore unaffected by system clock adjustments.
if number_of_gpus > 1:

    # Multi-GPU implementation
    multi_results = list()

    time_start_multi_gpu = time.perf_counter()

    strategy = tf.distribute.MirroredStrategy()

    for t in test_train_validate_permutations:

        # Set the seed on every iteration
        tf.random.set_seed(42)

        with strategy.scope():
            # Two models that differ only in the hidden-layer activation;
            # each model is named after its activation.
            model_list = [
                tf.keras.Sequential([
                    tf.keras.layers.Flatten(input_shape=(28, 28)),
                    tf.keras.layers.Dense(128, activation=activation),
                    tf.keras.layers.Dense(10)
                ], name=activation)
                for activation in ('relu', 'linear')
            ]

            # NOTE(review): the original indentation of this call was not
            # recoverable; it is kept inside the strategy scope so that
            # compile() runs under the same strategy as model creation.
            # Confirm against the original file.
            multi_results.append(
                compute_accuracy_on_test_validation_fold(t, folds, model_list))

        # Reset Keras/TensorFlow state before the next pair of models is built
        tf.keras.backend.clear_session()
        tf.compat.v1.reset_default_graph()
        del model_list

    time_end_multi_gpu = time.perf_counter()

    # Elapsed multi-GPU time, in seconds
    chunk_multi_gpu = time_end_multi_gpu - time_start_multi_gpu

    # Save chunk time for final recording
    multitime = chunk_multi_gpu

elif number_of_gpus == 1:

    # If there's just 1 GPU, copy the single core time as the "multitime" for
    # ease during analysis of the results.  `multi_results` is deliberately
    # left undefined in this branch; the result-writing code at the end of the
    # script copes with its absence.
    multitime = chunk_single_core

else:

    # Multicore implementation: one joblib task per fold pair
    time_start_multicore = time.perf_counter()

    multi_results = Parallel(n_jobs=number_of_cores)(
        delayed(computeAccuracyScores)(t) for t in test_train_validate_permutations
    )

    time_end_multicore = time.perf_counter()

    # Elapsed multicore time, in seconds
    chunk_multicore = time_end_multicore - time_start_multicore

    # Save chunk time for final recording
    multitime = chunk_multicore
# Record the GPU type (if one is present)
if number_of_gpus == 0:
    gpu_type = "None"
else:
    nvmlInit()
    try:
        gpu_name = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(0))
    finally:
        # Release NVML again; nothing else in this script uses it.
        nvmlShutdown()
    # Depending on the installed pynvml version the name is returned either as
    # bytes or already as str; only bytes need decoding.
    gpu_type = gpu_name.decode("utf-8") if isinstance(gpu_name, bytes) else gpu_name

# Record the processor platform.  The CPU information is queried once and
# both fields are read from the same result.
cpu_info = cpuinfo.get_cpu_info()
cpu_type = cpu_info.get("brand_raw")
cpu_speed = cpu_info.get("hz_advertised_friendly")
# One-row table describing this benchmark run
df = pd.DataFrame(
    columns=["NumCores", "NumGPUs", "GPUType", "CPUType", "CPUSpeed",
             "k", "NumImages", "SingleCoreTime", "MultiTime", "Metadata"],
    data=[[number_of_cores, number_of_gpus, gpu_type, cpu_type, cpu_speed,
           k, number_of_images, chunk_single_core, multitime, output_string]])

# Common file-name stem: run parameters followed by a timestamp
file_name = "{}_{}cores_{}gpus_{}k_{}images_{}".format(
    output_string, number_of_cores, number_of_gpus, k, number_of_images,
    time.strftime("%Y%m%d%H%M%S"))

# Create the output directories if needed, so that a finished run is not lost
# because a directory is missing.
os.makedirs("Time_Results", exist_ok=True)
os.makedirs("Accuracy_Results", exist_ok=True)

df.to_csv("Time_Results/" + file_name + ".csv", index=False)

pd.DataFrame(single_results).to_csv(
    "Accuracy_Results/" + file_name + "_single_accuracy_outputs" + ".csv",
    index=False)

# `multi_results` only exists when a multi-GPU or multicore run took place.
# Only that specific case is treated as "no multi-implementation"; any other
# failure (for example while writing the file) is allowed to surface.
try:
    multi_frame = pd.DataFrame(multi_results)
except NameError:
    print("No multi-implementation")
else:
    multi_frame.to_csv(
        "Accuracy_Results/" + file_name + "_multi_accuracy_outputs" + ".csv",
        index=False)
This diff is collapsed.
Click to expand it.
TensorFlow_Tutorial_Jupyter.ipynb
deleted
100644 → 0
+
0
−
640
View file @
96ce2eeb
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment