Generate metadata table from folder content
Listing files in a folder and using filename parts to create a table
Code
library(purrr)
# Paths
# The image folder is located relative to the current working directory:
# two levels above getwd(), then _data/SPring/Images.
dirinput <- file.path(dirname(dirname(getwd())), '_data', 'SPring', 'Images')

# Find relevant files
# Keep only the names that literally end in the extension. A plain suffix
# test is used instead of a regular expression because '.' in a regex matches
# any character (so 'fooxjpg' would also pass) and a leading '*' is a glob
# habit that has no defined meaning at the start of a regex.
# The comparison is case-sensitive: 'photo.JPG' is not selected.
ext <- '.jpg'
files <- dir(dirinput)
files <- files[endsWith(files, ext)]
# Table with filename parts
#-------------------------------
# One row per file in `files`. Subject files (names starting with 'JP') get
# their name parts spread over the columns; every other file keeps empty
# strings in those columns and gets subjID 'N/A'.
# All columns are created at full length inside data.frame(), so an empty
# file list yields a valid 0-row table instead of an error.
n_files <- length(files)
tbl <- data.frame(
  files = files,
  subjID = rep('N/A', n_files),
  status = rep('', n_files),
  scanID = rep('', n_files),
  desc = rep('', n_files),
  recon = rep('', n_files),
  stringsAsFactors = FALSE
)

# Separate extension and first part ('_' separated)
# Only a literal trailing `ext` is removed; an 'xjpg'-like sequence in the
# middle of a name is left alone.
basenames <- tbl$files
has_ext <- endsWith(basenames, ext)
basenames[has_ext] <- substr(
  basenames[has_ext], 1L, nchar(basenames[has_ext]) - nchar(ext)
)
# Everything before the first underscore
basenames_firstpart <- sub('_.*$', '', basenames)

# Files starting with 'JP' contain subject data with meaningful name parts
is_subject <- startsWith(tbl$files, 'JP')

# Split the first part by '-' once per file; a missing part becomes NA
nameparts <- strsplit(basenames_firstpart[is_subject], '-', fixed = TRUE)
namepart <- function(i) {
  vapply(nameparts, function(parts) parts[i], character(1))
}
tbl$subjID[is_subject] <- namepart(1L) # 1st element split by '-'
tbl$status[is_subject] <- namepart(2L) # 2nd element split by '-'
tbl$scanID[is_subject] <- namepart(3L) # 3rd element split by '-'
tbl$desc[is_subject] <- namepart(4L)   # 4th element split by '-'

# Last element after '_': NA when the name has no underscore or nothing
# follows the last underscore
recon <- sub('^.*_', '', basenames[is_subject])
no_recon <- !grepl('_', basenames[is_subject], fixed = TRUE) | recon == ''
recon[no_recon] <- NA_character_
tbl$recon[is_subject] <- recon
# Add HTML code so that pictures will be rendered in the table
# ----------------------------------------------------------------
# Each cell is a thumbnail (<img>, height 70) wrapped in a link that opens in
# a new tab. The link target is built from the GitLab URL below; the image
# source is built from `dirinput`, i.e. the path used to list the files.
pic_fullpath <- file.path(
  'https://gitlab.uzh.ch/crsuzh/afford_page/-/blob/master/_data/SPring/Images',
  tbl$files
)
pic_relpath <- file.path(dirinput, tbl$files)
# sprintf() returns a zero-length result for zero-length input, so the
# assignment also works on a table without rows (paste0() would return a
# single string there and the column assignment would fail).
tbl$pic <- sprintf(
  "<a href='%s' target='_blank'><img src='%s' height='70'></a>",
  pic_fullpath, pic_relpath
)

# Sort: rows with subjects first
# The 'N/A' rows are pushed to the end explicitly rather than relying on
# 'N/A' happening to sort after the subject IDs; subject rows are ordered by
# subjID.
tbl <- tbl[order(tbl$subjID == 'N/A', tbl$subjID), ]
We have a folder with preview data files (thumbnail pictures). Their file names encode our key variable, Subject ID, which can be matched to the same variable in the table with subject information. Other parts of the file name encode different attributes of the file content (e.g., dead or in vivo). The file name parts are split into the columns of a table using the ‘-’ and ‘_’ separators.
Important: file-naming conventions
A consistent file-naming scheme facilitates the process of generating metadata tables based on file name and folder name separators.
Back to top
The generated table has 762 rows and 7 columns.