#!/usr/bin/env bash
#SBATCH --time=02:00:00
#SBATCH --mem-per-cpu=20G
#SBATCH --cpus-per-task=2
#SBATCH --job-name=panaroo
#SBATCH --output=panaroo_%j.out
#SBATCH --error=panaroo_%j.err

# Runs panaroo (pangenome analysis) on the prokka annotations (.gff) of every
# sample found in the input data folder.
#
# Before submitting, fill in `datadir` and `wd` below:
#   datadir  folder with the input files, named <sample>_<anything>
#   wd       results folder containing <sample>/prokka_annotation/<sample>.gff
# Output is written to $wd/panaroo/panaroo_output.

# Print an error message to stderr and abort the job.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Load singularity module. This happens before strict mode is enabled because
# the module machinery may reference unset variables internally.
module load singularityce || die "could not load module 'singularityce'"

# Abort on the first failing command, unset variable or failing pipeline stage
set -euo pipefail

# Declare variables
# input data folder
datadir=
# results folder
wd=
# Path to singularity images folder
singpath=/shares/amr.imm.uzh/bioinfo/singularity/

# Refuse to run with unset placeholders: an empty datadir would list the
# current directory and an empty wd would silently `cd` to $HOME.
[[ -n "$datadir" ]] || die "datadir is not set (edit the script)"
[[ -n "$wd" ]] || die "wd is not set (edit the script)"
[[ -d "$datadir" ]] || die "input data folder not found: $datadir"
[[ -d "$wd" ]] || die "results folder not found: $wd"

# Make wd absolute so that the paths and symlink targets built from it stay
# valid after changing directory.
wd=$(cd -- "$wd" && pwd) || die "cannot resolve results folder: $wd"

image=${singpath%/}/panaroo_1.3.0--pyhdfd78af_0.sif
[[ -r "$image" ]] || die "singularity image not found or unreadable: $image"

# Use the input data folder to generate an array containing the sample names:
# the part of each file name before the first '_', de-duplicated while keeping
# the first-seen order.
declare -A seen=()
array=()
shopt -s nullglob
for entry in "$datadir"/*; do
  sample_id=${entry##*/}
  sample_id=${sample_id%%_*}
  [[ -n "$sample_id" ]] || continue
  if [[ -z "${seen[$sample_id]:-}" ]]; then
    seen[$sample_id]=1
    array+=("$sample_id")
  fi
done
shopt -u nullglob
((${#array[@]} > 0)) || die "no samples found in $datadir"

# Create input and output folders inside the results folder
annot_dir=$wd/panaroo/annotation_files
out_dir=$wd/panaroo/panaroo_output
mkdir -p -- "$annot_dir" "$out_dir"

# Generate soft links to the .gff files while looping through the sample names.
# -f makes re-runs idempotent; samples without an annotation are reported and
# skipped instead of leaving a dangling link for panaroo to trip over.
linked=0
for sample_id in "${array[@]}"; do
  gff=$wd/$sample_id/prokka_annotation/$sample_id.gff
  if [[ ! -f "$gff" ]]; then
    printf 'WARNING: no annotation for sample %s (expected %s), skipping\n' \
      "$sample_id" "$gff" >&2
    continue
  fi
  ln -sf -- "$gff" "$annot_dir/"
  linked=$((linked + 1))
done
((linked > 0)) || die "no .gff annotation files found under $wd"

# Every .gff present in the annotation folder is passed to panaroo
gff_files=("$annot_dir"/*.gff)

# Run panaroo from the singularity image on the annotations and send the
# output to the output folder.
# NOTE: the links point into $wd, so $wd must be visible inside the container
# (add `--bind "$wd"` if the site configuration does not mount it by default).
echo "Starting panaroo $(date +"%T")"
cd -- "$wd/panaroo" || die "cannot cd to $wd/panaroo"
singularity exec "$image" panaroo \
  -i "${gff_files[@]}" \
  -o ./panaroo_output/ \
  --clean-mode strict \
  --remove-invalid-genes \
  -t "${SLURM_CPUS_PER_TASK:-1}"
echo "Done $(date +"%T")"