UNIX_HPC_exercise_instructions.md

# Exercise: find out where you are and look at the content of a directory.
pwd      # Prints the path of the directory you are currently in ("print working directory")
ls       # Prints the names of files and directories
ls -l    # Lists the content of the current directory in "long listing" format.
         # This provides additional details for each file/directory, such as
         # its permissions, its size and its last modified date.
ls -a    # Adding the "-a" option additionally displays hidden files and
         # directories. These are files/directories whose name starts with
         # a dot ".".
         # Hidden files are often used to store program configurations.
ll       # This alias is the same as "ls -l"
la       # This alias is the same as "ls -lA"
# Exercise: move through the file tree and compare the path reported by "pwd"
# when a directory is reached through a symlink or through its real location.
cd data/
pwd         # this shows you your location relative to the symlink you followed
cd ..
pwd         # you are back in your home
cd /data/$USER
pwd      # this shows you the actual location /data/$USER
cd ..
pwd      # you are now in /data
ll       # this can take a moment
# Exercise: copy the archive into your data directory and extract it.
cd                   # Go back to your home first: ompA.zip and the "data" link are located there,
                     # and the previous exercise left you in /data.
cp ompA.zip data/    # The original file stays in your home, a copy is placed in data/
cd data
unzip ompA.zip       # This extracts the files from the archive while also preserving the archive.
# Exercise: create a directory, move files into it, rename and delete files.
mkdir intro_to_unix
mv ompA_ref_short.fasta intro_to_unix/   # Moves a single file into the new directory
ll intro_to_unix/                        # Check that the file has arrived
mv *.fasta intro_to_unix/  # This moves all files with the ending ".fasta"
unzip ompA.zip             # Extract the archive a second time so that there are files to move again
mv * intro_to_unix/        # It will overwrite existing files with the same name
                           # Note: "*" also matches intro_to_unix itself. A directory cannot be
                           # moved into itself, so expect an error message for that one entry.
cd intro_to_unix/
mv ompA.zip fasta_files.zip   # "mv" within the same directory renames a file
rm fasta_files.zip            # Deletes the file. There is no recycle bin, so double-check before using "rm"
# Exercise: create a directory, move it to your home and delete it there.
mkdir archive
cp ~/ompA.zip archive/
mv archive/ ~/        # Move the directory into your home.
                      # Do not use "../.." as the target here: programs such as mv resolve ".."
                      # against the actual location of the current directory. If you came here
                      # through the "data" symlink, "../.." would be /data and not your home.
cd ../..              # You can use ".." multiple times to go upward in the file tree.
                      # "cd" follows the path you typed (including symlinks), so this leads back home.
rm -r archive/        # the -r flag will recursively delete the directory, meaning it will also delete its content.
# Exercise: create a shortcut (symbolic link) to the exercise directory.
cd          # Remember, this is a shorthand to get "home"
ln -s data/intro_to_unix/ unix_exercise   # Creates the symbolic link "unix_exercise" pointing to data/intro_to_unix/
cd unix_exercise/                         # A link to a directory can be entered like a normal directory
pwd                                       # Shows the path through the link you just followed
# Exercise: combine all sequence files into one file and extract the headers.
#  - The pattern "ompA*.fasta" (instead of "*.fasta") makes sure that the output
#    file all_ompA.fasta and any copies created in later exercises are not
#    read as input when this command is repeated.
#    (This assumes that all sequence files of the archive start with "ompA".)
#  - ">" (instead of ">>") replaces the content of the output file, so repeating
#    the command does not add every sequence a second time.
cat ompA*.fasta > all_ompA.fasta
grep ">" all_ompA.fasta       # Quotation marks are necessary because ">" is meant as a character, not a command!

grep ">" all_ompA.fasta > headers   # Same search, but the result is written to the file "headers"
# Exercise: different ways of looking at (parts of) a file and searching in it.
head all_ompA.fasta            # "head" prints the beginning of a file (10 lines by default)
head -n 5 all_ompA.fasta       # show only the first 5 lines

tail all_ompA.fasta            # "tail" prints the end of a file (10 lines by default)
tail -n 5 all_ompA.fasta       # show only the last 5 lines

cat all_ompA.fasta
# "cat" prints the complete file to standard output.

more all_ompA.fasta
# With "more" you remain in your command line. You scroll through the file with the space bar; when you reach the end of the file, you get your prompt back.

less all_ompA.fasta
# With "less" you see the document as if in a new window, separate from your command line, and you can go backward and forward. Type 'g' to get to the top of the file and 'SHIFT+g' to get to the end. Typing '/' followed by a search term highlights all instances. Type 'q' to quit.
  grep "SNVYGKNHDTGVSP" all_ompA.fasta | wc -l        # counts the lines containing this motif: 5 sequences
grep "variant" all_ompA.fasta | cut -d '_' -f 3       # prints the 3rd "_"-separated field of each line containing "variant" (or use the headers file)
# Exercise: create a new file and fill it using output redirection.
touch myOmpA                                              # creates the empty file "myOmpA"
echo "This is my favourite ompA sequence:" >> myOmpA      # ">>" appends the text to the end of the file
cat ompA_variant_008.fasta >> myOmpA                      # appends the content of the fasta file below that line
# Exercise: store a file name prefix in a variable and loop over all files
# that start with this prefix.
var1="ompA"
echo "$var1"
# Put variables in double quotes when you use them: this keeps a value
# together as one word even if it contains spaces or special characters.
# The "*" stays outside of the quotes so that it still works as a wildcard.
for x in "$var1"*.fasta
do
   echo "$x"
done
#!/bin/bash
# Copies every ompA*.fasta file of the current directory to a new file whose
# name starts with "outerMembraneProtein_" instead of "ompA_".
# Example: ompA_variant_008.fasta -> outerMembraneProtein_variant_008.fasta
for x in ompA*.fasta
do
   # if no file matches, the pattern itself is handed to the loop: skip it
   [[ -e "$x" ]] || continue

   # save file ending in variable
   # ($x already contains the file name, so it can be passed on directly with echo)
   ending=$(echo "$x" | cut -d '_' -f 2,3)    # extract 2nd and 3rd field
   # copy and rename file
   cp "$x" "outerMembraneProtein_$ending"
done
#!/bin/bash
# Copies every <name1>*.fasta file of the current directory to a new file in
# which the first part of the name is replaced by <name2>.
# Example: ompA_variant_008.fasta -> outerMembraneProteinA_variant_008.fasta

# Define variables prior to your actual code and use these throughout instead of hard-coded names
name1="ompA"
name2="outerMembraneProteinA"

for x in "$name1"*.fasta
do
   # if no file matches, the pattern itself is handed to the loop: skip it
   [[ -e "$x" ]] || continue

   # save file ending in variable (2nd and 3rd "_"-separated field of the name)
   ending=$(echo "$x" | cut -d '_' -f 2,3)
   # copy and rename file
   cp "$x" "${name2}_$ending"
done
# Arrays: several values stored under a single name
numbers=(5 6 7 8 9 10)
echo "${numbers[@]}"      # [@] stands for all elements; the curly brackets are mandatory
echo "${numbers[2]}"      # counting starts at 0, so this is the third element

# Option 1: loop directly over the values
for value in "${numbers[@]}"; do
   echo "$value"
done

# Option 2: loop over the indices (the "!" yields 0 1 2 ... instead of the values)
for idx in "${!numbers[@]}"; do
   echo "${numbers[idx]}"
done

# Option 3: loop over an index range that you spell out yourself
for i in {0..5}; do
   echo "${numbers[i]}"
done

#!/bin/bash
# Sorts the <name1>*.fasta files of the current directory into one directory
# per variant. Each file is copied into the directory named after the 3rd
# "_"-separated part of its name (without the extension), and the first part
# of the file name is replaced by <name2>.
# Example: ompA_variant_008.fasta -> 008/outerMembraneProteinA_variant_008.fasta

name1="ompA"
name2="outerMembraneProteinA"

# remember: x=(x1 x2 x3) creates an array. A wildcard pattern inside the
# brackets is replaced by all matching file names, one array element per file,
# so "ls" is not needed to collect them.
ompAfiles=("$name1"*.fasta)
echo "${ompAfiles[@]}"              # including echos of variables can be a good sanity check for your code

for x in "${ompAfiles[@]}"
do
   # if no file matched, the array only contains the pattern itself: skip it
   [[ -e "$x" ]] || continue

   # save file ending in variable
   ending=$(echo "$x" | cut -d '_' -f 2,3)
   echo "$ending"

   # extract ompA variant
   variant=$(echo "$x" | cut -d '_' -f 3 | cut -d '.' -f 1)   # you can pipe as many commands as you like
   echo "$variant"

   # create new directory
   mkdir -p "$variant"      # -p: throws no warning if folder already exists

   # copy file into new directory and rename
   cp "$x" "$variant/${name2}_$ending"
done
# Store the directories that Singularity should bind into containers
# (SINGULARITY_BINDPATH) in your .bashrc, so that the setting is available
# in every new session.
# The line is only added if .bashrc does not contain it yet; repeating this
# step therefore does not create duplicate entries.
bindpath_line="export SINGULARITY_BINDPATH=/scratch,/data,/home/$USER,/shares/amr.imm.uzh"
grep -qxF -- "$bindpath_line" "$HOME/.bashrc" 2>/dev/null || echo "$bindpath_line" >> "$HOME/.bashrc"
source "$HOME/.bashrc"             # apply the setting to the current session as well
mafft input.fasta > output.fasta   # general form of a mafft call: the result printed by mafft is redirected to output.fasta
#!/usr/bin/env bash
# Batch job script: runs mafft on all_ompA.fasta and writes the result to
# all_ompA_msa.fasta.
# The lines below that start with "#SBATCH" are job options read by the
# scheduler (they are not ordinary comments, do not edit them carelessly):
#   --time           maximum run time of the job (here 30 minutes)
#   --mem-per-cpu    memory requested per CPU
#   --cpus-per-task  number of CPUs requested for the task
#   --job-name       name under which the job is listed
#   --output/--error files receiving standard output / standard error;
#                    "%j" is replaced by the job ID
#SBATCH --time=00:30:00
#SBATCH --mem-per-cpu=8G
#SBATCH --cpus-per-task=2
#SBATCH --job-name=msa
#SBATCH --output=msa_%j.out
#SBATCH --error=msa_%j.err

# load the Singularity module
module load singularityce

# Call the container image with the command to run inside it ("mafft all_ompA.fasta").
# The output of mafft is redirected to all_ompA_msa.fasta in the current directory.
# NOTE(review): two CPUs are requested above, but mafft is called without a
# thread option - confirm whether the second CPU is actually used.
/shares/amr.imm.uzh/bioinfo/singularity/mafft_7.505--hec16e2b_0.sif mafft all_ompA.fasta > all_ompA_msa.fasta