Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/bin/bash
##
## Reprocesses M Berlanga's CLiP data using nf-core/clipseq, with only one read (R2) per sample
##
## Tue Apr 5 08:28:56 CEST 2022
# Root of this analysis: input FASTQs are linked under $WD/data and the
# pipeline is launched from $WD/run.
WD=/home/imallona/polymenidou_manu_clip/nextflow

# Abort if the data folder cannot be entered; otherwise the symlinks below
# would be created in whatever directory the script was started from.
mkdir -p "$WD/data"
cd "$WD/data" || exit 1

# Symlink every R2 FASTQ into the data folder. The find output is consumed
# NUL-delimited so paths with whitespace or glob characters are neither split
# nor expanded.
while IFS= read -r -d '' fn; do
  link_name=${fn##*/}
  # Names already present (e.g. links left by a previous run) are skipped, so
  # re-running the script does not emit "File exists" errors.
  [[ -e "$link_name" || -L "$link_name" ]] || ln -s -- "$fn" .
done < <(find /home/kathi/Manu_TDP_CLIP/FASTQ/ -name "*R2*fastq.gz" -print0)

# Everything after this point runs from the run folder. The path is spelled
# out (rather than relying on $_) and the cd is checked, so design.conf and
# the pipeline outputs cannot end up in the wrong directory.
mkdir -p "$WD/run"
cd "$WD/run" || exit 1
# Write the sample sheet (design.conf) into the current directory: a
# "sample,fastq" header followed by one row per sample, where each row pairs
# the sample ID with the FASTQ of the same name in the data folder.
fastq_dir=/home/imallona/polymenidou_manu_clip/nextflow/data
sample_ids=(
  20200123.A-6M2_R2
  20200123.A-6M_R2
  20200123.A-RBDm2_R2
  20200123.A-RBDm_R2
  20200123.A-WT2_R2
  20200123.A-WT_R2
)
{
  printf '%s\n' 'sample,fastq'
  for sample_id in "${sample_ids[@]}"; do
    printf '%s,%s/%s.fastq.gz\n' "$sample_id" "$fastq_dir" "$sample_id"
  done
} > design.conf
## Beware: the pipeline tries to deduplicate, and setting `--deduplicate False`
## raises the following error:
## Channel `ch_aligned` has been used twice as an input by process `get_crosslinks` and process `rseqc`
## RNA premapping to smRNAs included
# As per their documentation, https://nf-co.re/clipseq/1.0.0/usage
# The pipeline comes equipped with some 'smallRNA' FASTA references for premapping.
# This includes rRNA and tRNA sequences, the sources of which can be viewed here.
# The purpose of this premapping is to capture abundant ncRNA that are present in
# multiple similar copies in the genome, making them hard to assign reads to.
# tRNA can occur within genes and without proper handling can result in misassignment
# of reads to mRNA in certain situations.
# https://rnajournal.cshlp.org/content/early/2018/08/21/rna.067348.118
## Indeed, the smallRNA reference is mostly tRNAs rather than other small RNAs; to check:
# zcat /home/imallona/.nextflow/assets/nf-core.old/clipseq/assets/small_rna/Homo_sapiens/Homo_sapiens.smallRNA.fa.gz | grep ">"
# Launch nf-core/clipseq at the lowest scheduling priority (nice 19).
# The arguments are collected in an array, one option per line, and passed to
# nextflow in exactly this order.
clipseq_args=(
  run
  --max_cpus 50
  nf-core/clipseq
  --genome GRCm38
  --smrna_org mouse
  --input design.conf
  -profile singularity
  --peakcaller paraclu,piranha
  -resume
  --outdir out
)
nice -n 19 nextflow "${clipseq_args[@]}"