Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/bash
##
##
##
DATA=/home/imallona/src/polymenidou_manu_clip/01_snv_analysis
BEDTOOLS=/home/imallona/soft/bedtools/bedtools-2.29.2/bin/bedtools
WD=/home/imallona/src/polymenidou_manu_clip/03_postprocessing_snv_analysis
# motif_sample=2000
fai=hg38.genome
fasta=~/polymenidou_manu_clip/reference/GRCh38.primary_assembly.genome.fa
DREME=~/soft/meme/bin/dreme
STREME=~/soft/meme/bin/streme
DUST=~/soft/meme/bin/dust ## to remove homopolymers
cd $WD
mkdir -p meme
cd $_
mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \
"select chrom, size from hg38.chromInfo" > hg38.genome
for fn in $(find $DATA -name "deletion_pos_min_3reads*.bed")
do
curr=$(basename $fn .bed)
motif_sample=$(wc -l $fn | cut -f1 -d" ")
# if [ "$motif_sample" -gt "3000" ]; then
# motif_sample=3000
# fi
motif_sample=2000
egrep "^chr[0-9XY]" $fn | \
$BEDTOOLS slop -s -l 10 -r 10 -i /dev/stdin -g $fai | \
shuf -n $motif_sample > "$curr"_peaks.bed
$BEDTOOLS getfasta -s -fi $fasta -bed "$curr"_peaks.bed -fo "$curr"_peaks.fasta
$DUST "$curr"_peaks.fasta > "$curr"_dusted.fasta
$DREME -norc -o dreme_"$curr" -p "$curr"_dusted.fasta
$STREME --p "$curr"_dusted.fasta \
-o streme_"$curr" \
--minw 3 \
--maxw 30
done
# now for the clusters
for fn in $(find $DATA -name "deletion_cluster_dist_10_min_3reads_*.bed")
do
curr=$(basename $fn .bed)
motif_sample=$(wc -l $fn | cut -f1 -d" ")
# if [ "$motif_sample" -gt "3000" ]; then
# motif_sample=3000
# fi
motif_sample=1000
egrep "^chr[0-9XY]" $fn | \
$BEDTOOLS slop -s -l 10 -r 10 -i /dev/stdin -g $fai | \
shuf -n $motif_sample > "$curr"_peaks.bed
$BEDTOOLS getfasta -s -fi $fasta -bed "$curr"_peaks.bed -fo "$curr"_peaks.fasta
$DUST "$curr"_peaks.fasta > "$curr"_dusted.fasta
$DREME -norc -o dreme_"$curr" -p "$curr"_dusted.fasta
$STREME --p "$curr"_dusted.fasta \
-o streme_"$curr" \
--minw 3 \
--maxw 30
done