#!/bin/bash
##
##
##

DATA=/home/imallona/src/polymenidou_manu_clip/01_snv_analysis
BEDTOOLS=/home/imallona/soft/bedtools/bedtools-2.29.2/bin/bedtools
WD=/home/imallona/src/polymenidou_manu_clip/03_postprocessing_snv_analysis
# motif_sample=2000
fai=hg38.genome
fasta=~/polymenidou_manu_clip/reference/GRCh38.primary_assembly.genome.fa
DREME=~/soft/meme/bin/dreme
STREME=~/soft/meme/bin/streme
DUST=~/soft/meme/bin/dust ## to remove homopolymers

cd $WD
mkdir -p meme
cd $_

mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \
      "select chrom, size from hg38.chromInfo" > hg38.genome


for fn in $(find $DATA -name "deletion_pos_min_3reads*.bed")          
do
    curr=$(basename $fn .bed)
    motif_sample=$(wc -l $fn | cut -f1 -d" ")

    # if [ "$motif_sample" -gt "3000" ]; then
    #     motif_sample=3000
    # fi

    motif_sample=2000
    

    egrep "^chr[0-9XY]" $fn | \
        $BEDTOOLS slop -s -l 10 -r 10 -i /dev/stdin -g $fai | \
        shuf -n $motif_sample > "$curr"_peaks.bed

    $BEDTOOLS getfasta -s -fi $fasta -bed "$curr"_peaks.bed -fo "$curr"_peaks.fasta

    $DUST "$curr"_peaks.fasta > "$curr"_dusted.fasta

    $DREME -norc -o dreme_"$curr" -p "$curr"_dusted.fasta

    $STREME --p "$curr"_dusted.fasta \
            -o streme_"$curr" \
            --minw 3 \
            --maxw 30
            
done


# now for the clusters


for fn in $(find $DATA -name "deletion_cluster_dist_10_min_3reads_*.bed")          
do
    curr=$(basename $fn .bed)
    motif_sample=$(wc -l $fn | cut -f1 -d" ")

    # if [ "$motif_sample" -gt "3000" ]; then
    #     motif_sample=3000
    # fi

    motif_sample=1000
    

    egrep "^chr[0-9XY]" $fn | \
        $BEDTOOLS slop -s -l 10 -r 10 -i /dev/stdin -g $fai | \
        shuf -n $motif_sample > "$curr"_peaks.bed

    $BEDTOOLS getfasta -s -fi $fasta -bed "$curr"_peaks.bed -fo "$curr"_peaks.fasta

    $DUST "$curr"_peaks.fasta > "$curr"_dusted.fasta

    $DREME -norc -o dreme_"$curr" -p "$curr"_dusted.fasta

    $STREME --p "$curr"_dusted.fasta \
            -o streme_"$curr" \
            --minw 3 \
            --maxw 30

done