manta+strelka2 call somatic mutation
bgzip -f AgilentV6_GRCh38_ex_region.sort.filtered.bed
tabix -p bed AgilentV6_GRCh38_ex_region.sort.filtered.bed.gz
mkdir {manta,strelka2}
conda activate strelka2
cd manta
perl -ne 'chomp; next if /^$/; @a = split /\t/; print "configManta.py --tumorBam ../align/$a[0]_bqsr.bam --normalBam ../align/$a[1]_bqsr.bam --referenceFasta /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa --callRegions /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.filtered.bed.gz --generateEvidenceBam --runDir /data02/zhangmengmeng/BPT/LCM-WES/manta/$a[0] --exome --outputContig \n"' sample_pair.txt > CreateRunManta.sh
./CreateRunManta.sh |grep "runWorkflow.py" > RunManta.sh
nohup ./RunManta.sh > RunManta.log 2>&1 &
cat RunManta.log |grep "Manta workflow successfully completed"
# 注意strelka2的conda版本似乎有问题,需要从github下载后解压使用
cd ../strelka2
perl -ne 'chomp; next if /^$/; @a = split /\t/; print "/data02/zhangmengmeng/software/strelka-2.9.10.centos6_x86_64/bin/configureStrelkaSomaticWorkflow.py --tumorBam ../align/$a[0]_bqsr.bam --normalBam ../align/$a[1]_bqsr.bam --referenceFasta /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa --callRegions /data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.filtered.bed.gz --indelCandidates /data02/zhangmengmeng/BPT/LCM-WES/manta/$a[0]/results/variants/candidateSmallIndels.vcf.gz --runDir /data02/zhangmengmeng/BPT/LCM-WES/strelka2/$a[0] --exome \n"' sample_pair.txt > CreateRunStrelka2.sh
./CreateRunStrelka2.sh |grep "runWorkflow.py" > RunStrelka2.sh
sed -i 's#$# -m local -j 4#' RunStrelka2.sh
cat RunStrelka2.log |grep "Strelka somatic workflow successfully completed"
运行完后处理:
1.将所有vcf文件copy到新的文件夹
# cp_results.sh
mkdir -p snp indel
# Move and rename somatic.snvs.vcf.gz
find . -path "*/results/variants/somatic.snvs.vcf.gz" | while read file; do
new_name=$(echo "$file" | awk -F'/' '{print $(NF-3) ".somatic.snvs.vcf.gz"}')
cp "$file" "snp/$new_name"
done
# Move and rename somatic.indels.vcf.gz
find . -path "*/results/variants/somatic.indels.vcf.gz" | while read file; do
new_name=$(echo "$file" | awk -F'/' '{print $(NF-3) ".somatic.indels.vcf.gz"}')
cp "$file" "indel/$new_name"
done
2.解压并筛选pass
cd snp
gunzip ./*.gz
cd indel
gunzip ./*.gz
cd ../
# filter_pass.sh:
# Process VCF files in the snp directory
for file in snp/*.vcf; do
grep '^#' "$file" > "${file%.vcf}_filtered.vcf" # Extract headers
awk '$7 == "PASS"' "$file" >> "${file%.vcf}_filtered.vcf" # Append PASS rows
done
# Process VCF files in the indel directory
for file in indel/*.vcf; do
grep '^#' "$file" > "${file%.vcf}_filtered.vcf" # Extract headers
awk '$7 == "PASS"' "$file" >> "${file%.vcf}_filtered.vcf" # Append PASS rows
done
varscan2 call somatic mutation
cd align
ls *_bqsr.bam|perl -ne 'chomp;if($_=~/(\S+)\_bqsr/){print "samtools view -b -u -q 1 $1_bqsr.bam | samtools mpileup -f /data02/zhangmengmeng/database/hg38/gencode_GRCh38.p14.genome.fa - \> $1.mpileup && echo $1 ok\n"}' > samtools_mpileup.sh
cd varscan2
perl -ne 'chomp; next if /^$/; @a = split /\t/; print "java -jar /data02/zhangmengmeng/software/VarScan.v2.4.6.jar somatic ../align/$a[1].mpileup ../align/$a[0].mpileup $a[0] --min-coverage 8 --min-reads 2 --min-var-freq 0.15 --output-vcf 1 && echo $a[0] varscan2 ok\n"' sample_pair.txt > varscan2.sh