教程:https://github.com/VanLoo-lab/ascat/tree/master/ExampleData
从ASCAT官网下载Reference文件
https://zenodo.org/records/14008443
需下载:G1000_alleles_WES_hg38.zip, G1000_loci_WES_hg38.zip, GC_G1000_WES_hg38.zip, RT_G1000_WES_hg38.zip
全部解压后修改G1000_lociAll_hg38文件夹里的所有文件,全部添加“chr”
for file in *.txt; do
sed -i 's/^/chr/' "$file"
done
构建EXAMPLE脚本用于替换
Example_ASCAT.R
library(ASCAT)
options(bitmapType='cairo')
Sys.setenv(LD_LIBRARY_PATH = paste("/data02/zhangmengmeng/software/alleleCount-4.2.1/lib", Sys.getenv("LD_LIBRARY_PATH"), sep=":"))
Sys.getenv("LD_LIBRARY_PATH")
dyn.load("/data02/zhangmengmeng/software/alleleCount-4.2.1/lib/libhts.so.3")
ascat.prepareHTS(
tumourseqfile = "/data02/zhangmengmeng/BPT/LCM-WES/align/TUMOR_bqsr.bam",
normalseqfile = "/data02/zhangmengmeng/BPT/LCM-WES/align/NORMAL_bqsr.bam",
tumourname = "TUMOR",
normalname = "NORMAL",
allelecounter_exe = "/data02/zhangmengmeng/software/alleleCount-4.2.1/bin/alleleCounter",
alleles.prefix = "/data02/zhangmengmeng/database/ASCAT_ref/G1000_allelesAll_hg38/G1000_alleles_hg38_chr",
loci.prefix = "/data02/zhangmengmeng/database/ASCAT_ref/G1000_lociAll_hg38/G1000_loci_hg38_chr",
BED_file = "/data02/zhangmengmeng/database/gatk_resource_bundle/hg38/AgilentV6_GRCh38_ex_region.sort.bed",
gender = "XX",
genomeVersion = "hg38",
nthreads = 8,
tumourLogR_file = "TUMOR_LogR.txt",
tumourBAF_file = "TUMOR_BAF.txt",
normalLogR_file = "NORMAL_LogR.txt",
normalBAF_file = "NORMAL_BAF.txt")
ascat.bc = ascat.loadData(Tumor_LogR_file = "TUMOR_LogR.txt", Tumor_BAF_file = "TUMOR_BAF.txt", Germline_LogR_file = "NORMAL_LogR.txt", Germline_BAF_file = "NORMAL_BAF.txt", gender = "XX", genomeVersion = "hg38")
ascat.plotRawData(ascat.bc, img.prefix = "TUMOR_Before_correction_")
ascat.bc = ascat.correctLogR(ascat.bc, GCcontentfile = "/data02/zhangmengmeng/database/ASCAT_ref/GC_G1000_hg38.txt", replictimingfile = "/data02/zhangmengmeng/database/ASCAT_ref/RT_G1000_hg38.txt")
ascat.plotRawData(ascat.bc, img.prefix = "TUMOR_After_correction_")
ascat.bc = ascat.aspcf(ascat.bc)
ascat.plotSegmentedData(ascat.bc)
ascat.output = ascat.runAscat(ascat.bc, gamma=1, write_segments = TRUE)
# Function to count SNPs within segment boundaries
count_snps_in_segment <- function(chr, start, end, snp_df) {
sum(snp_df$Chromosome == chr & snp_df$Position >= start & snp_df$Position <= end)
}
# Apply function to each row in segments dataframe
ascat.output$segments$nProbes <- mapply(
count_snps_in_segment,
ascat.output$segments$chr,
ascat.output$segments$startpos,
ascat.output$segments$endpos,
MoreArgs = list(snp_df = ascat.bc$SNPpo)
)
final_segment <- ascat.output$segments
colnames(final_segment) <- c("SampleID","Chr","Start","End","nMajor","nMinor","nProbes")
write.table(final_segment,"TUMOR_final_segment.txt",col.names = T,row.names = F,sep = '\t',quote = F)
QC = ascat.metrics(ascat.bc,ascat.output)
save(ascat.bc, ascat.output, QC, file = 'TUMOR_ASCAT_objects.Rdata')
构建shell脚本用于批量跑ascat
Create_Run_ASCAT.sh
#!/bin/bash
# Path to the example R script
EXAMPLE_SCRIPT="EXAMPLE_ASCAT.R"
OUTPUT_SCRIPT="run_ascat.sh"
# Start the script file
echo "#!/bin/bash" > "$OUTPUT_SCRIPT"
# Debugging: Check if sample pairs are correctly read
echo "Processing sample pairs..."
# Ensure strict tab-separated parsing
while IFS=$'\t' read -r TUMOR NORMAL PATIENT_ID; do
# Trim spaces in TUMOR and NORMAL
TUMOR=$(echo "$TUMOR" | xargs)
NORMAL=$(echo "$NORMAL" | xargs)
PATIENT_ID=$(echo "$PATIENT_ID" | xargs)
# Debug: Show extracted values
echo "TUMOR: [$TUMOR], NORMAL: [$NORMAL], PATIENT_ID: [$PATIENT_ID]"
# Skip invalid lines
if [[ -z "$TUMOR" || -z "$NORMAL" ]]; then
echo "Skipping invalid line: [$TUMOR] [$NORMAL]"
continue
fi
# Define output R script name
R_SCRIPT="ASCAT_${TUMOR}.R"
# Swap TUMOR and NORMAL in the script and create a new file
awk -v t="$TUMOR" -v n="$NORMAL" '{gsub("TUMOR", t); gsub("NORMAL", n)} 1' "$EXAMPLE_SCRIPT" > "$R_SCRIPT"
# Append the command to run the script (but don't execute)
echo "/opt/R/4.3.0/lib64/R/bin/Rscript $R_SCRIPT" >> "$OUTPUT_SCRIPT"
done < sample_pair.txt
# Make the script executable
chmod +x "$OUTPUT_SCRIPT"
echo "Commands saved in run_ascat.sh. You can run it manually with: bash run_ascat.sh"