lumpy 结构突变分析流程
-
lumpyexpress
1.比对
https://github.com/hall-lab/speedseqspeedseq align -R "@RG\tID:id\tSM:sample\tLB:lib" \ human_g1k_v37.fasta \ sample.1.fq \ sample.2.fq- call
lumpyexpress \ -B sample1.bam,sample2.bam,sample3.bam \ -S sample1.splitters.bam,sample2.splitters.bam,sample3.splitters.bam \ -D sample1.discordants.bam,sample2.discordants.bam,sample3.discordants.bam \ -o multi_sample.vcf3.svtyper call genotype
svtyper \
-B sample.bam
-S sample.splitters.bam
-i sample.vcf
> sample.gt.vcf======
参考
https://academic.oup.com/gigascience/article/8/9/giz110/5565134
为什么在lumpy之后要执行 svtyper?
SpeedSeq: ultra-fast personal genome analysis and interpretationhttp://webcache.googleusercontent.com/search?q=cache:F_SIpLosvKQJ:https://www.jayethomas.com/tfblxn/lumpy-structural-variant&newwindow=1&hl=zh-CN&strip=1&vwsrc=0
LUMPY (Layer et al., 2014) is only a variation detection tool, thus the genotype results need to be further confirmed by SVTyper (Chiang et al., 2015). -
河南项目的过程
bwa mem -t 4 -M -R "@RG\tID:ZGSP-2\tPL:Illumina\tLB:ZGSP-2\tSM:ZGSP-2" Prunus_persica_v2.0.a1_scaffolds.fasta ZGSP-2.R1.fq.gz ZGSP-2.R2.fq.gz \ | samblaster --excludeDups --addMateTags --maxSplitCount 2 --minNonOverlap 20 \ | samtools view -S -b - \ > sample.bamsamtools view -h ZGSP-683.alignment.sorted.markdup.bam | /opt/lumpy-sv/scripts/extractSplitReads_BwaMem -i stdin | samtools view -Sb - > ZGSP-683.splitters.unsorted.bam samtools sort ZGSP-683.split.unsorted.bam -o ZGSP-683.split.sorted.bamsamtools view -b -F 1294 ZGSP-683.alignment.sorted.markdup.bam > ZGSP-683.discordants.unsorted.bam1462 nohup time lumpyexpress -B ../ZGSP-1.sv.alignment.sorted.bam -S ../ZGSP-1.split.sorted.bam -D ../ZGSP-1.discordants.sorted.bam -P -o 1.vcf >>1.log 2>&1 &
1468 nohup time lumpyexpress -B ../ZGSP-3.sv.alignment.sorted.bam -S ../ZGSP-3.split.sorted.bam -D ../ZGSP-3.discordants.sorted.bam -P -o 3.vcf >>2.log 2>&1 &
1493 samtools index ZGSP-1.sv.alignment.sorted.bam
1495 samtools index ZGSP-3.sv.alignment.sorted.bam
1510 nohup time svtyper -B ../ZGSP-1.sv.alignment.sorted.bam -S ../ZGSP-1.split.sorted.bam -i 1.vcf > 1.gt.svtyper.vcf 2>>sv.1.log &
1512 nohup time svtyper -B ../ZGSP-3.sv.alignment.sorted.bam -S ../ZGSP-3.split.sorted.bam -i 3.vcf > 3.gt.svtyper.vcf 2>>sv.3.log &
1528 svtools lsort 1.gt.svtyper.vcf 3.gt.svtyper.vcf >gt.sorted.vcf
1532 svtools lmerge -i gt.sorted.vcf >gt.merged.vcf
1535 svtyper -B ../ZGSP-1.sv.alignment.sorted.bam,-B ../ZGSP-3.sv.alignment.sorted.bam -i gt.merged.vcf >all.vcf
注意 svtyper多个样本要哦逗号分开 不能是多个 -B 多个-B只去第一个样本 -
多个样本的处理方式
svtyper -B $(ls sample*.bam | paste -sd",") -i lumpy.raw.out.vcf > project.gt.vcf -
svtyper比较慢 用smoove试试
sudo docker run -d -v /ceph_disk3/sv_henan_yang/smoove/:/vcf -v /ceph_disk3/sv_henan_yang/raw_bam/:/bams brentp/smoove sh -c "ls /bams/ZGSP-*.sv.alignment.sorted.bam | xargs smoove genotype -p 10 --name prunus_sv --outdir /vcf --fasta /vcf/Prunus_persica_v2.0.a1_scaffolds.fasta --vcf /vcf/gt.merged.vcf "
-
验证svtools
cat gt.merged.vcf \ | vawk --header '{ $6="."; print }' \ | svtools genotype \ -B ../raw_bam/ZGSP-1.sv.alignment.sorted.bam \ | sed 's/PR...=[0-9\.e,-]*\(;\)\{0,1\}\(\t\)\{0,1\}/\2/g' - \ > gt/ZGSP-1.vcfparallel "svtyper -B {} -i gt.merged.vcf > gt/{= s:\.[^.]+$::; s:\.[^.]+$::; s:\.[^.]+$::; s:\.[^.]+$::; s:\..*/::; =}.vcf.tmp" ::: ../raw_bam/ZGSP-*.sv.alignment.sorted.bam需要将染色体独立成文件 放到某个目录 如chroms
#!/bin/bash splitfa(){ header="" while read line ; do if [ ${line:0:1} == ">" ] ; then header=$line echo "$header" >> ${header:1}.fa else seq="$line" echo "$seq" >> ${header:1}.fa fi done < $1 } splitfa $1nohup parallel "cnvnator -root {= s:\.[^.]+$::; s:\.[^.]+$::; s:\.[^.]+$::; s:\.[^.]+$::; s:.*/::; =}.root -tree {}" ::: /ceph_disk2/yancaofenxi/raw_bam/ZGSP-*.sv.alignment.sorted.bam & nohup parallel "cnvnator -root {} -his 100 -d chroms" ::: ../root_libs/ZGSP-*.root & nohup parallel "cnvnator -root {} -his 1000 -d ../chroms" ::: ZGSP-*.root & nohup parallel "cnvnator -root {} -stat 100" ::: ZGSP-*.root & nohup parallel "cnvnator -root {} -stat 1000" ::: ZGSP-*.root &nohup parallel "svtools copynumber --cnvnator /opt/miniconda3/envs/cnvnator_env/bin/cnvnator -s {= s:\.[^.]+$::; s:\.[^.]+$::; s:.*/::; =} -w 100 -r root_libs/{= s:\.[^.]+$::; s:\.[^.]+$::; s:.*/::; =}.root -c coordinates -i {} > cn/{= s:\.[^.]+$::; s:\.[^.]+$::; s:.*/::; =}.vcf" ::: gt/*.vcf.tmp &注意:ROOT依赖下面的包
sudo apt-get install dpkg-dev cmake g++ gcc binutils libx11-dev libxpm-dev \ libxft-dev libxext-dev python libssl-dev -
nohup parallel "svtyper -B {} -i all.gt.merged.with.f20.vcf > gt/{= s:.[^.]+$::; s:.[^.]+$::; s:.[^.]+$::; s:.[^.]+$::; s:./::; =}.vcf.tmp" ::: /ceph_disk2/yancaofenxi/raw_bam/ZGSP-.sv.alignment.sorted.bam &
通过.*/ 去掉路径
-
CNVnator非常难安装 用下面的conda环境
conda create -n cnvnator_env python=3.6
conda activate cnvnator_env
conda install -c conda-forge root_base=6.20
conda install -c conda-forge -c bioconda cnvnator./cnvnator -root file.root -tree file.bam
nohup parallel "cnvnator -root root_libs/{= s:.[^.]+$::; s:.[^.]+$::; s:.[^.]+$::; s:.[^.]+$::; s:./::; =}.root -tree {}" ::: /ceph_disk2/yancaofenxi/raw_bam/ZGSP-.sv.alignment.sorted.bam &
-
此回复已被删除! -
执行cnvnator 报错,
ERROR in cling::CIFactory::createCI(): cannot extract standard library include paths!
Invoking:
x86_64-conda_cos6-linux-gnu-c++ -O3 -DNDEBUG -xc++ -E -v /dev/null 2>&1 | sed -n -e '/^.include/,${' -e '/^ /.*++/p' -e '}'
Results was:
With exit code 0
解决办法:
x86_64-conda_cos6-linux-gnu-c++ 这个程序在环境中
export PATH=$PATH:/opt/miniconda3/envs/cnvnator_env/bin/svtools依赖python2.7 cnvnator 依赖python3.6 但是svtools会调用cnvnator
-
svtools vcfpaste
-m merged.vcf
-f cn.list
-q
| bgzip -c \merged.sv.gt.cn.vcf.gz
IOError: [Errno 24] Too many open files: '../cn/ZGSP-9.vcf'
ulimit -n 2048 扩大文件限制