暗能星系

    • 登录
    • 搜索

    MNGS流程开发笔记

    刘茜
    1
    1
    7
    正在加载更多帖子
    • 从旧到新
    • 从新到旧
    • 最多赞同
    回复
    • 在新帖中回复
    登录后回复
    此主题已被删除。只有拥有主题管理权限的用户可以查看。
    • I
      ice-melt 最后由 编辑

      伪代码记录

      参数

      param_dict={
          {"Asia1":"GCA_008797915.1_ASM879791v1_genomic"}, 
          {"SAT3":"GCA_008798995.1_ASM879899v1_genomic"}, 
          {"SAT2":"GCA_008799015.1_ASM879901v1_genomic"}, 
          {"SAT1":"GCA_008799035.1_ASM879903v1_genomic"}, 
          {"C":"GCA_008799055.1_ASM879905v1_genomic"}, 
          {"A":"GCA_008799075.1_ASM879907v1_genomic"}, 
          {"O":"GCF_002816555.1_ASM281655v1_genomic"},  
      }
      DB_DIR=/data_raid1/bioinfo/app/cromwell/cromwell-executions/data/database
      SampleID=S1
      R1=${SampleID}_R1.fastq
      R2=${SampleID}_R2.fastq
      

      创建索引

      
      cd ${DB_DIR}/FMDV_reference
      for refSuffix in param_dict.items():
          bowtie2-build ${refSuffix}.fna ${refSuffix}
      

      krakenuniq

      # {step: krakenuniq}
      krakenuniq --report-file report.tsv --db ${DB_DIR}/kraken_uniq_database \
          --threads ${T} --output report.kraken \
          --paired ${Fq1} ${Fq2} --unclassified-out unclassifed.txt --classified-out classifed.txt
      
      # {step:parse krakenuniq report}
      # result file list:
      # - fastqc_result.json
      # - report.tsv
      # - report.kraken
      # - res.json
      # - tax.json
      # - result/${SampleID}_R${n}_${SeroType}_{ReadsNum}.fastq
      

      溯源

      for SeroType,refSuffix in #{Assemble Param data from Part1 result}:
          # 进入一个单独的目录,该目录下全是同一 SeroType 不同样本的结果序列
          mkdir temp_xx && cd temp_xx
          for SampleID,ReadsNum in #{Assemble Param data from Part1 reuslt By Sample}:
              Fq1=${Path2Sample}/${SampleID}_R1_${SeroType}_{ReadsNum}.fastq
              Fq2=${Path2Sample}/${SampleID}_R2_${SeroType}_{ReadsNum}.fastq
              
              bowtie2 -x ${DB_DIR}/FMDV_reference/${refSuffix} -U ${Fq1},${Fq2} -S paired.sam 
              samtools view -bS paired.sam -o paired.bam
              samtools sort -o paired.sorted.bam paired.bam
              # {samtools mpileup 命令已过时,改用 bcftools mpileup}
              # {samtools mpileup -Ou -f ${DB_DIR}/FMDV_reference/${refSuffix}.fna paired.bam} 
              # 一致性序列,**该步骤会假设所有位点都是双倍体**
              bcftools mpileup -Ou -f ${DB_DIR}/FMDV_reference/${refSuffix}.fna -o mpileup.vcf paired.sorted.bam
              bcftools call -c -o all_site.vcf mpileup.vcf 
              # vcfutils.pl 属于 bcftools 软件的工具,将 vcf 转换成fastq
              vcfutils.pl vcf2fq all_site.vcf > cns.fastq
              # 将 fastq 转换成fasta
              seqtk seq -aQ64 -q20 -n N cns.fastq > cns.fasta
              # 将序列ID替换
              sed -i 's/^>.*$/>${SampleID}_${SeroType}_{ReadsNum}/g' ${SampleID}_${SeroType}_{ReadsNum}_cns.fasta 
      
          cat ${DB_DIR}/FMDV_type/${SeroType}.fa temp_xx/*_cns.fasta > all.fa
          mafft all.fa > fmdv_all.aln
          trimal -in fmdv_all.aln -out fmdv.aln.trimmed
          fasttree -gamma -nt -gtr -out fmdv.tree fmdv.aln.trimmed
      # 最后将所有的 fmdv.tree 汇总
      
      
      1 条回复 最后回复 回复 引用 0
      • First post
        Last post
      Powered by 暗能星系