AlphaFold2 运行验证
-
运行 alphafold
https://github.com/deepmind/alphafold
下载数据库
- bfd|(6=[a3m|cs219|hhm].[ffdata|ffindex])#~1.8T(1.7T?)
- mgnify|(1=[mgy_clusters_2018_12.fa])#~64G
- params|(16=[5 CASP14 models|5 pTM models|5 AlphaFold-Multimer models|LICENSE])#~5.3G
- pdb70|(9=pdb70_[a3m|cs219|hhm].[ffdata|ffindex],pdb70_clu.tsv,pdb_filter.dat,md5sum)#~56G(56G)
- pdb_mmcif|(TODO: 数据不完整x=[mmcif_files/about 180,000 .cif files,obsolete.dat])#~G(206G)
- pdb_seqres|(1=[pdb_seqres.txt])#~217M(0.2G)
- uniclust30|(13=[uniclust30_2018_08/])#~87G(86?)
- uniprot|(1=[uniprot.fasta])#~104G(98.3?)
- uniref90|(1=[uniref90.fasta])#~63G(58?)
- small_bfd|(1=[bfd-first_non_consensus_sequences.fasta])#~17G(17G)
# 使用链接下载需要的额外处理
# bfd *****************************************************
BASENAME=bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz
tar --extract --verbose --file="${BASENAME}" \
  --directory=./
rm "${BASENAME}"
# mgnify *****************************************************
gunzip mgy_clusters_2018_12.fa.gz
# params *****************************************************
tar --extract --verbose --file=alphafold_params_2022-01-19.tar \
  --directory=./ --preserve-permissions
rm alphafold_params_2022-01-19.tar
# pdb70 ****************************************
tar --extract --verbose --file=pdb70_from_mmcif_200401.tar.gz \
  --directory=./
rm pdb70_from_mmcif_200401.tar.gz
# pdb_mmcif ****************************************
find "raw/" -type f -iname "*.gz" -exec gunzip {} +
mkdir mmcif_files
find raw/ -type d -empty -delete # Delete empty directories.
for subdir in raw/*; do
  mv "${subdir}/"*.cif mmcif_files
done
find raw -type d -empty -delete
# download obsolete.dat
# pdb_seqres ****************************************
# 只有一个txt文本文件,无需处理
# uniclust30 ****************************************
tar --extract --verbose --file=uniclust30_2018_08_hhsuite.tar.gz \
  --directory=./
rm uniclust30_2018_08_hhsuite.tar.gz
# uniprot ****************************************
gunzip uniprot_trembl.fasta.gz
gunzip uniprot_sprot.fasta.gz
cat uniprot_sprot.fasta >> uniprot_trembl.fasta
mv uniprot_trembl.fasta uniprot.fasta
rm uniprot_sprot.fasta
# uniref90 ****************************************
gunzip uniref90.fasta.gz

准备运行环境
安装 nvidia 容器工具包支持GPU
curl https://get.docker.com | sh \
  && sudo systemctl --now enable docker
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
  && curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - \
  && curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
curl -s -L https://nvidia.github.io/nvidia-container-runtime/experimental/$distribution/nvidia-container-runtime.list | sudo tee /etc/apt/sources.list.d/nvidia-container-runtime.list
sudo apt-get update
sudo apt-get install -y nvidia-docker2
sudo systemctl restart docker
sudo docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi # 展示nvidia gpu信息 就OK

设置非root用户运行docker
# cat /etc/group | grep docker
# sudo groupadd docker
sudo usermod -aG docker $USER
# 重启系统,或执行以下命令激活组修改
newgrp docker
# 验证
docker run hello-world
# 出现该错误,修改 ~/.docker 的权限
WARNING: Error loading config file: /home/user/.docker/config.json - stat /home/user/.docker/config.json: permission denied
sudo chown "$USER":"$USER" /home/"$USER"/.docker -R
sudo chmod g+rwx "$HOME/.docker" -R

检查AlphaFold是否可以使用GPU
docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi

构建docker镜像
docker build -f docker/Dockerfile -t alphafold .

修改Dockerfile 以解决以下问题
- apt-get update失败
RUN echo deb https://developer.download.nvidia.cn/compute/cuda/repos/ubuntu2004/x86_64 / | tee /etc/apt/sources.list.d/cuda.list
# apt-get update 命令之前添加以上命令
- 修改hh-suite仓库代码路径
#RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \
RUN git clone --branch v3.3.0 https://github.91chi.fun/https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \
- pip 下载太慢
# 将这几个命令分开,成功的步骤下次build时可以使用cache
# tensorflow 太大,注释 requirements.txt 中对 tensorflow 的依赖
# tensorflow-cpu==2.5.0 改为 tensorflow==2.5.0,使同时支持cpu和gpu
RUN pip3 install --upgrade pip
RUN pip3 install -r /app/alphafold/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN pip3 install tensorflow==2.5.0 -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN pip3 install --upgrade jax==0.2.14 jaxlib==0.1.69+cuda${CUDA/./} -f \
    https://storage.googleapis.com/jax-releases/jax_releases.html

其它准备
- 确定输出目录是否存在
# docker/run_docker.py
flags.DEFINE_string(
    'output_dir', '/tmp/alphafold',
    'Path to a directory that will store the results.')
# 需要修改路径,修改这里
# 'output_dir', '/ceph_disk2/MetaDatabase/AlphaFold_DB/tmp',
mkdir -p /ceph_disk2/MetaDatabase/AlphaFold_DB/tmp
cd /ceph_disk2/MetaDatabase/AlphaFold_DB
chmod 770 tmp
- 修改镜像名
# docker/run_docker.py
flags.DEFINE_string(
    'docker_image_name', 'alphafold', 'Name of the AlphaFold Docker image.')
# 修改为 'docker_image_name', 'anneng01:8090/library/alphafold'
- 准备python环境
conda create -n af2 -y python=3.7
conda activate af2
pip3 install -r docker/requirements.txt

运行验证
1. 准备数据
CASP14 target list
找到T1050,复制该序列到文件 T1050.fasta
2. 运行
python3 docker/run_docker.py \
  --fasta_paths=data/T1050.fasta \
  --max_template_date=2020-05-14 \
  --data_dir=/ceph_disk2/MetaDatabase/AlphaFold_DB/genetic_databases
3. 监控运行状态,查看运行结果
watch -n 1 nvidia-smi
https://www.ncbi.nlm.nih.gov/Structure/icn3d/full.html
File>Open File>PDB File(appendable)> 选择文件> Append
-
pdb_mmcif 下载特别慢
# 参考网站 https://www.wwpdb.org/ftp/pdb-ftp-sites
# 找到合适的链接下载,比如:
rsync -rlpt -v -z --delete \
  rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ \
  ./mmCIF
# 脚本中给出的信息:
#echo " * rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ (Europe)"
#echo " * ftp.pdbj.org::ftp_data/structures/divided/mmCIF/ (Asia)"
#echo "or see https://www.wwpdb.org/ftp/pdb-ftp-sites for more download options."
#rsync --recursive --links --perms --times --compress --info=progress2 --delete --port=33444 \
#  rsync.rcsb.org::ftp_data/structures/divided/mmCIF/ \
#  "${RAW_DIR}"