GitHub - MrOlm/inStrain: Bioinformatics program inStrain
安装
# Create the environment together with its own Python and pip. A bare
# `conda create -n instrain` produces an empty environment, in which case the
# `pip` call below would run whichever pip is next on PATH and install
# inStrain outside this environment.
conda create -n instrain python pip
conda activate instrain
pip install instrain
# Smoke test: printing the help text confirms the command is on PATH.
inStrain -h
需要准备一个 scaffold-to-bin file
(纯文本文件,包含由制表符分隔的两列:第一列是 scaffold(contig 序列)的名称,第二列是该 scaffold 所属的 bin/基因组的名称。)
https://github.com/MrOlm/drep/blob/master/helper_scripts/parse_stb.py
可使用上面链接中的 parse_stb.py 脚本生成所需的 scaffold-to-bin 文件(例如 scaffold_to_bin.tsv)
profile部分(下面先给出 fasta_split_bin.py:把 fasta 按每条序列拆分成单独文件;随后的 shell 流程会调用它)
#! /usr/bin/env python
#########################################################
# split Vir contig for each ">"
# written by PeiZhong in IFR of CAAS
import argparse
from Bio import SeqIO
import os
# Command-line interface: a single mandatory option, --input_fa.
parser = argparse.ArgumentParser(
    description='split Vir contig for each ">" ',
)
parser.add_argument(
    '--input_fa',
    required=True,
    help='< input fasta >',
)
# Path of the multi-FASTA file given on the command line.
input_fa = parser.parse_args().input_fa
def split_fasta(fasta_file):
    """Write every record of a multi-FASTA file to its own single-record file.

    The output directory is the input path with its last extension removed
    (``all.fa`` -> ``all/``) and is created when missing. Each record is
    written to ``<record id>.fasta`` with the id as the header line and the
    whole sequence on a single line.

    Args:
        fasta_file: Path of the FASTA file to split.
    """
    folder_name = os.path.splitext(fasta_file)[0]
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder_name, exist_ok=True)
    # Context manager so the input handle is closed once parsing is finished.
    with open(fasta_file) as handle:
        for record in SeqIO.parse(handle, 'fasta'):
            name, sequence = record.id, str(record.seq)
            # A path separator inside the id would point into a sub-directory
            # that does not exist; replace it in the file name only. The
            # header written into the file keeps the original id.
            file_stem = name.replace(os.sep, "_")
            new_file = os.path.join(folder_name, file_stem + ".fasta")
            with open(new_file, "w") as output_file:
                output_file.write(">" + name + "\n" + sequence + "\n")


split_fasta(input_fa)
#!/usr/bin/env bash
# inStrain profile pipeline.
# For each reference FASTA: split it into one file per sequence, build the
# scaffold-to-bin (.stb) file and a bowtie2 index, then map every paired-end
# sample found in ${fp_path} and run `inStrain profile` on the sorted BAM.
# The working directory never changes, so all outputs land in the directory
# the script is started from.

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

instrain=$(pwd)
fp_path="/home/zhongpei/hard_disk_sda2/zhongpei/Virome/rawdata/upload_20230812/zhongpei_analyse/fastp"
script_dir="/home/zhongpei/hard_disk_sda2/zhongpei/Software/my_script"
threads=180

source /home/zhongpei/miniconda3/bin/activate /home/zhongpei/hard_disk_sda2/zhongpei/LBSWrap/LBSWrap-inStrain \
  || die "cannot activate the inStrain conda environment"

# Enabled only after activation so it cannot interfere with conda's own
# scripts; makes the bowtie2 | samtools pipeline fail if either stage fails.
set -o pipefail

for i in all_virome_95.fa; do
  [[ -f "${i}" ]] || die "reference FASTA not found: ${i}"
  num=${i%.fa}

  "${script_dir}/fasta_split_bin.py" --input_fa "${i}" \
    || die "fasta_split_bin.py failed for ${i}"
  # The glob stays outside the quotes on purpose: parse_stb.py receives one
  # argument per split FASTA file.
  "${script_dir}/parse_stb.py" --reverse -f "${num}"/* -o "${num}.stb" \
    || die "parse_stb.py failed for ${num}"
  bowtie2-build "${i}" "${i}" || die "bowtie2-build failed for ${i}"

  # Iterate over absolute paths instead of cd-ing back and forth, so a failed
  # cd can never redirect the outputs or the cleanup to another directory.
  for r2 in "${fp_path}"/*_clean_2.fastq; do
    # With no match the glob stays literal; skip instead of mapping nothing.
    [[ -e "${r2}" ]] || continue
    x=${r2##*/}
    numx=${x%_clean_2.fastq}
    r1="${fp_path}/${numx}_clean_1.fastq"
    [[ -e "${r1}" ]] || die "missing mate file: ${r1}"
    sorted_bam="${numx}_instrain_sort.bam"

    # Stream the alignments straight into the sorter: no intermediate SAM or
    # unsorted BAM is written, so there is nothing to delete afterwards.
    bowtie2 -p "${threads}" -x "${instrain}/${i}" -1 "${r1}" -2 "${r2}" \
      | samtools sort -@ "${threads}" -o "${sorted_bam}" - \
      || die "mapping/sorting failed for ${numx}"
    samtools index -@ "${threads}" "${sorted_bam}" \
      || die "samtools index failed for ${sorted_bam}"

    inStrain profile "${sorted_bam}" "${i}" -o "${numx}.IS" -p "${threads}" -s "${num}.stb" \
      || die "inStrain profile failed for ${numx}"
  done

  # Remove only the index files built for this reference.
  rm -- "${i}".*.bt2
done

# Kept as two calls, as in the original notes; presumably the second one also
# leaves a previously active base environment (confirm).
conda deactivate
conda deactivate
compare部分
# Compare two inStrain result folders (placeholder paths, to be replaced;
# presumably these are outputs of `inStrain profile`).
# NOTE(review): no -s/--stb file is passed here; the genome-level options
# (--breadth, -ani, -cov) presumably need one to take effect. Confirm against
# the inStrain documentation.
is_profiles=(
  /path/to/output/Mom1_phage_instrain
  /path/to/output/W1_phage_instrain
)
compare_out=/path/to/output/Mom1_W1_phage_vertical
inStrain compare -i "${is_profiles[@]}" -o "${compare_out}" -p 4 --breadth 0.75 -ani 0.99 -cov 0.75