原始文件:
目标文件:
linux版本
#name:lin_convert_fasta_to_01.py
#! /usr/bin/env python
#usage: python lin_convert_fasta_to_01.py -f1 input.fasta > out.txt
import argparse
# Characters of the reference sequence that are encoded as "1" (uppercase only).
REFERENCE_RESIDUES = frozenset("ATCGX")


def encode_reference(sequence):
    """Return the list of 0/1 flags for the reference sequence.

    A character that is one of A, T, C, G or X maps to "1"; every other
    character (gaps, lowercase letters, whitespace, ...) maps to "0".
    """
    return ["1" if residue in REFERENCE_RESIDUES else "0" for residue in sequence]


def encode_against_reference(sequence, reference):
    """Return the list of 0/1 flags for *sequence* compared with *reference*.

    A position is "1" when both sequences carry the same character there and
    that character is not the gap symbol "-"; otherwise it is "0".  The
    comparison stops at the end of the shorter of the two strings.
    """
    return [
        "1" if residue == ref_residue and residue != "-" else "0"
        for residue, ref_residue in zip(sequence, reference)
    ]


def convert_fasta_lines(lines):
    """Yield the output lines for an iterable of FASTA input lines.

    Lines starting with ">" (FASTA headers) are yielded unchanged after
    stripping surrounding whitespace.  The first non-header line is taken as
    the reference and encoded with ``encode_reference``; every later
    non-header line is encoded with ``encode_against_reference``.  Flags are
    joined with tabs.

    Blank lines are not skipped: the input is expected to contain none, and
    each sequence is expected to occupy a single line.
    """
    # Tracking the reference by "first sequence seen" rather than by physical
    # line number avoids a NameError when line 2 is not a sequence line.
    reference = None
    for raw_line in lines:
        line = raw_line.strip()
        if line.startswith(">"):
            yield line
        elif reference is None:
            reference = line
            yield "\t".join(encode_reference(line))
        else:
            yield "\t".join(encode_against_reference(line, reference))


def main(argv=None):
    """Parse the command line and print the 0/1 conversion to stdout.

    ``argv`` is the argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    # NOTE(review): the description text does not describe this script; it is
    # kept unchanged here because it is user-visible output.
    parser = argparse.ArgumentParser(description="Advanced screening always by hash")
    parser.add_argument(
        "-f1",
        "--file1",
        required=True,  # fail with a usage message instead of open(None)
        help="the original file,tabulated,make sure do not contain blank line",
    )
    args = parser.parse_args(argv)
    with open(args.file1, "r") as fasta_file:
        for out_line in convert_fasta_lines(fasta_file):
            print(out_line)


if __name__ == "__main__":
    main()