Comparing 2 sequences of DNA: output the silent and non-silent mutations

dishaacharya96 at gmail.com dishaacharya96 at gmail.com
Sat Oct 29 15:38:20 EDT 2016


Code: 

# Mutation counters, incremented by the reporting loop further down.
A = 0  # non-silent mutations (amino acid changed)
B = 0  # silent mutations (codon changed, amino acid unchanged)


def _read_sequence(handle):
    """Return the remainder of *handle* as one string with all whitespace removed.

    Dropping every whitespace character (not only '\\n') keeps stray '\\r'
    or spaces from ending up inside a codon and breaking the table lookup.
    """
    return "".join(handle.read().split())


# opening the files
# The with-statement guarantees all three handles are closed, even if a
# read fails part-way through.
with open("CDSsrebf1.txt") as infile1, \
        open("PROsrebf1.txt") as infile2, \
        open("mutant.txt") as infile3:
    # The first line of each file is printed and thereby consumed, so the
    # reads below start at the second line.
    print(" 1st line of WT SREBF1 (CDS):", infile1.readline())
    print("1st line of mutant protein of SREBF1: ", infile3.readline())
    print("1st line of protein of SREBF1: ", infile2.readline())
    # -----------------------------------------------------
    # reading the nucleotide sequence for WT SREBF1
    seq1 = _read_sequence(infile1)
    # --------------------------------------------
    # reading the mutant file
    mutant = _read_sequence(infile3)
    # ---------------------------------------
    # reading the protein file
    # which is used to check our codon dictionary
    wtPRO = _read_sequence(infile2)
len1 = len(seq1)
#---------------------------------------------------------
# setting up the dictionary
# Enumerate all 64 codons with the first base varying slowest and the
# third base fastest, each position running through G, A, C, T.
letters = ('G', 'A', 'C', 'T')
codes = [
    first + second + third
    for first in letters
    for second in letters
    for third in letters
]
# One amino-acid letter per codon, in the same order as `codes`.
# The three positions holding 'X' line up with TGA, TAG and TAA.
aa = 'ggggeeddaaaavvvvrrsskknnttttmiiirrrrqqhhppppllllwxccxxyyssssllff'.upper()
# Pair each codon with its letter to form the lookup table.
codons = dict(zip(codes, aa))
#------------------------------------------------------------------
def _translate(dna, table):
    """Translate *dna* codon by codon using *table* (codon -> one letter).

    Reading starts at index 0 and proceeds in steps of three. If the
    length of *dna* is not a multiple of three, the incomplete codon at
    the end is skipped rather than looked up (it can never be in the
    table). A complete codon that is missing from *table* still raises
    KeyError.
    """
    usable = len(dna) - len(dna) % 3
    # join() builds the result in one pass instead of repeated `+=`.
    return ''.join(table[dna[k:k + 3]] for k in range(0, usable, 3))


# making the protein from the WT SREBF1, which is seq1
protein = _translate(seq1, codons)
# -----------------------------------------------------------
# making the protein from the mutant SREBF1, which is mutant
mutantPRO = _translate(mutant, codons)
# ----------------------------------------------------------
# quick check if WT and mutant are the same for the protein
if protein == mutantPRO:
    print('The protein sequences are the same.')
else:
    print('The protein sequences are different.')
# --------------------------------------------------------
# Printing the differences in the format XiY
# which means WT amino acid X at position i changed to mutant amino acid Y
# (positions are 0-based). A silent mutation additionally lists every
# changed nucleotide in the same format, e.g. "D1D silent mutation C5T":
# WT base C at nucleotide position 5 became T.
print('-------------------------')
print('The mutations are:')

if len(protein) != len(mutantPRO):
    # zip() below stops at the shorter protein, so say so explicitly
    # instead of silently ignoring the tail of the longer one.
    print('Note: protein lengths differ (' + str(len(protein)) + ' vs '
          + str(len(mutantPRO)) + '); only the common part is compared.')

for pos, (wt_aa, mut_aa) in enumerate(zip(protein, mutantPRO)):
    # Amino acid `pos` is encoded by nucleotides 3*pos .. 3*pos + 2.
    start = 3 * pos
    wt_codon = seq1[start:start + 3]
    mut_codon = mutant[start:start + 3]

    if wt_aa != mut_aa:
        print(wt_aa + str(pos) + mut_aa)
        A += 1
    elif wt_codon != mut_codon:
        # Same amino acid, different codon: report each changed base.
        changes = ' '.join(
            wt_nt + str(start + offset) + mut_nt
            for offset, (wt_nt, mut_nt) in enumerate(zip(wt_codon, mut_codon))
            if wt_nt != mut_nt
        )
        print(wt_aa + str(pos) + mut_aa + ' silent mutation ' + changes)
        B += 1


print("Number of non-silent mutations are: ", A)
print("Number of silent mutations are: ", B)


The output should be:

The mutations are:
M0I
D1D silent mutation C5T
V291L


I don't know how to print the C5T part.

Thank you for helping me! 



More information about the Python-list mailing list