%matplotlib inline
import matplotlib.pyplot as plt
def compare_pam120(string1,string2,fulloutput=True):
import numpy as np
import matplotlib.pyplot as plt
aa_index = {'C': 0, 'S': 1, 'T': 2, 'P': 3, 'A': 4, 'G': 5, 'N': 6, 'D': 7
, 'E': 8, 'Q': 9, 'H': 10, 'R': 11, 'K': 12, 'M': 13, 'I': 14, 'L': 15
, 'V': 16, 'F': 17, 'Y': 18, 'W': 19}
PAM120 = np.array([ [9]
,[-1, 3]
,[-3, 2, 4]
,[-3, 1,-1, 6]
,[-3, 1, 1, 1, 3]
,[-5, 1,-1,-2, 1, 5]
,[-5, 1, 0,-2, 0, 0, 4]
,[-7, 0,-1,-2, 0, 0, 2, 5]
,[-7,-1,-2,-1, 0,-1, 1, 3, 5]
,[-7,-2,-2, 0,-1,-3, 0, 1, 2, 6]
,[-4,-2,-3,-1,-3,-4, 2, 0,-1, 3, 7]
,[-4,-1,-2,-1,-3,-4,-1,-3,-3, 1, 1, 6]
,[-7,-1,-1,-2,-2,-3, 1,-1,-1, 0,-2, 2, 5]
,[-6,-2,-1,-3,-2,-4,-3,-4,-4,-1,-4,-1, 0, 8]
,[-3,-2, 0,-3,-1,-4,-2,-3,-3,-3,-4,-2,-2, 1, 6]
,[-7,-4,-3,-3,-3,-5,-4,-5,-4,-2,-3,-4,-4, 3, 1, 5]
,[-2,-2, 0,-2, 0,-2,-3,-3,-3,-3,-3,-3,-4, 1, 3, 1, 5]
,[-6,-3,-4,-5,-4,-5,-4,-7,-6,-6,-2,-4,-6,-1, 0, 0,-3, 8]
,[-1,-3,-3,-6,-4,-6,-2,-5,-4,-5,-1,-6,-6,-4,-2,-3,-3, 4, 8]
,[-8,-2,-6,-7,-7,-8,-5,-8,-8,-6,-5, 1,-5,-7,-7,-5,-8,-1,-1,12]])
score = 0
visual = np.zeros((1,len(string1)))
for i in range(0,len(string1)):
previous_score = score
if aa_index[string1[i]] > aa_index[string2[i]]:
score += int(PAM120[int(aa_index[string1[i]])][int(aa_index[string2[i]])])
else:
score += int(PAM120[int(aa_index[string2[i]])][int(aa_index[string1[i]])])
if fulloutput == True:
print score-previous_score, score, aa_index[string1[i]], aa_index[string2[i]], i, string1[i], string1[i]
visual[0,i] = score-previous_score
print "final score: ", score
plt.imshow(visual,cmap="seismic_r",vmin=int(np.min(np.min(PAM120))), vmax=int(np.max(np.max(PAM120))),interpolation='none')
return score
example1 = 'CCSSCCTPAGNDEHRVLYWDEQH'
perfect_match = compare_pam120(example1, example1)
example2 = 'CCSSCCTPAGADEHAVLYADEAH'
a_few_substitutions = compare_pam120(example1, example2)
#heavy chain, constant region, Fc, Igg1, human
#https://www.ncbi.nlm.nih.gov/protein/AEV43323.1
# versus
human_heavychain_fragment = 'PSVFLFPPKPKDTLMISRTPEVTCVVVDVSHEDPEVKFNWYVDGVEVHNAKTKPREEQYNSTYRVVSVLTVLHQDWLNGKEYKCKVSNKALPAPIEKTISKAKGQPREPQVYTLPPSREEMTKNQVSLTCLVKGFYPSDIAVEWESNGQPENNYKTTPPVLDSDGSFFLYSKLTVDKSRWQQGNVFSCSVMHEALHNHYTQKSLSLSPG'
mouse_heavychain_fragment = 'SSVFIFPPKPKDVLTITLTPKVTCVVVDISKDDPEVQFSWFVDDVEVHTAQTQPREEQFNSTFRSVSELPIMHQDWLNGKEFKCRVNSAAFPAPIEKTISKTKGRPKAPQVYTIPPPKEQMAKDKVSLTCMITDFFPEDITVEWQWNGQPAENYKNTQPIMDTDGSYFVYSKLNVQKSNWEAGNTFTCSVLHEGLHNHHTEKSLSHSPG'
plt.rcParams['figure.figsize'] = 100,1
human_versus_mouse = compare_pam120(human_heavychain_fragment,mouse_heavychain_fragment, fulloutput=False)
human_heavychain_fragment = 'PSVFLFPPKPKDTLMISRTPEVTCVVVDVSHEDPEVKFNWYVDGVEVHNAKTKPREEQYNSTYRVVSVLTVLHQDWLNGKEYKCKVSNKALPAPIEKTISKAKGQPREPQVYTLPPSREEMTKNQVSLTCLVKGFYPSDIAVEWESNGQPENNYKTTPPVLDSDGSFFLYSKLTVDKSRWQQGNVFSCSVMHEALHNHYTQKSLSLSPG'
macac_heavychain_fragment = 'PSVFLFPPKPKDTLMISRTPEVTCVVVDVSQEDPDVKFNWYVNGAEVHHAQTKPRETQYNSTYRVVSVLTVTHQDWLNGKEYTCKVSNKALPAPIQKTISKDKGQPREPQVYTLPPSREELTKNQVSLTCLVKGFYPSDIVVEWESSGQPENTYKTTPPVLDSDGSYFLYSKLTVDKSRWQQGNVFSCSVMHEALHNHYTQKSLSVSPG'
human_versus_monkey = compare_pam120(human_heavychain_fragment,macac_heavychain_fragment,fulloutput=False)
def compare_BLOSUM62(string1,string2,fulloutput=True):
import numpy as np
import matplotlib.pyplot as plt
aa_index = {'C': 0, 'S': 1, 'T': 2, 'P': 3, 'A': 4, 'G': 5, 'N': 6, 'D': 7
, 'E': 8, 'Q': 9, 'H': 10, 'R': 11, 'K': 12, 'M': 13, 'I': 14, 'L': 15
, 'V': 16, 'F': 17, 'Y': 18, 'W': 19}
PAM120 = np.array([ [9]
,[-1, 4]
,[-1, 1, 5]
,[-3,-1,-1, 7]
,[ 0, 1, 0,-1, 4]
,[-3, 0,-2,-2, 0, 6]
,[-3, 1, 0,-2,-2, 0, 6]
,[-3, 0,-1,-1,-2,-1, 1, 6]
,[-4, 0,-1,-1,-1,-2, 0, 2, 5]
,[-3, 0,-1,-1,-1,-2, 0, 0, 2, 5]
,[-3,-1,-2,-2,-2,-2, 1,-1, 0, 0, 8]
,[-3,-1,-1,-2,-1,-2, 0,-2, 0, 1, 0, 5]
,[-3, 0,-1,-1,-1,-2, 0,-1, 1, 1,-1, 2, 5]
,[-1,-1,-1,-2,-1,-3,-2,-3,-2, 0,-2,-1,-1, 5]
,[-1,-2,-1,-3,-1,-4,-3,-3,-3,-3,-3,-3,-3, 1, 4]
,[-1,-2,-1,-3,-1,-4,-3,-4,-3,-2,-3,-2,-2, 2, 2, 4]
,[-1,-2, 0,-2, 0,-3,-3,-3,-2,-2,-3, 3, 2, 1, 3, 1, 4]
,[-2,-2,-2,-4,-2,-3,-3,-3,-3,-3,-1,-3,-3, 0, 0, 0,-1, 6]
,[-2,-2,-2,-3,-2,-3,-2,-3,-2,-1, 2,-2,-2,-1,-1,-1,-1, 3, 7]
,[-2,-3,-2,-4,-3,-2,-4,-4,-3,-2,-2,-3,-3,-1,-3,-2,-3, 1, 2,11]])
score = 0
visual = np.zeros((1,len(string1)))
for i in range(0,len(string1)):
previous_score = score
if aa_index[string1[i]] > aa_index[string2[i]]:
score += int(PAM120[int(aa_index[string1[i]])][int(aa_index[string2[i]])])
else:
score += int(PAM120[int(aa_index[string2[i]])][int(aa_index[string1[i]])])
if fulloutput == True:
print score-previous_score, score, aa_index[string1[i]], aa_index[string2[i]], i, j, string1[i], string1[i]
visual[0,i] = score-previous_score
print "final score: ", score
plt.imshow(visual,cmap="seismic_r",vmin=int(np.min(np.min(PAM120))), vmax=int(np.max(np.max(PAM120))),interpolation='none')
return score
human_heavychain_fragment = 'PSVFLFPPKPKDTLMISRTPEVTCVVVDVSHEDPEVKFNWYVDGVEVHNAKTKPREEQYNSTYRVVSVLTVLHQDWLNGKEYKCKVSNKALPAPIEKTISKAKGQPREPQVYTLPPSREEMTKNQVSLTCLVKGFYPSDIAVEWESNGQPENNYKTTPPVLDSDGSFFLYSKLTVDKSRWQQGNVFSCSVMHEALHNHYTQKSLSLSPG'
macac_heavychain_fragment = 'PSVFLFPPKPKDTLMISRTPEVTCVVVDVSQEDPDVKFNWYVNGAEVHHAQTKPRETQYNSTYRVVSVLTVTHQDWLNGKEYTCKVSNKALPAPIQKTISKDKGQPREPQVYTLPPSREELTKNQVSLTCLVKGFYPSDIVVEWESSGQPENTYKTTPPVLDSDGSYFLYSKLTVDKSRWQQGNVFSCSVMHEALHNHYTQKSLSVSPG'
human_versus_monkey = compare_BLOSUM62(human_heavychain_fragment,macac_heavychain_fragment,fulloutput=False)
def score_DNA(string1,string2,fulloutput=True):
import numpy as np
import matplotlib.pyplot as plt
aa_index = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
DNA_score_matrix = np.array([ [67]
,[-96, 100]
,[-20, -79, 100]
,[-117,-20,-96, 67]])
score = 0
visual = np.zeros((1,len(string1)))
for i in range(0,len(string1)):
previous_score = score
if aa_index[string1[i]] > aa_index[string2[i]]:
score += int(DNA_score_matrix[int(aa_index[string1[i]])][int(aa_index[string2[i]])])
else:
score += int(DNA_score_matrix[int(aa_index[string2[i]])][int(aa_index[string1[i]])])
if fulloutput == True:
print score-previous_score, score, aa_index[string1[i]], aa_index[string2[i]], i, j, string1[i], string1[i]
visual[0,i] = score-previous_score
print "final score: ", score
plt.imshow(visual,cmap="seismic_r",vmin=int(np.min(np.min(DNA_score_matrix))), vmax=int(np.max(np.max(DNA_score_matrix))),interpolation='none')
return score
dnaseq1 = 'ATATATCGTATATAGCTATG'
dnaseq2 = 'ATATAGCGTATATCGATCTAG'
human_versus_monkey = score_DNA(dnaseq1,dnaseq2,fulloutput=False)