-
Notifications
You must be signed in to change notification settings - Fork 0
/
patternToNumber.py
113 lines (95 loc) · 2.32 KB
/
patternToNumber.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 26 22:54:48 2018
@author: bjwil
"""
import pdb
def symbolToNumber(symbol):
    """Return the base-4 digit assigned to a single symbol.

    The mapping is A -> 0, C -> 1, G -> 2, T -> 3.

    Args:
        symbol: One of the strings "A", "C", "G" or "T" (case-sensitive).

    Returns:
        The integer 0, 1, 2 or 3.

    Raises:
        ValueError: If symbol is anything other than the four symbols above.
    """
    digits = {"A": 0, "C": 1, "G": 2, "T": 3}
    try:
        return digits[symbol]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as a list; both cases are
        # reported uniformly instead of failing later on an unbound local.
        raise ValueError(
            "symbol must be one of 'A', 'C', 'G', 'T'; got {!r}".format(symbol)
        )
def patternToNumber(Pattern):
    """Convert a sequence of symbols to its base-4 integer encoding.

    Each symbol is turned into a digit by symbolToNumber and the digits are
    read as a base-4 number with the first symbol most significant, so an
    empty pattern maps to 0.

    The digits are accumulated in a loop rather than by recursing on the
    prefix, so long patterns neither hit the interpreter's recursion limit
    nor copy the pattern once per symbol.

    Args:
        Pattern: A string or any other iterable of single symbols.

    Returns:
        The non-negative integer encoding of Pattern.

    Any error raised by symbolToNumber for an unrecognised symbol propagates
    to the caller.
    """
    number = 0
    for symbol in Pattern:
        # Shift the digits collected so far one base-4 place to the left.
        number = 4 * number + symbolToNumber(symbol)
    return number
def readData(filename):
    """Return the first line of the file at filename, stripped of whitespace.

    Only the first line is read; the remainder of the file is ignored. An
    empty file yields an empty string.
    """
    with open(filename, 'r') as handle:
        first_line = handle.readline()
    return first_line.strip()
# Script entry point: encode the first line of the dataset file as a number
# and print it. Runs only when the file is executed directly.
if __name__ == "__main__":
    # The path is relative, so it is resolved against the current working
    # directory of the process.
    Pattern = readData('dataset_3010_2.txt')
    result = patternToNumber(Pattern)
    print(result)
# Scratch check: call patternToNumber with a list of symbols instead of a
# string. The return value is discarded.
# NOTE(review): indentation was lost in this copy of the file; these two lines
# are assumed to sit at module level rather than under the __main__ guard --
# confirm against the original.
g = ['T']
patternToNumber(g)
def mismatch(text1, text2):
    """Return the number of positions at which text1 and text2 differ.

    Args:
        text1: First sequence (for example a string).
        text2: Second sequence; it must have the same length as text1.

    Returns:
        The count of indices i for which text1[i] != text2[i].

    Raises:
        SystemExit: If the lengths differ. The message
            'Lengths are different.' is printed first and the exit status
            is 1.
    """
    if len(text1) != len(text2):
        print('Lengths are different.')
        # Raise SystemExit directly so the function does not depend on a
        # module-level `import sys`, and use a non-zero status so the failure
        # is visible to the calling shell.
        raise SystemExit(1)
    return sum(left != right for left, right in zip(text1, text2))
# Scratch statements that look like leftovers from interactive
# experimentation. They all execute at module level whenever the file is run
# or imported.
type({'A','C','G','T'})  # bare expression; the resulting type object is discarded
# NOTE(review): numberToPattern is only defined near the end of this file, so
# running the file top to bottom raises NameError on the next line -- confirm
# whether these lines were meant to be run piecemeal in an interactive session.
numberToPattern(1,2)
k = 2  # read again by the kmerArray loop further down
t = 1
hammerAA = 'AA'
hammerAT = 'AT'
mismatch(hammerAA, hammerAT)  # return value is discarded
import sys
chars = "ACGT"
for i in chars:
    # NOTE(review): no module-level assignment to `suffix` precedes this loop
    # in the file, so this presumably raises NameError unless `suffix` already
    # exists in an interactive namespace -- confirm.
    print(i + suffix)
def Neighbors(hammer,t):
    """Return all strings over "ACGT" that differ from hammer in at most t positions.

    The neighbourhood is built recursively: every neighbour of the suffix
    hammer[1:] is extended with the original first symbol and, when the
    suffix neighbour has not yet used up all t mismatches, with every other
    symbol of the alphabet as well.

    Args:
        hammer: The pattern (a string over "ACGT") whose neighbourhood is wanted.
        t: Maximum number of mismatching positions. Values of 0 or less give
            a neighbourhood containing only hammer itself.

    Returns:
        A list of strings, each of the same length as hammer and without
        duplicates. A list is returned on every path, including the
        zero-distance and single-symbol cases, so callers can always iterate
        or join the result.
    """
    alphabet = "ACGT"
    if t <= 0 or not hammer:
        # Wrap in a list: returning the bare string would make callers
        # iterate over its characters. The empty-pattern guard also stops the
        # recursion from looping on an empty suffix.
        return [hammer]
    if len(hammer) == 1:
        return list(alphabet)
    first, rest = hammer[0], hammer[1:]
    neighborhood = []
    for tail in Neighbors(rest, t):
        # tail always has the same length as rest, so a plain pairwise
        # comparison gives the number of mismatches already spent.
        used = sum(a != b for a, b in zip(rest, tail))
        if used < t:
            # One mismatch is still available: any leading symbol is allowed.
            neighborhood.extend(symbol + tail for symbol in alphabet)
        else:
            # Budget exhausted: the leading symbol must match the original.
            neighborhood.append(first + tail)
    return neighborhood
# More scratch statements executed at module level.
# NOTE(review): `text` is first assigned a few lines below, so running the
# file top to bottom presumably raises NameError on the next line unless
# `text` already exists in an interactive namespace -- confirm.
prefix = text[0:1]
prefix + 'AT'  # bare expression; result discarded
suffix = text[-(len(text)-1):]
text = 'CAA'
text[-(len(text)-1):]  # bare expression; result discarded
text[-2:]  # bare expression; result discarded
# Print each element of Neighbors('ACCACTGA', 2) on its own line.
print("\n".join(Neighbors('ACCACTGA', 2)))
patternToNumber('AC')  # return value is discarded
# Collect numberToPattern(i, k) for every i in range(4**k); `k` is the
# module-level value assigned earlier in the file.
# NOTE(review): numberToPattern is defined after this loop in the file, so a
# top-to-bottom run raises NameError here -- confirm intended execution order.
kmerArray = []
for i in range(0,4**k):
    kmerArray.append((numberToPattern(i,k)))
kmerArray  # bare expression; has no effect outside an interactive session
def numberToPattern(index,k):
    """Return the length-k pattern over "ACGT" whose base-4 encoding is index.

    Digits map to symbols as 0 -> A, 1 -> C, 2 -> G, 3 -> T, with the first
    symbol of the pattern most significant; the result is left-padded with
    'A' to exactly k symbols.

    The digit-to-symbol lookup is done inline, so the function does not rely
    on a separate numberToSymbol helper being available.

    Args:
        index: Integer in the range 0 <= index < 4**k.
        k: Length of the pattern to produce; must be 0 or greater.

    Returns:
        A string of exactly k symbols (the empty string for k == 0).

    Raises:
        ValueError: If k is negative or index does not fit in k base-4 digits.
    """
    alphabet = "ACGT"
    if k < 0 or not 0 <= index < 4 ** k:
        raise ValueError(
            "index must satisfy 0 <= index < 4**k with k >= 0; "
            "got index={!r}, k={!r}".format(index, k)
        )
    symbols = []
    for _ in range(k):
        # Peel off the least significant base-4 digit on each pass.
        index, digit = divmod(index, 4)
        symbols.append(alphabet[digit])
    # Digits were collected least significant first; reverse for the pattern.
    return "".join(reversed(symbols))