-
Notifications
You must be signed in to change notification settings - Fork 2
/
aggregate.py
69 lines (58 loc) · 2.09 KB
/
aggregate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python3
import csv
import sys
# Command-line arguments: argv[1] is the path of the table file to aggregate,
# argv[2] is the group count, parsed as an integer. Both are handed to
# agg_table() by the call at the bottom of the file.
table_name = sys.argv[1]
grp = int(sys.argv[2])
def is_number(s):
    """Return True if *s* can be interpreted as a number, False otherwise.

    A value counts as a number when ``float(s)`` accepts it, or when it is a
    single Unicode character that has a numeric value according to
    ``unicodedata.numeric`` (for example a vulgar fraction character).

    Values that are neither strings nor numbers (such as ``None``, which
    ``csv.DictReader`` uses for missing fields) yield False instead of
    raising ``TypeError``.
    """
    import unicodedata

    try:
        float(s)
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric inputs such as None.
        pass
    else:
        return True

    try:
        # Raises TypeError for anything but a one-character string and
        # ValueError for a character without a numeric value.
        unicodedata.numeric(s)
    except (TypeError, ValueError):
        return False
    return True
def _parse_cell(text):
    """Return *text* as an int or float, or None if it is not numeric.

    Only plain unsigned digit strings become ints; every other numeric
    spelling (signed, decimal, exponent) becomes a float. Non-string values
    (e.g. None for a missing field) are treated as non-numeric.
    """
    if not isinstance(text, str):
        return None
    if text.isdigit():
        try:
            return int(text)
        except ValueError:
            # Some characters satisfy isdigit() but are rejected by int().
            pass
    try:
        return float(text)
    except ValueError:
        return None


def agg_table(table: str, grp: int) -> None:
    """Average a space-delimited table that holds several stacked runs.

    The file *table* has one header line followed by data rows. The data rows
    are treated as consecutive runs of ``rows // grp`` rows each; row ``i`` of
    every run is folded into output row ``i``. Numeric cells are averaged,
    non-numeric cells keep the text of the first run. The result is written
    to ``table + "_agg"`` with the same header.

    Integer columns (all cells are unsigned digit strings) are averaged with
    floor division so they stay integers; as soon as one float is involved
    the column average is a float.

    Each cell is divided by the number of values that were actually summed
    into it, so a row count that is not a multiple of *grp*, or a run with a
    non-numeric cell, does not skew the mean.

    Args:
        table: Path of the input table.
        grp: Number of runs stacked in the file; must be at least 1.

    Raises:
        ValueError: If *grp* is less than 1, or the file has data rows but
            fewer than *grp* of them (no complete output row can be formed).
    """
    if grp < 1:
        raise ValueError(f"grp must be a positive integer, got {grp}")

    # newline='' is what the csv module expects for files it reads/writes.
    with open(table, 'r', newline='') as i_f:
        # Column order is taken from the raw header line; the file is then
        # rewound so DictReader consumes that same line as its field names.
        cols = i_f.readline().strip().split(" ")
        i_f.seek(0)
        content = list(csv.DictReader(i_f, delimiter=' '))

    rows_per_grp = len(content) // grp
    if content and rows_per_grp == 0:
        raise ValueError(
            f"{table!r} has {len(content)} data rows, fewer than grp={grp}"
        )

    # Seed one accumulator per output row from the first run. A None total
    # marks a text cell whose original string is written back unchanged.
    sums = [
        {col: _parse_cell(first[col]) for col in cols}
        for first in content[:rows_per_grp]
    ]
    hits = [{col: 1 for col in cols} for _ in range(rows_per_grp)]

    for index in range(rows_per_grp, len(content)):
        slot = index % rows_per_grp
        record = content[index]
        for col in cols:
            value = _parse_cell(record[col])
            if value is None or sums[slot][col] is None:
                continue
            # int + float promotes to float, so nothing is truncated.
            sums[slot][col] += value
            hits[slot][col] += 1

    with open(table + "_agg", 'w', newline='') as o_f:
        writer = csv.writer(o_f, delimiter=' ')
        writer.writerow(cols)
        for slot in range(rows_per_grp):
            out = []
            for col in cols:
                total = sums[slot][col]
                if total is None:
                    out.append(content[slot][col])
                elif isinstance(total, int):
                    out.append(total // hits[slot][col])
                else:
                    out.append(total / hits[slot][col])
            writer.writerow(out)
# Script entry point: aggregate the table named on the command line. The
# result is written to a file with the same path plus an "_agg" suffix.
agg_table(table_name, grp)