-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_xml.py
102 lines (79 loc) · 3.31 KB
/
parse_xml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import pandas as pd
import xml.etree.ElementTree as ET
#Find node and convert content to text
def getElement(Node, ToFind):
    """Return the text of the first child of ``Node`` matching ``ToFind``.

    Args:
        Node: An ``xml.etree.ElementTree.Element`` to search in.
        ToFind: Tag name or ElementPath expression of the element to look up.

    Returns:
        The text content of the first matching element, or ``''`` when no
        element matches or the matching element has no text (for example
        ``<Tag/>``), so callers always receive a string.
    """
    # findtext() returns the default when nothing matches and '' when the
    # matched element is empty, so no manual None checks are needed.
    return Node.findtext(ToFind, default='')
# Flatten the XML files into a table
def parseXml(xml_to_process):
    """Flatten a batch of XML documents into one row per ``Result`` element.

    Args:
        xml_to_process: Iterable of rows, or ``None``. Each row is read by
            index: ``row[0]`` is the file name, ``row[1]`` the XML content as
            a string and ``row[2]`` the file's "updated at" value.

    Returns:
        A ``pandas.DataFrame`` with the columns FileName, SampleCode,
        SampleDescription, RecipientLabCode, ReceptionDate, TestReference,
        TestCode, ParameterCode, ResultValue, ResultUnit and FileUpdatedAt.
        When the input is ``None``, empty, or contains no ``Result``
        elements, an empty frame with those same columns is returned.

    Raises:
        xml.etree.ElementTree.ParseError: If a row's XML content is malformed.
    """
    columns = [
        "FileName",
        "SampleCode",
        "SampleDescription",
        "RecipientLabCode",
        "ReceptionDate",
        "TestReference",
        "TestCode",
        "ParameterCode",
        "ResultValue",
        "ResultUnit",
        "FileUpdatedAt",
    ]
    data = []
    rows = [] if xml_to_process is None else xml_to_process

    for row in rows:
        file_name = row[0]
        xml_content = row[1]
        file_updated_at = row[2]
        print(file_name)

        # Load XML content from string
        root = ET.fromstring(xml_content)

        # File-level elements, repeated on every output row of this file
        sample_code = getElement(root, 'SampleCode')
        sample_description = getElement(root, 'SampleDescription')
        recipient_lab_code = getElement(root, 'RecipientLabCode')
        reception_date = getElement(root, 'ReceptionDate')

        # Walk Fraction -> Test -> Parameter -> Result; iter() searches the
        # whole subtree, so the elements need not be direct children.
        for fraction in root.iter('Fraction'):
            for test in fraction.iter('Test'):
                test_ref = getElement(test, 'TestReference')
                test_code = getElement(test, 'TestCode')
                for parameter in test.iter('Parameter'):
                    parameter_code = getElement(parameter, 'ParameterCode')
                    for result in parameter.iter('Result'):
                        result_value = getElement(result, 'ResultValue')
                        result_unit = getElement(result, 'ResultUnit')

                        # One flat record per Result, in column order
                        data.append([
                            file_name,
                            sample_code,
                            sample_description,
                            recipient_lab_code,
                            reception_date,
                            test_ref,
                            test_code,
                            parameter_code,
                            result_value,
                            result_unit,
                            file_updated_at,
                        ])

                        # Display extracted values
                        print("FileName:", file_name)
                        print("FileUpdatedAt:", file_updated_at)
                        print("SampleCode:", sample_code)
                        print("SampleDescription:", sample_description)
                        print("RecipientLabCode", recipient_lab_code)
                        print("ReceptionDate:", reception_date)
                        print("TestReference:", test_ref)
                        print("TestCode:", test_code)
                        print("ParameterCode:", parameter_code)
                        print("ResultValue:", result_value)
                        print("ResultUnit:", result_unit)
                        print("---")

    # Passing columns= to the constructor (instead of assigning .columns
    # afterwards) keeps this working when no rows were collected: assigning
    # 11 names to a zero-column frame raises ValueError.
    return pd.DataFrame(data, columns=columns)