Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preliminary XML support #224

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions tablib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,11 @@ def export(self, format, **kwargs):
# Formats
# -------

@property
def xml():
    """Placeholder for an XML representation of the dataset.

    The body is intentionally empty in this view.
    NOTE(review): presumably the real accessor is attached elsewhere, as
    for the sibling ``xls`` property -- confirm against the format
    registration code.
    """
    pass

@property
def xls():
"""A Legacy Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`separators`. Cannot be set.
Expand Down
3 changes: 2 additions & 1 deletion tablib/formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@
from . import _ods as ods
from . import _dbf as dbf
from . import _latex as latex
from . import _xml as xml

available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods)
available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods, xml)
134 changes: 134 additions & 0 deletions tablib/formats/_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# -*- coding: utf-8 -*-

""" Tablib - XML Support
"""

from __future__ import unicode_literals

import datetime
import sys
import xml.etree.ElementTree as ElementTree
from collections import *
from xml.dom.minidom import *
from xml.parsers.expat import ExpatError

try:  # the container ABCs live in collections.abc on Python 3.3+
    from collections.abc import Mapping, Set
except ImportError:  # Python 2
    from collections import Mapping, Set

import tablib

title = 'xml'
extensions = ('xml',)


def export_set(dataset):
    """Serialize *dataset* to an XML string.

    The dataset's ``dict`` view is passed to :func:`to_xml` with the
    default root and row tag names.
    """
    records = dataset.dict
    return to_xml(records)


def import_set(dset, in_stream):
    """Replace the contents of *dset* with the records read from *in_stream*.

    The dataset is wiped first; the stream is then parsed by
    :func:`to_dict`, normalised by :func:`harmonize`, and the result is
    assigned to ``dset.dict``.
    """
    dset.wipe()
    parsed = to_dict(in_stream)
    records = harmonize(parsed)
    dset.dict = records


def detect(stream):
    """Return True if *stream* parses as well-formed XML, False otherwise.

    *stream* is handed to ``xml.dom.minidom.parse``, so it may be a file
    name or a file-like object.  A file-like object is read by the parser
    and is not rewound by this function.

    Besides parse errors, the following are treated as "not XML" instead
    of being propagated:

    * ``IOError`` -- *stream* is a string that does not name a readable
      file (for example the raw content of some other format).
    * ``ValueError`` -- *stream* is a string that cannot be used as a
      path at all (for example it contains a NUL character).
    """
    try:
        parse(stream)
    except (ExpatError, TypeError, IOError, ValueError):
        return False
    return True


def create_content(content, root, tag):
    """Recursively append *content* to the DOM node *root*.

    How *content* is rendered depends on its type:

    * mapping -- a new ``tag`` element is appended to *root* and every
      ``key: value`` pair is rendered inside it with ``key`` as the tag.
      When *tag* is ``None`` the pairs are rendered directly into *root*.
    * set or list -- every item is rendered under *root* with the same
      *tag*, producing sibling elements.
    * anything else -- a ``tag`` element holding a single text node is
      appended to *root*.  ``datetime.datetime`` values are written with
      ``isoformat()``; all other values with ``str()``.

    :param content: the value to render.
    :param root: DOM element that receives the new nodes; its
        ``ownerDocument`` is used to create them.
    :param tag: element name for the rendered value.
    """
    document = root.ownerDocument

    if isinstance(content, Mapping):
        if tag is None:
            parent = root
        else:
            parent = document.createElement(tag)
            root.appendChild(parent)
        for key, value in content.items():
            create_content(value, parent, key)
        return

    if isinstance(content, (Set, list)):
        # Collections do not get a wrapper element of their own.
        for item in content:
            create_content(item, root, tag)
        return

    if isinstance(content, datetime.datetime):
        text = content.isoformat()
    else:
        text = str(content)
    leaf = document.createElement(tag)
    leaf.appendChild(document.createTextNode(text))
    root.appendChild(leaf)


def to_xml(content, root_tag='root', row_tag='row'):
    """Render *content* as a pretty-printed XML document string.

    A new document with a single *root_tag* element is created and
    *content* is rendered beneath it by :func:`create_content`, using
    *row_tag* as the element name for the top-level value(s).
    """
    document = Document()
    top = document.createElement(root_tag)
    document.appendChild(top)
    create_content(content, top, row_tag)
    return document.toprettyxml()


def xml_tree_walk(root):
    """Convert the ElementTree element *root* into nested dictionaries.

    The result maps ``root.tag`` to a payload assembled from up to three
    sources, in this order:

    * the element text (``{}`` when the text is only whitespace),
    * a dictionary of the converted children, where repeated child tags
      are collected into a list,
    * a dictionary of the element attributes.

    When a later source arrives and the payload so far is truthy, the two
    are paired as ``[previous, new]``; otherwise the new value replaces
    it.  An element with no text, no children and no attributes yields an
    empty dictionary.
    """
    tag = root.tag
    result = {}

    def attach(payload):
        # Pair with the existing payload if it is truthy, else replace it.
        current = result.get(tag)
        result[tag] = [current, payload] if current else payload

    text = root.text
    if text:
        result[tag] = {} if text.strip() == '' else text

    children = list(root)
    if children:
        merged = {}
        for child in children:
            for key, value in xml_tree_walk(child).items():
                if key not in merged:
                    merged[key] = value
                elif isinstance(merged[key], list):
                    merged[key].append(value)
                else:
                    merged[key] = [merged[key], value]
        attach(merged)

    if root.attrib:
        attach(dict(root.attrib.items()))

    return result


def get_keys(structure):
    """Return the set of every key used by any record in *structure*.

    :param structure: an iterable of mappings (or other iterables of keys).
    """
    return {key for record in structure for key in record}


def harmonize(structure):
    """Turn a parsed XML document into a list of uniformly keyed records.

    *structure* is expected to look like ``{root_tag: {row_tag: rows}}``.
    The two wrapper levels are removed by following the first key at each
    level, and every record is then padded with ``None`` for any key that
    appears in some other record.

    Two shapes are handled explicitly:

    * an empty document, or an empty root element, gives ``[]``;
    * a document with a single row, where ``rows`` is one dictionary
      rather than a list, is treated as a one-record list.

    :returns: a list of dictionaries that all share the same keys.
    """
    for _level in range(2):
        if not structure:
            return []
        structure = structure[list(structure)[0]]

    if isinstance(structure, dict):
        # A lone row arrives as a bare dictionary, not a one-item list.
        structure = [structure] if structure else []

    keys = get_keys(structure)
    return [add_keys(record, keys) for record in structure]


def add_keys(dict, keys):
    """Set every key of *keys* that is missing from *dict* to ``None``.

    The mapping is modified in place and also returned.  The parameter
    name shadows the builtin but is kept for backward compatibility.

    :param dict: the record to pad.
    :param keys: a set of keys the record must contain.
    """
    for key in keys - set(dict):
        dict[key] = None
    return dict


def to_dict(xml_file):
    """Parse *xml_file* and return its content as nested dictionaries.

    *xml_file* is passed to ``ElementTree.parse``; the root element of
    the resulting tree is converted by :func:`xml_tree_walk`.
    """
    document_root = ElementTree.parse(xml_file).getroot()
    return xml_tree_walk(document_root)