From a0e7591a1e81dcf0c8484f046be236bcb50c3440 Mon Sep 17 00:00:00 2001
From: Mahmood Shafeie Zargar
Date: Sat, 20 Feb 2016 15:46:23 -0500
Subject: [PATCH 1/2] Preliminary XML support

---
 tablib/core.py             |   5 ++
 tablib/formats/__init__.py |   3 +-
 tablib/formats/_xml.py     | 110 +++++++++++++++++++++++++++++++++++++
 3 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 tablib/formats/_xml.py

diff --git a/tablib/core.py b/tablib/core.py
index 3db24f39..ca30198c 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -467,6 +467,11 @@ def export(self, format, **kwargs):
     # Formats
     # -------
 
+    @property
+    def xml():
+
+        pass
+
     @property
     def xls():
         """A Legacy Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`separators`. Cannot be set.
diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py
index 5cca19f7..f201bcab 100644
--- a/tablib/formats/__init__.py
+++ b/tablib/formats/__init__.py
@@ -13,5 +13,6 @@
 from . import _ods as ods
 from . import _dbf as dbf
 from . import _latex as latex
+from . import _xml as xml
 
-available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods)
+available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods, xml)
diff --git a/tablib/formats/_xml.py b/tablib/formats/_xml.py
new file mode 100644
index 00000000..82a022aa
--- /dev/null
+++ b/tablib/formats/_xml.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+
+""" Tablib - XML Support
+"""
+
+from __future__ import unicode_literals
+
+import sys
+import tablib
+
+from collections import *
+from xml.dom.minidom import *
+import xml.etree.ElementTree as ElementTree
+import datetime
+
+title = 'xml'
+extensions = ('xml',)
+
+
+def export_set(dataset):
+    """Returns XML representation of Dataset."""
+    return to_xml(dataset.dict)
+
+def import_set(dset, in_stream):
+    """Returns dataset from XML stream."""
+
+    dset.wipe()
+    dset.dict = to_dict(in_stream)
+
+def create_content(content, root, tag):
+    if isinstance(content, Mapping):
+        if tag is None:
+            node = root
+        else:
+            node = root.ownerDocument.createElement(tag)
+            root.appendChild(node)
+        for key, value in content.items():
+            create_content(value, node, key)
+    else:
+        if isinstance(content, Set) or isinstance(content, list):
+            for item in content:
+                create_content(item, root, tag)
+        else:
+            node = root.ownerDocument.createElement(tag)
+            if isinstance(content, datetime.datetime):
+                sub_node = root.ownerDocument.createTextNode(content.isoformat())
+            else:
+                sub_node = root.ownerDocument.createTextNode(str(content))
+            node.appendChild(sub_node)
+            root.appendChild(node)
+
+
+def to_xml(content, root_tag='root', row_tag='row'):
+    doc = Document()
+    root_node = doc.createElement(root_tag)
+    doc.appendChild(root_node)
+    create_content(content, root_node, row_tag)
+    return doc.toprettyxml()
+
+
+def xml_tree_walk(root):
+    dictionary = {}
+    tag = root.tag
+    if root.text:
+        if root.text.strip() == '':
+            dictionary[tag] = {}
+        else:
+            dictionary[tag] = root.text
+    children = list(root)
+    if children:
+        subdictionary = {}
+        for child in children:
+            for k,v in xml_tree_walk(child).items():
+                if k in subdictionary:
+                    if isinstance(subdictionary[k], list):
+                        subdictionary[k].append(v)
+                    else:
+                        subdictionary[k] = [subdictionary[k], v]
+                else:
+                    subdictionary[k] = v
+        if dictionary.get(tag):
+            dictionary[tag] = [dictionary[tag], subdictionary]
+        else:
+            dictionary[tag] = subdictionary
+    if root.attrib:
+        attribs = {}
+        for k,v in root.attrib.items():
+            attribs[k] = v
+        if dictionary.get(tag):
+            dictionary[tag] = [dictionary[tag], attribs]
+        else:
+            dictionary[tag] = attribs
+    return dictionary
+
+
+def to_dict(xml_file):
+    tree = ElementTree.parse(xml_file)
+    root = tree.getroot()
+    result = xml_tree_walk(root)
+    result = result[list(result)[0]]
+    result = result[list(result)[0]]
+    return result
+
+def detect(stream):
+    """Returns True if given stream is valid XML."""
+    try:
+        parse(stream)
+        return True
+    except (xml.parsers.expat.ExpatError, TypeError):
+        return False

From 289235c7f38a41d49e1b9a3e75e030c1b788db3d Mon Sep 17 00:00:00 2001
From: Mahmood Shafeie Zargar
Date: Sun, 21 Feb 2016 02:28:31 -0500
Subject: [PATCH 2/2] Fixed crash on variable field sets

---
 tablib/formats/_xml.py | 48 +++++++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/tablib/formats/_xml.py b/tablib/formats/_xml.py
index 82a022aa..fadb4441 100644
--- a/tablib/formats/_xml.py
+++ b/tablib/formats/_xml.py
@@ -21,11 +21,23 @@ def export_set(dataset):
     """Returns XML representation of Dataset."""
     return to_xml(dataset.dict)
 
+
 def import_set(dset, in_stream):
     """Returns dataset from XML stream."""
 
-    dset.wipe()
-    dset.dict = to_dict(in_stream)
+    dict = to_dict(in_stream)
+    dict = harmonize(dict)
+    dset.dict = dict
+
+
+def detect(stream):
+    """Returns True if given stream is valid XML."""
+    try:
+        parse(stream)
+        return True
+    except (xml.parsers.expat.ExpatError, TypeError):
+        return False
+
 
 def create_content(content, root, tag):
     if isinstance(content, Mapping):
@@ -93,18 +105,30 @@ def xml_tree_walk(root):
     return dictionary
 
 
+def get_keys(structure):
+    key_list = [list(item) for item in structure]
+    key_list = [item for sublist in key_list for item in sublist]
+    return set(key_list)
+
+
+def harmonize(structure):
+    structure = structure[list(structure)[0]]
+    structure = structure[list(structure)[0]]
+    keys = get_keys(structure)
+    structure = [add_keys(record, keys) for record in structure]
+    return structure
+
+
+def add_keys(dict, keys):
+    diff = keys - set(list(dict))
+    if diff:
+        for key in diff:
+            dict[key] = None
+    return dict
+
+
 def to_dict(xml_file):
     tree = ElementTree.parse(xml_file)
     root = tree.getroot()
     result = xml_tree_walk(root)
-    result = result[list(result)[0]]
-    result = result[list(result)[0]]
     return result
-
-def detect(stream):
-    """Returns True if given stream is valid XML."""
-    try:
-        parse(stream)
-        return True
-    except (xml.parsers.expat.ExpatError, TypeError):
-        return False