Skip to content

Commit

Permalink
Fixes #567 - Implement ods import
Browse files Browse the repository at this point in the history
  • Loading branch information
claudep committed Oct 28, 2023
1 parent dc23c4c commit ce04281
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 8 deletions.
2 changes: 2 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
### Improvements

- The html format now supports importing from HTML content (#243)
- The ODS format now supports importing from .ods files (#567). Import support
  is still somewhat experimental.

### Changes

Expand Down
11 changes: 9 additions & 2 deletions docs/formats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,19 @@ If a title has been set, it will be exported as the table caption.
ods
===

Import and export data in OpenDocument Spreadsheet format. (Before version
3.6.0 the ``ods`` format was export-only.)

.. versionadded:: 3.6.0

Import functionality was added.

This format is optional, install Tablib with ``pip install "tablib[ods]"`` to
make the format available.

The ``import_set()`` method also accepts a ``skip_lines`` parameter: the number
of lines to skip before data is read.

.. admonition:: Binary Warning

:class:`Dataset.ods` contains binary data, so make sure to write in binary mode::
Expand Down
98 changes: 98 additions & 0 deletions src/tablib/formats/_ods.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
"""

import numbers
from datetime import date, datetime, time
from io import BytesIO

from odf import opendocument, style, table, text

import tablib

bold = style.Style(name="bold", family="paragraph")
bold.addElement(style.TextProperties(
fontweight="bold",
Expand Down Expand Up @@ -49,6 +52,93 @@ def export_book(cls, databook):
wb.save(stream)
return stream.getvalue()

@classmethod
def import_sheet(cls, dset, sheet, headers=True, skip_lines=0):
    """Populate dataset `dset` with sheet data.

    :param dset: dataset to fill; its title, headers and rows are set here.
    :param sheet: table node whose ``table:table-row`` children are read.
    :param headers: when true, the first row after the skipped ones becomes
        ``dset.headers`` instead of a data row.
    :param skip_lines: number of leading rows to ignore.
    """

    dset.title = sheet.getAttribute('name')

    def is_real_cell(cell):
        # A childless cell that carries a column repeat count is treated as
        # filler and left out of the row.
        return cell.hasChildNodes() or not cell.getAttribute('numbercolumnsrepeated')

    # Number the rows only. A sheet can hold other children (column
    # definitions, for instance); counting them would shift the position at
    # which skip_lines ends and the header row is expected.
    rows = (node for node in sheet.childNodes if node.tagName == 'table:table-row')
    for i, row in enumerate(rows):
        if i < skip_lines:
            continue
        row_vals = [cls.read_cell(cell) for cell in row.childNodes if is_real_cell(cell)]
        if not row_vals:
            continue
        if i == skip_lines and headers:
            dset.headers = row_vals
        else:
            if i > skip_lines and len(row_vals) < dset.width:
                # Pad ragged rows so they match the current dataset width.
                row_vals += [''] * (dset.width - len(row_vals))
            dset.append(row_vals)

@classmethod
def read_cell(cls, cell, value_type=None):
    """Return the Python value stored in `cell`.

    :param cell: node to read; nested nodes are visited recursively.
    :param value_type: value type inherited from the enclosing cell. When
        ``None`` it is read from the node's ``valuetype`` attribute.
    :return: a ``date``/``datetime`` for date cells, a ``time`` for time
        cells, a ``bool`` for boolean cells, a ``float`` for float cells and
        the raw text for any other type. A childless node without data
        yields ``''``; ``None`` is returned when no child yields a value.
    """
    def convert_date(val):
        if 'T' in val:
            return datetime.strptime(val, "%Y-%m-%dT%H:%M:%S")
        return datetime.strptime(val, "%Y-%m-%d").date()

    def convert_time(val):
        # OpenDocument writes time values as durations ("PT14H30M00S").
        # The plain "14:30:00" form is still accepted so that files holding
        # that form keep loading.
        try:
            return datetime.strptime(val, "PT%HH%MM%SS").time()
        except ValueError:
            return datetime.strptime(val, "%H:%M:%S").time()

    if value_type is None:
        value_type = cell.getAttribute('valuetype')
    if value_type == 'date':
        date_value = cell.getAttribute('datevalue')
        if date_value:
            return convert_date(date_value)
    if value_type == 'time':
        return convert_time(cell.getAttribute('timevalue'))
    if value_type == 'boolean':
        return cell.getAttribute('booleanvalue') == 'true'
    if not cell.childNodes:
        # Leaf node: prefer text data, then fall back to a `value` attribute.
        value = getattr(cell, 'data', None)
        if value is None:
            value = cell.getAttribute('value')
        if value is None:
            return ''
        if value_type == 'float':
            return float(value)
        if value_type == 'date':
            return convert_date(value)
        return value  # Any other type default to 'string'

    for subnode in cell.childNodes:
        value = cls.read_cell(subnode, value_type)
        # Skip only empty results; a plain truthiness test would also drop
        # a legitimate 0.0.
        if value is not None and value != '':
            return value
    return None

@classmethod
def import_set(cls, dset, in_stream, headers=True, skip_lines=0):
    """Fill dataset `dset` from an ODS stream.

    The dataset is wiped first. Every child of the document's spreadsheet
    node whose qname local part is ``'table'`` is then handed to
    ``import_sheet`` together with `headers` and `skip_lines`.
    """
    dset.wipe()

    document = opendocument.load(in_stream)
    table_nodes = (
        node for node in document.spreadsheet.childNodes
        if node.qname[1] == 'table'
    )
    for table_node in table_nodes:
        cls.import_sheet(dset, table_node, headers, skip_lines)

@classmethod
def import_book(cls, dbook, in_stream, headers=True):
    """Fill databook `dbook` from an ODS stream.

    The databook is wiped first. Each child of the document's spreadsheet
    node whose qname local part is ``'table'`` is imported into a fresh
    ``tablib.Dataset`` that is then added to the book.
    """
    dbook.wipe()

    document = opendocument.load(in_stream)
    for node in document.spreadsheet.childNodes:
        if node.qname[1] == 'table':
            sheet_data = tablib.Dataset()
            cls.import_sheet(sheet_data, node, headers)
            dbook.add_sheet(sheet_data)
dbook.add_sheet(dset)

@classmethod
def dset_sheet(cls, dataset, ws):
"""Completes given worksheet from given Dataset."""
Expand All @@ -66,6 +156,14 @@ def dset_sheet(cls, dataset, ws):
for j, col in enumerate(row):
if isinstance(col, numbers.Number):
cell = table.TableCell(valuetype="float", value=col)
elif isinstance(col, datetime):
cell = table.TableCell(
valuetype="date", value=col.strftime('%Y-%m-%dT%H:%M:%S')
)
elif isinstance(col, date):
cell = table.TableCell(valuetype="date", datevalue=col.strftime('%Y-%m-%d'))
elif isinstance(col, time):
cell = table.TableCell(valuetype="time", timevalue=col.strftime('%H:%M:%S'))
else:
cell = table.TableCell(valuetype="string")
cell.addElement(text.P(text=str(col), stylename=style))
Expand Down
Binary file added tests/files/book.ods
Binary file not shown.
Binary file added tests/files/ragged.ods
Binary file not shown.
Binary file added tests/files/unknown_value_type.ods
Binary file not shown.
51 changes: 45 additions & 6 deletions tests/test_tablib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1107,13 +1107,52 @@ def test_tsv_export(self):


class ODSTests(BaseTestCase):
    """Export/import tests for the ODS format."""

    def test_ods_export_import_set(self):
        """Each value type survives an ODS export followed by an import."""
        date = datetime.date(2019, 10, 4)
        date_time = datetime.datetime(2019, 10, 4, 12, 30, 8)
        time = datetime.time(14, 30)
        data.append(('string', '004', 42, 21.55, Decimal('34.5'), date, time, date_time))
        data.headers = (
            'string', 'start0', 'integer', 'float', 'decimal', 'date', 'time', 'date/time'
        )
        # Round trip: export to ODS bytes, then load them back into `data`.
        _ods = data.ods
        data.ods = _ods
        self.assertEqual(data.dict[0]['string'], 'string')
        self.assertEqual(data.dict[0]['start0'], '004')
        self.assertEqual(data.dict[0]['integer'], 42)
        self.assertEqual(data.dict[0]['float'], 21.55)
        self.assertEqual(data.dict[0]['decimal'], 34.5)
        self.assertEqual(data.dict[0]['date'], date)
        self.assertEqual(data.dict[0]['time'], time)
        self.assertEqual(data.dict[0]['date/time'], date_time)

    def test_ods_import_book(self):
        """Loading book.ods into a Databook yields two sheets."""
        ods_source = Path(__file__).parent / 'files' / 'book.ods'
        with ods_source.open('rb') as fh:
            dbook = tablib.Databook().load(fh, 'ods')
        self.assertEqual(len(dbook.sheets()), 2)

    def test_ods_import_set_skip_lines(self):
        """With skip_lines=2 the third exported row becomes the headers."""
        data.append(('garbage', 'line', ''))
        data.append(('', '', ''))
        data.append(('id', 'name', 'description'))
        _ods = data.ods
        new_data = tablib.Dataset().load(_ods, skip_lines=2)
        self.assertEqual(new_data.headers, ['id', 'name', 'description'])

    def test_ods_import_set_ragged(self):
        """Rows shorter than the dataset width are padded with ''."""
        ods_source = Path(__file__).parent / 'files' / 'ragged.ods'
        with ods_source.open('rb') as fh:
            dataset = tablib.Dataset().load(fh, 'ods')
        self.assertEqual(dataset.pop(), (1, '', True, ''))

    def test_ods_unknown_value_type(self):
        """A cell with an unrecognised value type is read as a string."""
        # The ods file was doctored to contain:
        # <table:table-cell office:value-type="unknown" calcext:value-type="string">
        ods_source = Path(__file__).parent / 'files' / 'unknown_value_type.ods'
        with ods_source.open('rb') as fh:
            dataset = tablib.Dataset().load(fh, 'ods')
        self.assertEqual(dataset.pop(), ('abcd',))


class XLSTests(BaseTestCase):
Expand Down

0 comments on commit ce04281

Please sign in to comment.