From 3d8f58dd9a873f69ceb6ce63a8ccc5c6fd5dad22 Mon Sep 17 00:00:00 2001 From: Angele Zamarron Date: Fri, 1 Dec 2023 14:20:52 -0800 Subject: [PATCH 1/2] fix bug --- .../grobid_augment_existing_document_parser.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/mmda/parsers/grobid_augment_existing_document_parser.py b/src/mmda/parsers/grobid_augment_existing_document_parser.py index 2f7406bf..81792bce 100644 --- a/src/mmda/parsers/grobid_augment_existing_document_parser.py +++ b/src/mmda/parsers/grobid_augment_existing_document_parser.py @@ -139,13 +139,15 @@ def _parse_xml_onto_doc(self, xml: str, doc: Document) -> Document: unallocated_tokens_dict=unallocated_section_tokens_dict, fix_overlaps=True, ) + # check that conversion to spangroups resulted in actual spans before adding them to the section if all([sg.spans for sg in this_paragraph_sentence_span_groups]): sentence_span_groups.extend(this_paragraph_sentence_span_groups) - paragraph_spans = [] - for sg in this_paragraph_sentence_span_groups: - paragraph_spans.extend(sg.spans) - # TODO add boxes to paragraph spangroups - this_section_paragraph_span_groups.append(SpanGroup(spans=paragraph_spans)) + paragraph_spans = [] + for sg in this_paragraph_sentence_span_groups: + paragraph_spans.extend(sg.spans) + # TODO add boxes to paragraph spangroups + this_section_paragraph_span_groups.append(SpanGroup(spans=paragraph_spans)) + paragraph_span_groups.extend(this_section_paragraph_span_groups) for sg in this_section_paragraph_span_groups: section_spans.extend(sg.spans) From 3f318a31b054c4eb196ec91af73e6f0214d4be96 Mon Sep 17 00:00:00 2001 From: Angele Zamarron Date: Fri, 1 Dec 2023 14:21:25 -0800 Subject: [PATCH 2/2] version bump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 847bd5d4..1b14e29d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = 'mmda' -version = '0.9.16' +version = '0.9.17' description = 'MMDA - multimodal document analysis' authors = [ {name = 'Allen Institute for Artificial Intelligence', email = 'contact@allenai.org'},