Skip to content

Commit

Permalink
grobid augmenter - fix Sections-Paragraphs bug (#282)
Browse files Browse the repository at this point in the history
* fix bug

* version bump
  • Loading branch information
geli-gel authored Dec 6, 2023
1 parent dd65039 commit a39556d
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = 'mmda'
-version = '0.9.16'
+version = '0.9.17'
description = 'MMDA - multimodal document analysis'
authors = [
{name = 'Allen Institute for Artificial Intelligence', email = 'contact@allenai.org'},
Expand Down
12 changes: 7 additions & 5 deletions src/mmda/parsers/grobid_augment_existing_document_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,15 @@ def _parse_xml_onto_doc(self, xml: str, doc: Document) -> Document:
unallocated_tokens_dict=unallocated_section_tokens_dict,
fix_overlaps=True,
)
# check that conversion to spangroups resulted in actual spans before adding them to the section
if all([sg.spans for sg in this_paragraph_sentence_span_groups]):
sentence_span_groups.extend(this_paragraph_sentence_span_groups)
paragraph_spans = []
for sg in this_paragraph_sentence_span_groups:
paragraph_spans.extend(sg.spans)
# TODO add boxes to paragraph spangroups
this_section_paragraph_span_groups.append(SpanGroup(spans=paragraph_spans))
paragraph_spans = []
for sg in this_paragraph_sentence_span_groups:
paragraph_spans.extend(sg.spans)
# TODO add boxes to paragraph spangroups
this_section_paragraph_span_groups.append(SpanGroup(spans=paragraph_spans))

paragraph_span_groups.extend(this_section_paragraph_span_groups)
for sg in this_section_paragraph_span_groups:
section_spans.extend(sg.spans)
Expand Down

0 comments on commit a39556d

Please sign in to comment.