diff --git a/README.md b/README.md index 071d26d..8c11f08 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ print(doc.export_to_markdown()) # output: "## DocLayNet: A Large Human-Annotate ### Convert a batch of documents -For an example of converting multiple documents, see [convert.py](https://github.com/DS4SD/docling/blob/main/examples/convert.py). +For an example of batch-converting documents, see [convert.py](https://github.com/DS4SD/docling/blob/main/examples/convert.py). From a local repo clone, you can run it with: diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 09b39f8..0515f65 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -125,7 +125,7 @@ def to_ds_document(self) -> DsDocument: desc = DsDocumentDescription(logs=[]) page_hashes = [ - PageReference(hash=p.page_hash, page=p.page_no, model="default") + PageReference(hash=p.page_hash, page=p.page_no + 1, model="default") for p in self.pages ] @@ -159,7 +159,7 @@ def to_ds_document(self) -> DsDocument: prov=[ Prov( bbox=target_bbox, - page=element.page_no, + page=element.page_no + 1, span=[0, len(element.text)], ) ], @@ -242,7 +242,7 @@ def make_spans(cell): prov=[ Prov( bbox=target_bbox, - page=element.page_no, + page=element.page_no + 1, span=[0, 0], ) ], @@ -264,7 +264,7 @@ def make_spans(cell): prov=[ Prov( bbox=target_bbox, - page=element.page_no, + page=element.page_no + 1, span=[0, 0], ) ], @@ -274,7 +274,7 @@ def make_spans(cell): ) page_dimensions = [ - PageDimensions(page=p.page_no, height=p.size.height, width=p.size.width) + PageDimensions(page=p.page_no + 1, height=p.size.height, width=p.size.width) for p in self.pages ] diff --git a/poetry.lock b/poetry.lock index 9715593..4917d3b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -715,13 +715,13 @@ files = [ [[package]] name = "docling-core" -version = "1.1.0" +version = "1.1.2" description = "A python library to define and validate data types in Docling." optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "docling_core-1.1.0-py3-none-any.whl", hash = "sha256:80096ec6bbce9e616700ccd6bdd5a50e5d1a9a832d7968da3874d54b29962536"}, - {file = "docling_core-1.1.0.tar.gz", hash = "sha256:69bc83d3b192d9e56bb91d77d8434d9fc109f8cb25ab5a285d2f3bccc10899cb"}, + {file = "docling_core-1.1.2-py3-none-any.whl", hash = "sha256:bdff5643e3e37a24204449eee99505db0f1cf620b8e1ce4cf4b71850bf49496b"}, + {file = "docling_core-1.1.2.tar.gz", hash = "sha256:969cde6795631a5f5f8cbb5e7ca0e4032864c1abc8fff762415a09a9b1f7146c"}, ] [package.dependencies] @@ -4882,4 +4882,4 @@ ocr = ["easyocr"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "dcb00c6601f61b087fd204d040149c20a7dcd72ab353e912e78dc265c86e4d00" +content-hash = "a6685d5cf1b283d805e10193a437662a1807f99dad40b56ab1e58e1b708fc184" diff --git a/pyproject.toml b/pyproject.toml index ad82c0b..4546bec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ packages = [{include = "docling"}] [tool.poetry.dependencies] python = "^3.10" pydantic = "^2.0.0" -docling-core = "^1.1.0" +docling-core = "^1.1.2" docling-ibm-models = "^1.1.0" deepsearch-glm = ">=0.19.0,<1" filetype = "^1.2.0"