From d03e2cdddec7136c014f1841063329a9ecbe2211 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 25 Aug 2020 15:36:14 +0200 Subject: [PATCH] utils/image: add polygon_for_parent w/ a test --- ocrd_utils/ocrd_utils/__init__.py | 1 + ocrd_utils/ocrd_utils/image.py | 29 +++++++++++++++++++++++++++++ ocrd_utils/requirements.txt | 1 + ocrd_validators/requirements.txt | 1 - tests/utils/test_image.py | 27 +++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 tests/utils/test_image.py diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 3e882d83db..90c09d5ed0 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -97,6 +97,7 @@ points_from_x0y0x1y1, points_from_xywh, points_from_y0x0y1x1, + polygon_for_parent, polygon_from_bbox, polygon_from_points, polygon_from_x0y0x1y1, diff --git a/ocrd_utils/ocrd_utils/image.py b/ocrd_utils/ocrd_utils/image.py index 0e0298cc42..0a42c8e2f1 100644 --- a/ocrd_utils/ocrd_utils/image.py +++ b/ocrd_utils/ocrd_utils/image.py @@ -2,6 +2,7 @@ import numpy as np from PIL import Image, ImageStat, ImageDraw, ImageChops +from shapely.geometry import Polygon from .logging import getLogger from .introspect import membername @@ -20,6 +21,7 @@ 'points_from_x0y0x1y1', 'points_from_xywh', 'points_from_y0x0y1x1', + 'polygon_for_parent', 'polygon_from_bbox', 'polygon_from_points', 'polygon_from_x0y0x1y1', @@ -541,6 +543,33 @@ def points_from_x0y0x1y1(xyxy): x0, y1 ) +def polygon_for_parent(polygon, parent): + """ + Clip polygon to parent polygon range: return polygon unchanged if it lies within the parent, else the exterior coordinates of their intersection (of its convex hull if the intersection is a MultiPolygon); raise Exception if the intersection is empty. 
+ """ + childp = Polygon(polygon) + # XXX better test would be + # if isinstance(parent, PageType): + # but that would require ocrd_utils to circularly depend on the package defining PageType + if parent.__class__.__name__ == 'PageType': + if parent.get_Border(): + parentp = Polygon(polygon_from_points(parent.get_Border().get_Coords().points)) + else: + parentp = Polygon([[0,0], [0,parent.get_imageHeight()], + [parent.get_imageWidth(),parent.get_imageHeight()], + [parent.get_imageWidth(),0]]) + else: + parentp = Polygon(polygon_from_points(parent.get_Coords().points)) + if childp.within(parentp): + return polygon + interp = childp.intersection(parentp) + if interp.is_empty: + # FIXME: we need a better strategy against this + raise Exception("intersection of would-be segment with parent is empty") + if interp.type == 'MultiPolygon': + interp = interp.convex_hull + return interp.exterior.coords[:-1] # keep open: drop the last coordinate + def polygon_from_bbox(minx, miny, maxx, maxy): """Construct polygon coordinates in numeric list representation from a numeric list representing a bounding box.""" return [[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy]] diff --git a/ocrd_utils/requirements.txt b/ocrd_utils/requirements.txt index 55940c83e4..4203e54696 100644 --- a/ocrd_utils/requirements.txt +++ b/ocrd_utils/requirements.txt @@ -1,2 +1,3 @@ Pillow >= 7.2.0 numpy >= 1.17.0 +shapely diff --git a/ocrd_validators/requirements.txt b/ocrd_validators/requirements.txt index d6df9061c4..d30008add2 100644 --- a/ocrd_validators/requirements.txt +++ b/ocrd_validators/requirements.txt @@ -3,4 +3,3 @@ bagit_profile >= 1.3.0 click >=7 jsonschema pyyaml -shapely diff --git a/tests/utils/test_image.py b/tests/utils/test_image.py new file mode 100644 index 0000000000..4a8736b06d --- /dev/null +++ b/tests/utils/test_image.py @@ -0,0 +1,27 @@ +from tests.base import TestCase, main, assets +from ocrd import Resolver +from ocrd_modelfactory import page_from_file +from ocrd_utils import ( + polygon_for_parent, + pushd_popd, 
+ + MIMETYPE_PAGE +) +class TestImageUtils(TestCase): + + def test_polygon_for_parent(self): + resolver = Resolver() + with pushd_popd(assets.path_to('gutachten/data')): + ws = resolver.workspace_from_url(assets.path_to('gutachten/data/mets.xml')) + input_file = ws.download_file(ws.mets.find_files(mimetype=MIMETYPE_PAGE)[0]) + pcgts = page_from_file(input_file) + page = pcgts.get_Page() + page_image, page_coords, page_image_info = ws.image_from_page(page, 'f') + width = page_image_info.width + height = page_image_info.height + p = polygon_for_parent([(-10, -10), (width + 10, -10), (width + 10, height + 10), (-10, height + 10)], page) + self.assertEqual(p, [(0, 0), (0, height), (width, height), (width, 0)]) + + +if __name__ == '__main__': + main(__file__)