Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

utils/image: add polygon_for_parent w/ a test #577

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ocrd_utils/ocrd_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@
points_from_x0y0x1y1,
points_from_xywh,
points_from_y0x0y1x1,
polygon_for_parent,
polygon_from_bbox,
polygon_from_points,
polygon_from_x0y0x1y1,
Expand Down
29 changes: 29 additions & 0 deletions ocrd_utils/ocrd_utils/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import numpy as np
from PIL import Image, ImageStat, ImageDraw, ImageChops
from shapely.geometry import Polygon

from .logging import getLogger
from .introspect import membername
Expand All @@ -20,6 +21,7 @@
'points_from_x0y0x1y1',
'points_from_xywh',
'points_from_y0x0y1x1',
'polygon_for_parent',
'polygon_from_bbox',
'polygon_from_points',
'polygon_from_x0y0x1y1',
Expand Down Expand Up @@ -541,6 +543,33 @@ def points_from_x0y0x1y1(xyxy):
x0, y1
)

def polygon_for_parent(polygon, parent):
    """
    Clip polygon to parent polygon range.

    Args:
        polygon: sequence of ``[x, y]`` coordinate pairs outlining the
            would-be segment.
        parent: the containing element. If its class is named ``PageType``,
            the Border coordinates are used when a Border is set, otherwise
            the full image rectangle (``imageWidth`` x ``imageHeight``).
            Any other object must provide ``get_Coords().points``.

    Returns:
        ``polygon`` itself (unchanged) if it already lies within the parent;
        otherwise the exterior ring of the clipped shape as a list of
        ``(x, y)`` tuples without the closing point.

    Raises:
        Exception: if the polygon and the parent share no area.
    """
    childp = Polygon(polygon)
    # XXX better test would be
    #   if isinstance(parent, PageType):
    # but that would require ocrd_utils to circularly depend on the package
    # that defines PageType (presumably ocrd_models -- TODO confirm)
    if parent.__class__.__name__ == 'PageType':
        border = parent.get_Border()
        if border:
            parentp = Polygon(polygon_from_points(border.get_Coords().points))
        else:
            width = parent.get_imageWidth()
            height = parent.get_imageHeight()
            parentp = Polygon([[0, 0], [0, height], [width, height], [width, 0]])
    else:
        parentp = Polygon(polygon_from_points(parent.get_Coords().points))
    if childp.within(parentp):
        # nothing to clip: hand back the caller's own coordinates
        return polygon
    interp = childp.intersection(parentp)
    # NOTE: use `geom_type`; the `.type` alias is deprecated in Shapely 2.0
    if interp.geom_type == 'GeometryCollection':
        # heterogeneous result: drop zero-area parts (points, lines), which
        # have no exterior ring, and merge whatever polygons remain
        parts = [geom for geom in interp.geoms if geom.area > 0]
        if parts:
            interp = parts[0]
            for part in parts[1:]:
                interp = interp.union(part)
    if interp.is_empty or interp.area == 0:
        # also covers child and parent merely touching along an edge or corner
        # FIXME: we need a better strategy against this
        raise Exception("intersection of would-be segment with parent is empty")
    if interp.geom_type == 'MultiPolygon':
        # several disjoint pieces: connect them via their convex hull
        interp = interp.convex_hull
    return interp.exterior.coords[:-1]  # keep open

def polygon_from_bbox(minx, miny, maxx, maxy):
    """
    Build the corner list of a bounding box.

    The result is a list of four ``[x, y]`` pairs in the order
    (minx, miny), (maxx, miny), (maxx, maxy), (minx, maxy).
    """
    corners = (
        (minx, miny),
        (maxx, miny),
        (maxx, maxy),
        (minx, maxy),
    )
    return [[x, y] for x, y in corners]
Expand Down
1 change: 1 addition & 0 deletions ocrd_utils/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Pillow >= 7.2.0
numpy >= 1.17.0
shapely
1 change: 0 additions & 1 deletion ocrd_validators/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,3 @@ bagit_profile >= 1.3.0
click >=7
jsonschema
pyyaml
shapely
27 changes: 27 additions & 0 deletions tests/utils/test_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from tests.base import TestCase, main, assets
from ocrd import Resolver
from ocrd_modelfactory import page_from_file
from ocrd_utils import (
polygon_for_parent,
pushd_popd,

MIMETYPE_PAGE
)
class TestImageUtils(TestCase):
    """Tests for the geometry helpers exported from ocrd_utils."""

    def test_polygon_for_parent(self):
        """
        A rectangle overshooting the page image by 10 pixels on every side
        must come back as the plain page rectangle.
        """
        resolver = Resolver()
        with pushd_popd(assets.path_to('gutachten/data')):
            workspace = resolver.workspace_from_url(assets.path_to('gutachten/data/mets.xml'))
            page_files = workspace.mets.find_files(mimetype=MIMETYPE_PAGE)
            input_file = workspace.download_file(page_files[0])
            page = page_from_file(input_file).get_Page()
            # only the image info (third element) is needed here
            _, _, image_info = workspace.image_from_page(page, 'f')
            width, height = image_info.width, image_info.height
            oversized = [
                (-10, -10),
                (width + 10, -10),
                (width + 10, height + 10),
                (-10, height + 10),
            ]
            expected = [(0, 0), (0, height), (width, height), (width, 0)]
            clipped = polygon_for_parent(oversized, page)
            self.assertEqual(clipped, expected)


# Allow running this test module directly as a script.
if __name__ == '__main__':
    main(__file__)