Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ogc 508 replace elastic search by postgres v2 #1357

Draft
wants to merge 49 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
ef937bc
Resolve merge conflict
Tschuppi81 Jun 3, 2024
2fdd50e
Adds view and templates for search views
Tschuppi81 Jun 4, 2024
d90e85a
Revert wrong type annotations
Tschuppi81 Jun 5, 2024
22932f0
Sort search results after score, timestamp
Tschuppi81 Jun 5, 2024
0507b46
Adds a simple ranking
Tschuppi81 Jun 5, 2024
e6b510e
Merge branch 'master' into feature/ogc-508-replace-elastic-search-by-…
Tschuppi81 Jun 6, 2024
b8f3b76
Resolve merge conflicts
Tschuppi81 Jul 12, 2024
3beab01
Person title and user title and userprofile are now hybrid properties
Tschuppi81 Jul 16, 2024
da09357
Rework ranking
Tschuppi81 Jul 16, 2024
db8ae9b
Cleanup
Tschuppi81 Jul 18, 2024
f9ba776
Make tickets searchable
Tschuppi81 Jul 18, 2024
5718732
Make tickets searchable
Tschuppi81 Jul 18, 2024
737fb14
makes directory entries searchable
Tschuppi81 Jul 18, 2024
7846539
Revert "makes directory entries searchable"
Tschuppi81 Jul 18, 2024
cfcd6af
Resolve merge conflict
Tschuppi81 Jul 19, 2024
904a32c
Make mypy almost happy
Tschuppi81 Jul 19, 2024
956befa
Add fixme's as ranking does not work with extra localized text for su…
Tschuppi81 Jul 19, 2024
d1f8037
Merge branch 'master' into feature/ogc-508-replace-elastic-search-by-…
Tschuppi81 Aug 23, 2024
b25a51d
Make a generic class
Tschuppi81 Aug 23, 2024
007d8cb
Fix search results for tickets as not each ticket provides the values
Tschuppi81 Aug 26, 2024
57f1ea4
Transform directory_entry.keywords to hybrid property
Tschuppi81 Aug 27, 2024
abbf86c
Adds weighted vector to search query
Tschuppi81 Sep 9, 2024
4d04f5d
Resolve merge conflicts
Tschuppi81 Sep 9, 2024
ac18d16
Fix extra localized text for tickets in search results
Tschuppi81 Sep 9, 2024
45eb359
Adds future events on top of search results (in case of hit)
Tschuppi81 Sep 9, 2024
d7a560a
Make load batch result a cached property
Tschuppi81 Sep 9, 2024
85eff7e
Make mypy happy
Tschuppi81 Sep 9, 2024
6114dad
Adds default implementation for extra localized text
Tschuppi81 Sep 9, 2024
71e61b6
Fixing org search template
Tschuppi81 Sep 9, 2024
bb16507
Adjust and add search template for org
Tschuppi81 Oct 3, 2024
1c86775
Exclude documents according to their access level
Tschuppi81 Oct 3, 2024
b0a3fa5
Unaccent data while indexing and also for querying
Tschuppi81 Oct 4, 2024
35cac54
Convert es_public and access to hybrid properties; fix psql search fo…
Tschuppi81 Oct 7, 2024
f4a4d33
Fix: unaccent is only needed during index creation but not while quer…
Tschuppi81 Oct 7, 2024
ce27112
Fix condition and improvements
Tschuppi81 Oct 7, 2024
7c1142c
Fix agency memberships psql expression
Tschuppi81 Oct 8, 2024
59bfda0
Ensure suggestions are tuple of strings
Tschuppi81 Oct 9, 2024
7e93291
Extend and fix a few tests
Tschuppi81 Oct 14, 2024
0167dd6
More hybrid properties and fixes for such
Tschuppi81 Oct 15, 2024
045f8ad
Extend and fix tests for landsgemeinde
Tschuppi81 Oct 18, 2024
0b026c9
Adds sql expression for GeneralFile property access
Tschuppi81 Oct 19, 2024
d05926c
Extend tests
Tschuppi81 Oct 21, 2024
6f93953
Extend tests
Tschuppi81 Oct 21, 2024
14ff52f
Fix mypy complaints
Tschuppi81 Oct 22, 2024
6bdaa97
Performance: use scalar instead of count
Tschuppi81 Oct 22, 2024
68b311c
Adds missing sql expressions and fixes one
Tschuppi81 Oct 22, 2024
bc8e1f4
Cleanup
Tschuppi81 Oct 22, 2024
d5c5b5e
Resolves merge conflicts
Tschuppi81 Oct 22, 2024
1b0e741
Fix import
Tschuppi81 Oct 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/onegov/activity/models/attendee.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ class Attendee(Base, TimestampMixin, ORMSearchable):
'name': {'type': 'text'},
'notes': {'type': 'localized'}
}
es_public = False

@hybrid_property
def es_public(self) -> bool:
    """Always ``False``: this model never reports itself as public."""
    return False

@property
def es_suggestion(self) -> str:
Expand Down
20 changes: 15 additions & 5 deletions src/onegov/agency/models/agency.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from sqlalchemy import and_
from sqlalchemy.orm import object_session
from sqlalchemy.orm import relationship
from sqlalchemy.ext.hybrid import hybrid_property

from onegov.agency.models.membership import ExtendedAgencyMembership
from onegov.agency.utils import get_html_paragraph_with_line_breaks
from onegov.core.crypto import random_token
Expand All @@ -11,9 +16,6 @@
from onegov.org.models.extensions import PublicationExtension
from onegov.people import Agency
from onegov.user import RoleMapping
from sqlalchemy.orm import object_session
from sqlalchemy.orm import relationship


from typing import Any
from typing import IO
Expand All @@ -24,6 +26,7 @@
from markupsafe import Markup
from onegov.agency.request import AgencyRequest
from onegov.core.types import AppenderQuery
from sqlalchemy.sql import ClauseElement
from uuid import UUID


Expand All @@ -40,10 +43,17 @@ class ExtendedAgency(Agency, AccessExtension, PublicationExtension):

es_type_name = 'extended_agency'

@property
def es_public(self) -> bool: # type:ignore[override]
@hybrid_property
def es_public(self) -> bool:
    """Tell whether this agency counts as public.

    Yields ``False`` unless ``access`` equals ``'public'``; in that case
    the value of ``published`` is returned.
    """
    if self.access != 'public':
        return False
    return self.published

@es_public.expression  # type:ignore[no-redef]
def es_public(cls) -> 'ClauseElement':
    """Class-level counterpart of ``es_public`` for use in queries.

    Both conditions are combined with ``and_`` into a single clause.
    """
    access_is_public = cls.access == 'public'
    # ``== True`` builds a SQL comparison; it is not a Python truth test
    is_published = cls.published == True  # noqa: E712
    return and_(access_is_public, is_published)

#: Defines which fields of a membership and person should be exported to
#: the PDF. The fields are expected to contain two parts separated by a
#: point. The first part is either `membership` or `person`, the second
Expand Down
39 changes: 36 additions & 3 deletions src/onegov/agency/models/membership.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
from sqlalchemy import case, select
from sqlalchemy.ext.hybrid import hybrid_property

from onegov.core.orm.mixins import dict_property
from onegov.core.orm.mixins import meta_property
from onegov.org.models.extensions import AccessExtension
from onegov.org.models.extensions import PublicationExtension
from onegov.people import AgencyMembership


from typing import TYPE_CHECKING
if TYPE_CHECKING:
from onegov.agency.models import ExtendedAgency
from onegov.agency.models import ExtendedPerson
from sqlalchemy.orm import relationship
from sqlalchemy.sql import ClauseElement


class ExtendedAgencyMembership(AgencyMembership, AccessExtension,
Expand All @@ -20,8 +23,8 @@ class ExtendedAgencyMembership(AgencyMembership, AccessExtension,

es_type_name = 'extended_membership'

@property
def es_public(self) -> bool: # type:ignore[override]
@hybrid_property
def es_public(self) -> bool:
if self.agency:
if self.agency.meta.get('access', 'public') != 'public':
return False
Expand All @@ -36,6 +39,36 @@ def es_public(self) -> bool: # type:ignore[override]

return self.access == 'public'

@es_public.expression # type:ignore[no-redef]
def es_public(cls) -> 'ClauseElement':
# Class-level (SQL) counterpart of ``es_public``. Builds a CASE expression
# from four correlated scalar subqueries on the linked person and agency.
#
# The models are imported locally; at module level they are only imported
# under TYPE_CHECKING (presumably to avoid a circular import).
from onegov.agency.models import ExtendedAgency, ExtendedPerson

# ``meta`` of the person referenced by this membership's ``person_id``
person_meta = select([ExtendedPerson.meta]).where(
ExtendedPerson.id == cls.person_id
).as_scalar()

# ``published`` of that same person
person_published = select([ExtendedPerson.published]).where(
ExtendedPerson.id == cls.person_id
).as_scalar()

# ``meta`` of the agency referenced by this membership's ``agency_id``
agency_meta = select([ExtendedAgency.meta]).where(
ExtendedAgency.id == cls.agency_id
).as_scalar()

# ``published`` of that same agency
agency_published = select([ExtendedAgency.published]).where(
ExtendedAgency.id == cls.agency_id
).as_scalar()

# The first branch whose condition holds yields False; if none holds, the
# result is whether the membership's own meta['access'] equals 'public'.
# NOTE(review): the instance-level property reads the agency's access via
# ``meta.get('access', 'public')``; no such default is applied here, so
# confirm that a missing 'access' key gives the same result on both sides.
# NOTE(review): confirm that comparing ``meta[...]`` to a plain string
# works for the column type of ``meta`` (not visible here).
return case(
[
(person_meta['access'] != 'public', False),
(person_published != True, False),
(agency_meta['access'] != 'public', False),
(agency_published != True, False),
],
else_=cls.meta['access'] == 'public'
)

# Todo: It is very unclear how this should be used. In the PDF rendering,
# it is placed a middle column with 0.5 cm after the title.
# On the agency, it is placed after the membership title, so not a prefix
Expand Down
35 changes: 30 additions & 5 deletions src/onegov/agency/models/person.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from sqlalchemy import func, select, and_
from sqlalchemy.orm import object_session
from sqlalchemy.ext.hybrid import hybrid_property

from onegov.agency.utils import get_html_paragraph_with_line_breaks
from onegov.org.models import Organisation
from onegov.org.models.extensions import AccessExtension
from onegov.org.models.extensions import PublicationExtension
from onegov.people import Person
from sqlalchemy.orm import object_session


from typing import TYPE_CHECKING
Expand All @@ -13,6 +16,7 @@
from onegov.agency.request import AgencyRequest
from onegov.core.types import AppenderQuery
from sqlalchemy.orm import relationship
from sqlalchemy.sql import ClauseElement


class ExtendedPerson(Person, AccessExtension, PublicationExtension):
Expand All @@ -22,10 +26,17 @@

es_type_name = 'extended_person'

@property
def es_public(self) -> bool: # type:ignore[override]
@hybrid_property
def es_public(self) -> bool:
    """Tell whether this person counts as public.

    Yields ``False`` unless ``access`` equals ``'public'``; in that case
    the value of ``published`` is returned.
    """
    if self.access != 'public':
        return False
    return self.published

@es_public.expression  # type:ignore[no-redef]
def es_public(cls) -> 'ClauseElement':
    """Class-level counterpart of ``es_public`` for use in queries.

    Both conditions are combined with ``and_`` into a single clause.
    """
    access_is_public = cls.access == 'public'
    # ``== True`` builds a SQL comparison; it is not a Python truth test
    is_published = cls.published == True  # noqa: E712
    return and_(access_is_public, is_published)

es_properties = {
'title': {'type': 'text'},
'function': {'type': 'localized'},
Expand All @@ -50,14 +61,28 @@
AppenderQuery[ExtendedAgencyMembership]
]

@property
@hybrid_property
def phone_internal(self) -> str:
    """Return the trailing characters of the configured phone field.

    The single ``Organisation`` row names the attribute to read
    (``agency_phone_internal_field``) and how many trailing characters
    to keep (``agency_phone_internal_digits``). Spaces are removed before
    slicing. An empty string is returned when either the number or the
    digit count is falsy.
    """
    organisation = object_session(self).query(Organisation).one()
    number = getattr(self, organisation.agency_phone_internal_field)
    digits = organisation.agency_phone_internal_digits
    if not (number and digits):
        return ''
    return number.replace(' ', '')[-digits:]

@property
@phone_internal.expression # type:ignore[no-redef]
def phone_internal(cls) -> 'ClauseElement':
# Class-level (SQL) counterpart of ``phone_internal``: selects the
# organisation's field name and digit count (limited to one row) and
# builds a ``substr(replace(...), ...)`` expression that is labelled
# 'phone_internal'.
org_subquery = (
select([Organisation.agency_phone_internal_field,
Organisation.agency_phone_internal_digits])
.limit(1)
# NOTE(review): two columns are selected, yet the select is turned into
# a scalar subquery and ``.c`` is accessed on it below. Confirm this is
# valid for the SQLAlchemy version in use.
.scalar_subquery()
)
# NOTE(review): the instance-level property keeps the last N characters
# (``[-digits:]``); confirm that substr() with a negative start position
# yields the same result on the target database.
return func.substr(

Check warning on line 79 in src/onegov/agency/models/person.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/agency/models/person.py#L79

Added line #L79 was not covered by tests
# NOTE(review): getattr() receives a SQL expression here rather than a
# string attribute name; presumably this raises at runtime. TODO confirm.
func.replace(getattr(
cls, org_subquery.c.agency_phone_internal_field), ' ', ''),
# NOTE(review): ``agency_phone_internal_field_digits`` does not match the
# ``agency_phone_internal_digits`` column selected above; likely a typo.
-org_subquery.c.agency_phone_internal_field_digits
).label('phone_internal')

@hybrid_property
def phone_es(self) -> list[str]:
result = [self.phone_internal]
for number in (self.phone, self.phone_direct):
Expand Down
17 changes: 14 additions & 3 deletions src/onegov/agency/views/search.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from onegov.agency import AgencyApp
from onegov.agency.layout import AgencySearchLayout
from onegov.core.security import Public
from onegov.org.models import Search
from onegov.org.views.search import search as search_view

from onegov.org.models import Search, SearchPostgres
from onegov.org.views.search import search as search_view, search_postgres

from typing import TYPE_CHECKING
if TYPE_CHECKING:
Expand All @@ -23,3 +22,15 @@ def search(
if isinstance(data, dict):
data['layout'] = AgencySearchLayout(self, request)
return data


@AgencyApp.html(
    model=SearchPostgres,
    template='search_postgres.pt',
    permission=Public
)
def agency_search_postgres(
    self: SearchPostgres['Base'],
    request: 'AgencyRequest'
) -> 'RenderData | Response':
    """Render the ``SearchPostgres`` model with the agency search layout.

    Delegates to ``search_postgres``. When that returns a dict, its
    ``'layout'`` entry is set to an ``AgencySearchLayout``; any other
    return value is passed through unchanged.
    """
    result = search_postgres(self, request)
    if not isinstance(result, dict):
        return result

    result['layout'] = AgencySearchLayout(self, request)
    return result
3 changes: 2 additions & 1 deletion src/onegov/core/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
This module should eventually replace the elements.py module.

"""
from sqlalchemy.ext.hybrid import hybrid_property

from onegov.core.templates import render_macro

Expand Down Expand Up @@ -137,7 +138,7 @@ class AccessMixin:

__slots__ = ()

@property
@hybrid_property
def access(self) -> str:
""" Wraps model.access, ensuring it is always available, even if the
model does not use it.
Expand Down
7 changes: 5 additions & 2 deletions src/onegov/directory/models/directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

from email_validator import validate_email
from enum import Enum

from sqlalchemy.ext.hybrid import hybrid_property

from onegov.core.cache import instance_lru_cache
from onegov.core.cache import lru_cache
from onegov.core.crypto import random_token
Expand Down Expand Up @@ -90,7 +93,7 @@ def directory_entry(self) -> 'DirectoryEntry | None':
entries = self.linked_directory_entries
return entries[0] if entries else None

@property
@hybrid_property
def access(self) -> str:
    """Return ``'secret'`` when published, ``'private'`` otherwise."""
    # we don't want these files to show up in search engines
    if self.published:
        return 'secret'
    return 'private'
Expand All @@ -110,7 +113,7 @@ class Directory(Base, ContentMixin, TimestampMixin,
'lead': {'type': 'localized'}
}

@property
@hybrid_property
def es_public(self) -> bool:
    """Always ``False`` at this level."""
    # to be overridden downstream
    return False

Expand Down
25 changes: 18 additions & 7 deletions src/onegov/directory/models/directory_entry.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from sqlalchemy.ext.hybrid import hybrid_property

from onegov.core.orm import Base
from onegov.core.orm.mixins import ContentMixin
from onegov.core.orm.mixins import TimestampMixin
Expand All @@ -6,7 +8,7 @@
from onegov.file import AssociatedFiles
from onegov.gis import CoordinatesMixin
from onegov.search import SearchableContent
from sqlalchemy import Column
from sqlalchemy import Column, func, cast, ARRAY, String
from sqlalchemy import ForeignKey
from sqlalchemy import Index
from sqlalchemy import Text
Expand All @@ -30,18 +32,18 @@ class DirectoryEntry(Base, ContentMixin, CoordinatesMixin, TimestampMixin,
__tablename__ = 'directory_entries'

es_properties = {
'keywords': {'type': 'keyword'},
'title': {'type': 'localized'},
'lead': {'type': 'localized'},
'directory_id': {'type': 'keyword'},
'keywords': {'type': 'keyword'},
# 'directory_id': {'type': 'keyword'},

# since the searchable text might include html, we remove it
# even if there's no html -> possibly decreasing the search
# quality a bit
'text': {'type': 'localized_html'}
}

@property
@hybrid_property
def es_public(self) -> bool:
    """Always ``False`` at this level."""
    # to be overridden downstream
    return False

Expand Down Expand Up @@ -113,17 +115,26 @@ def external_link_visible(self) -> bool | None:
def directory_name(self) -> str:
return self.directory.name

@property
@hybrid_property
def keywords(self) -> set[str]:
    """Return the keys of ``_keywords`` as a set.

    An empty set is returned when ``_keywords`` is falsy (``None`` or
    empty).
    """
    stored = self._keywords
    if not stored:
        return set()
    return set(stored.keys())

# FIXME: asymmetric properties are not supported by mypy, switch to
# a custom descriptor, if desired.
@keywords.setter
@keywords.setter  # type:ignore[no-redef]
def keywords(self, value: 'Collection[str] | None') -> None:
    """Store the given keywords as keys of ``_keywords``.

    Each keyword is mapped to an empty string. A falsy ``value``
    (``None`` or an empty collection) stores ``None`` instead.
    """
    if value:
        self._keywords = dict.fromkeys(value, '')
    else:
        self._keywords = None

@property
@keywords.expression # type:ignore[no-redef]
def keywords(cls):
# Class-level (SQL) counterpart of ``keywords``: aggregates the keys of
# ``_keywords`` into an array and joins it into one space-separated string.
# Note that the instance-level getter returns a ``set[str]``, whereas this
# expression produces a single string.
# NOTE(review): ``.keys()`` is called on the result of
# ``func.jsonb_each_text(...)``; confirm this resolves to a valid SQL
# construct and that ``jsonb_each_text`` matches the column type of
# ``_keywords`` (not visible here).
return func.array_to_string(
func.array_agg(
cast(func.jsonb_each_text(cls._keywords).keys(), ARRAY(String))
),
' '
)

@hybrid_property
def text(self) -> str:
    """Return the searchable text extracted from this entry's values.

    The extraction is delegated to the ``extract_searchable`` method of
    the directory's configuration.
    """
    configuration = self.directory.configuration
    return configuration.extract_searchable(self.values)

Expand Down
4 changes: 2 additions & 2 deletions src/onegov/event/models/event.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import warnings

from datetime import datetime

from dateutil import rrule
from dateutil.rrule import rrulestr
from icalendar import Calendar as vCalendar
from icalendar import Event as vEvent
from icalendar import vRecur
from sqlalchemy.ext.hybrid import hybrid_property

from onegov.core.orm import Base
from onegov.core.orm.abstract import associated
Expand Down Expand Up @@ -194,7 +194,7 @@ def set_blob(
'filter_keywords': {'type': 'keyword'}
}

@property
@hybrid_property
def es_public(self) -> bool:
    """Tell whether ``state`` equals ``'published'``."""
    is_published = self.state == 'published'
    return is_published

Expand Down
Loading
Loading