Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Case insensitive substring test function #2442

Merged
merged 1 commit into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/global/import.ml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ module String = struct
false
with Exit -> true

(* [is_sub_ignore_case pattern text] lowercases both [pattern] and [text]
   with [lowercase_ascii] and passes them to [contains_s], with the
   lowercased [text] as first argument and the lowercased [pattern] as
   second. Presumably this tests whether [pattern] occurs as a substring of
   [text], ignoring case -- confirm against [contains_s].
   NOTE(review): folding is done by [lowercase_ascii], so non-ASCII letters
   are presumably compared as-is. *)
let is_sub_ignore_case pattern text =
  let needle = lowercase_ascii pattern in
  let haystack = lowercase_ascii text in
  contains_s haystack needle

(* ripped off stringext, itself ripping it off from one of dbuenzli's libs *)
let cut s ~on =
let sep_max = length on - 1 in
Expand Down
28 changes: 12 additions & 16 deletions src/ocamlorg_web/lib/handler.ml
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,7 @@ let academic_users req =
let open Data.Academic_institution in
let pattern = String.lowercase_ascii pattern in
let name_is_s { name; _ } = String.lowercase_ascii name = pattern in
let name_contains_s { name; _ } =
String.contains_s (String.lowercase_ascii name) pattern
in
let name_contains_s { name; _ } = String.is_sub_ignore_case pattern name in
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems a bit perilous to assume that names fit in the ASCII subset of English?

Copy link
Collaborator Author

@cuihtlauac cuihtlauac May 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right; it is not. Not sure how to handle this nicely here. This commit is just a refactoring. Any clues?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If pulling in uucp as a dependency is OK, https://erratique.ch/software/uucp/doc/Uucp/Case/index.html#caselesseq looks like a good alternative to the lowercase_ascii normalization (even if it does not cover "lookalike" search).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good idea. Uucp is already part of ocamlorg dependencies (by transitivity) so there's no downside to this.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue on this: #2444

let score user =
if name_is_s user then -1
else if name_contains_s user then 0
Expand Down Expand Up @@ -233,18 +231,17 @@ let books req =
let releases req =
let search_release pattern t =
let open Data.Release in
let pattern = String.lowercase_ascii pattern in
let version_is_s { version; _ } =
String.lowercase_ascii version = pattern
let is_version { version; _ } =
String.(lowercase_ascii version = lowercase_ascii pattern)
in
let version_contains_s { version; _ } =
String.contains_s (String.lowercase_ascii version) pattern
String.is_sub_ignore_case pattern version
in
let body_contains_s { body_md; _ } =
String.contains_s (String.lowercase_ascii body_md) pattern
String.is_sub_ignore_case pattern body_md
in
let score release =
if version_is_s release then -1
if is_version release then -1
else if version_contains_s release then 0
else if body_contains_s release then 2
else failwith "impossible release score"
Expand Down Expand Up @@ -400,18 +397,17 @@ let governance_team req =
let papers req =
let search_paper pattern t =
let open Data.Paper in
let pattern = String.lowercase_ascii pattern in
let title_is_s { title; _ } = String.lowercase_ascii title = pattern in
let title_is_s { title; _ } =
String.(lowercase_ascii title = lowercase_ascii pattern)
in
let title_contains_s { title; _ } =
String.contains_s (String.lowercase_ascii title) pattern
String.is_sub_ignore_case pattern title
in
let abstract_contains_s { abstract; _ } =
String.contains_s (String.lowercase_ascii abstract) pattern
String.is_sub_ignore_case pattern abstract
in
let has_tag_s { tags; _ } =
List.exists
(fun tag -> String.contains_s (String.lowercase_ascii tag) pattern)
tags
List.exists (fun tag -> String.is_sub_ignore_case pattern tag) tags
in
let score paper =
if title_is_s paper then -1
Expand Down
Loading