Skip to content

Commit

Permalink
feat: support locate(substr, str[, pos]) function(#820) (#3943)
Browse files Browse the repository at this point in the history
  • Loading branch information
howdb authored Jul 11, 2024
1 parent b6ffe03 commit 9a81683
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 0 deletions.
24 changes: 24 additions & 0 deletions hybridse/src/codegen/udf_ir_builder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,30 @@ TEST_F(UdfIRBuilderTest, SubstringPosUdfTest) {
StringRef("1234567890"), -12);
}

// Exercises the two-argument locate(substr, str) UDF; the expected value is the
// second argument passed to CheckUdf.
TEST_F(UdfIRBuilderTest, LocateUdfTest) {
// substr occurs twice in str: the first (1-based) position is expected
CheckUdf<int32_t, StringRef, StringRef>("locate", 1, StringRef("ab"), StringRef("abcab"));
// substr occurs in the middle of str
CheckUdf<int32_t, StringRef, StringRef>("locate", 3, StringRef("ab"), StringRef("bcab"));
// "ab" vs "Ab": a difference in letter case is expected to produce 0 (no match)
CheckUdf<int32_t, StringRef, StringRef>("locate", 0, StringRef("ab"), StringRef("bcAb"));
// empty substr in empty str is expected to produce 1
CheckUdf<int32_t, StringRef, StringRef>("locate", 1, StringRef(""), StringRef(""));
}

// Exercises the three-argument locate(substr, str, pos) UDF; the expected value
// is the second argument passed to CheckUdf.
TEST_F(UdfIRBuilderTest, LocatePosUdfTest) {
    // pos <= 0 is illegal and yields 0 even though substr occurs in str
    CheckUdf<int32_t, StringRef, StringRef, int32_t>("locate", 0, StringRef("ab"), StringRef("ab"), -1);
    CheckUdf<int32_t, StringRef, StringRef, int32_t>("locate", 0, StringRef("ab"), StringRef("ab"), 0);
    // matching is case-sensitive
    CheckUdf<int32_t, StringRef, StringRef, int32_t>("locate", 0, StringRef("ab"), StringRef("Ab"), 1);

    // occurrences that begin before pos are skipped
    CheckUdf<int32_t, StringRef, StringRef, int32_t>("locate", 4, StringRef("ab"), StringRef("abcab"), 2);
    CheckUdf<int32_t, StringRef, StringRef, int32_t>("locate", 0, StringRef("ab"), StringRef("abcAb"), 2);
    // the search is inclusive of pos: a match beginning exactly at pos is found
    CheckUdf<int32_t, StringRef, StringRef, int32_t>("locate", 4, StringRef("ab"), StringRef("abcab"), 4);
    // pos beyond the last position where substr could still fit
    CheckUdf<int32_t, StringRef, StringRef, int32_t>("locate", 0, StringRef("ab"), StringRef("abcab"), 6);

    // empty substr: returns pos while pos <= len(str) + 1, otherwise 0
    CheckUdf<int32_t, StringRef, StringRef, int32_t>("locate", 5, StringRef(""), StringRef("abcab"), 5);
    CheckUdf<int32_t, StringRef, StringRef, int32_t>("locate", 6, StringRef(""), StringRef("abcab"), 6);
    CheckUdf<int32_t, StringRef, StringRef, int32_t>("locate", 0, StringRef(""), StringRef("abcab"), 7);

    // empty substr in empty str
    CheckUdf<int32_t, StringRef, StringRef, int32_t>("locate", 1, StringRef(""), StringRef(""), 1);
    CheckUdf<int32_t, StringRef, StringRef, int32_t>("locate", 0, StringRef(""), StringRef(""), 2);
}

TEST_F(UdfIRBuilderTest, UpperUcase) {
CheckUdf<Nullable<StringRef>, Nullable<StringRef>>("upper", StringRef("SQL"), StringRef("Sql"));
CheckUdf<Nullable<StringRef>, Nullable<StringRef>>("ucase", StringRef("SQL"), StringRef("Sql"));
Expand Down
41 changes: 41 additions & 0 deletions hybridse/src/udf/default_udf_library.cc
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,47 @@ void DefaultUdfLibrary::InitStringUdf() {

RegisterAlias("substr", "substring");

// Registers the two-argument SQL function locate(substr, str), bound to the
// (StringRef*, StringRef*) overload of udf::v1::locate. The static_cast selects
// that overload explicitly because udf::v1::locate is overloaded.
RegisterExternal("locate")
.args<StringRef, StringRef>(
static_cast<int32_t (*)(StringRef*, StringRef*)>(udf::v1::locate))
.doc(R"(
@brief Returns the position of the first occurrence of substr in str. The given pos and return value are 1-based.
This is a version of the `locate` function where `pos` has a default value of 1.
Example:
@code{.sql}
select locate("wo", "hello world");
--output 7
@endcode)");

// Registers the three-argument SQL function locate(substr, str, pos), bound to
// the (StringRef*, StringRef*, int32_t) overload of udf::v1::locate. The
// static_cast selects that overload explicitly because the name is overloaded.
RegisterExternal("locate")
    .args<StringRef, StringRef, int32_t>(
        static_cast<int32_t (*)(StringRef*, StringRef*, int32_t)>(udf::v1::locate))
    .doc(R"(
@brief Returns the position of the first occurrence of substr in str, searching from position pos (inclusive). The given pos and return value are 1-based. Returns 0 if substr is not found.
Example:
@code{.sql}
select locate("wo", "hello world", 2);
--output 7
select locate("Wo", "hello world", 2);
--output 0
@endcode
@param substr
@param str
@param pos: define the beginning search position of the str.
- Zero or negative value is illegal and will return 0 directly;
- If substr is "" and pos is less than or equal to len(str) + 1, return pos; otherwise return 0;
)");

RegisterExternal("strcmp")
.args<StringRef, StringRef>(
static_cast<int32_t (*)(StringRef*, StringRef*)>(
Expand Down
36 changes: 36 additions & 0 deletions hybridse/src/udf/udf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1085,6 +1085,42 @@ void sub_string(StringRef *str, int32_t from, int32_t len,
output->size_ = static_cast<uint32_t>(len);
return;
}

// Two-argument form of locate: searches str for substr from the very first
// character by delegating to the three-argument overload with pos = 1.
int32_t locate(StringRef *substr, StringRef *str) {
    const int32_t default_pos = 1;
    return locate(substr, str, default_pos);
}

// Returns the 1-based position of the first occurrence of `substr` in `str`
// that begins at or after the 1-based position `pos`, or 0 when there is none.
//
// - A null `substr` or `str` yields 0.
// - `pos` <= 0 is illegal and yields 0.
// - An empty `substr` yields `pos` when pos <= len(str) + 1, otherwise 0.
// Bytes are compared exactly, so the match is case-sensitive.
int32_t locate(StringRef *substr, StringRef *str, int32_t pos) {
    if (nullptr == substr || nullptr == str) {
        return 0;
    }
    // zero or negative pos returns 0 directly
    if (pos <= 0) {
        return 0;
    }
    const uint32_t sub_size = substr->size_;
    const uint32_t size = str->size_;
    // 0-based offset of the first candidate match; pos >= 1 so this cannot wrap
    const uint32_t start = static_cast<uint32_t>(pos) - 1;
    // substr must fit into str when placed at `start`. The check is written
    // without adding `start` and `sub_size`, because that sum can wrap around
    // in 32-bit unsigned arithmetic and let an out-of-range search through.
    if (start > size || sub_size > size - start) {
        return 0;
    }
    // empty substr matches right at pos (pos <= len(str) + 1 is guaranteed here)
    if (sub_size == 0) {
        return pos;
    }
    // last 0-based offset at which substr still fits; sub_size <= size here
    const uint32_t last_start = size - sub_size;
    for (uint32_t i = start; i <= last_start; i++) {
        uint32_t j = 0;
        while (j < sub_size && str->data_[i + j] == substr->data_[j]) {
            j++;
        }
        if (j == sub_size) {
            // convert the 0-based offset back to a 1-based position
            return static_cast<int32_t>(i + 1);
        }
    }
    return 0;
}

int32_t strcmp(StringRef *s1, StringRef *s2) {
if (s1 == s2) {
return 0;
Expand Down
2 changes: 2 additions & 0 deletions hybridse/src/udf/udf.h
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,8 @@ void sub_string(StringRef *str, int32_t pos,
StringRef *output);
void sub_string(StringRef *str, int32_t pos, int32_t len,
StringRef *output);
// Returns the 1-based position of the first occurrence of substr in str, or 0
// if there is none; same as the three-argument overload with pos = 1.
int32_t locate(StringRef *substr, StringRef* str);
// Returns the 1-based position of the first occurrence of substr in str that
// begins at or after the 1-based position pos, or 0 if there is none.
// Returns 0 when substr or str is null or when pos <= 0. For an empty substr
// it returns pos if pos <= len(str) + 1, otherwise 0.
int32_t locate(StringRef *substr, StringRef* str, int32_t pos);
int32_t strcmp(StringRef *s1, StringRef *s2);
void bool_to_string(bool v, StringRef *output);

Expand Down

0 comments on commit 9a81683

Please sign in to comment.