Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add regex replace functionality to transformation filter extractors #301

Merged
merged 15 commits into from
Feb 1, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,19 @@ message Transformation {
// Extractions can be used to extract information from the request/response.
// The extracted information can then be referenced in template fields.
message Extraction {
// Define the Mode enum within the Extraction message
ben-taussig-solo marked this conversation as resolved.
Show resolved Hide resolved
enum Mode {
// Default mode. Extract the value of the subgroup-th capturing group.
EXTRACT = 0;
// Replace the value of the subgroup-th capturing group with the replacement_text.
// Note: replacement_text must be set for this mode.
SINGLE_REPLACE = 1;
// Replace all matches of the regex in the source with the replacement_text.
// Note: replacement_text must be set for this mode.
// Note: subgroup is not used in this mode and must be left unset (0); configuration will be rejected if subgroup is set.
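// Note: restrictions on the regex are different for this mode: the regex is applied repeatedly to find
// all matching occurrences and does not need to match the entire source. See the description of the regex field.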
ben-taussig-solo marked this conversation as resolved.
Show resolved Hide resolved
REPLACE_ALL = 2;
}

// The source of the extraction
oneof source {
Expand All @@ -162,21 +175,31 @@ message Extraction {
google.protobuf.Empty body = 4;
}

// Only strings matching this regular expression will be part of the
// extraction. The most simple value for this field is '.*', which matches the
// whole source. The field is required. If extraction fails the result is an
// empty value.
// The regex field specifies the regular expression used for matching against the source content.
// - In EXTRACT mode, the entire source must match the regex. The subgroup-th capturing group,
// if specified, determines which part of the match is extracted.
// - In SINGLE_REPLACE mode, the regex also needs to match the entire source. The subgroup-th capturing group
// is targeted for replacement with the replacement_text.
// - In REPLACE_ALL mode, the regex is applied repeatedly to find all occurrences within the source that match.
// Each matching occurrence is replaced with the replacement_text, and the subgroup field is not used.
// This field is required, and if the regex does not match the source as per the selected mode, the result of
// the extraction will be an empty value.
string regex = 2;

// If your regex contains capturing groups, use this field to determine which
// group should be selected.
// For EXTRACT and SINGLE_REPLACE, refers to the portion of the text
// to extract/replace.
// Config will be rejected if this is specified in REPLACE_ALL mode.
uint32 subgroup = 3;

// The string to replace the matched portion of the source with
// The string to replace the matched portion of the source with.
// Used in SINGLE_REPLACE and REPLACE_ALL modes.
google.protobuf.StringValue replacement_text = 5;

// If set to true, all matches of the regex in the source will be replaced by the replacement_text.
bool replace_all = 6;
// The mode of operation for the extraction.
// Defaults to EXTRACT.
Mode mode = 6;
}

// Defines a transformation template.
Expand Down
55 changes: 24 additions & 31 deletions source/extensions/filters/http/transformation/inja_transformer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ Extractor::Extractor(const envoy::api::v2::filter::http::Extraction &extractor)
extract_regex_(Solo::Regex::Utility::parseStdRegex(extractor.regex())),
has_replacement_text_(extractor.has_replacement_text()),
replacement_text_(extractor.replacement_text().value()),
replace_all_(extractor.replace_all()) {
mode_(static_cast<Mode>(extractor.mode())) {
// mark count == number of sub groups, and we need to add one for match number
// 0 so we test for < instead of <= see:
// http://www.cplusplus.com/reference/regex/basic_regex/mark_count/
Expand All @@ -69,24 +69,27 @@ Extractor::Extractor(const envoy::api::v2::filter::http::Extraction &extractor)
group_, extract_regex_.mark_count()));
}

// if replace_all is set, we must have replacement text
if (replace_all_ && !has_replacement_text_) {
throw EnvoyException(
fmt::format("replace_all set but no replacement text provided"));
}

// if replace_all is set, subgroup should be 0
if (replace_all_ && group_ != 0) {
throw EnvoyException(
fmt::format("replace_all set but subgroup is not 0"));
}

// extractionFunc is either replaceValue or extractValue depending on whether
// replacement_text_ is empty or not
if (has_replacement_text_) {
extraction_func_ = std::bind(&Extractor::replaceValue, this, _1, _2);
} else {
extraction_func_ = std::bind(&Extractor::extractValue, this, _1, _2);
switch (mode_) {
case Mode::EXTRACT:
extraction_func_ = std::bind(&Extractor::extractValue, this, _1, _2);
break;
case Mode::SINGLE_REPLACE:
if (!has_replacement_text_) {
throw EnvoyException("SINGLE_REPLACE mode set but no replacement text provided");
}
extraction_func_ = std::bind(&Extractor::replaceIndividualValue, this, _1, _2);
break;
case Mode::REPLACE_ALL:
if (!has_replacement_text_) {
throw EnvoyException("REPLACE_ALL mode set but no replacement text provided");
}
if (group_ != 0) {
throw EnvoyException("REPLACE_ALL mode set but subgroup is not 0");
}
extraction_func_ = std::bind(&Extractor::replaceAllValues, this, _1, _2);
break;
default:
throw EnvoyException("Unknown mode");
Copy link
Contributor

@ashishb-solo ashishb-solo Jan 31, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is probably fine, but you could also consider using PANIC_DUE_TO_CORRUPT_ENUM here. (Edit: disregard, I think this macro is intended for other uses.)

}
}

Expand Down Expand Up @@ -131,18 +134,6 @@ Extractor::extractValue(Http::StreamFilterCallbacks &callbacks,

// Match a regex against the input value and replace the matched subgroup with the replacement_text_ value
// writes the result to replaced_value_ and returns an absl::string_view to it
// if replace_all_ is true, __all__ substrings matching the regex in the input value will be replaced
// with the replacement_text_ value
absl::string_view
Extractor::replaceValue(Http::StreamFilterCallbacks &callbacks,
absl::string_view value) const {
if (replace_all_) {
return replaceAllValues(callbacks, value);
} else {
return replaceIndividualValue(callbacks, value);
}
}

absl::string_view
Extractor::replaceIndividualValue(Http::StreamFilterCallbacks &callbacks,
absl::string_view value) const {
Expand Down Expand Up @@ -181,6 +172,8 @@ Extractor::replaceIndividualValue(Http::StreamFilterCallbacks &callbacks,
return absl::string_view(replaced_value_);
ashishb-solo marked this conversation as resolved.
Show resolved Hide resolved
}

// Match a regex against the input value and replace all instances of the regex with the replacement_text_ value
// writes the result to replaced_value_ and returns an absl::string_view to it
absl::string_view
Extractor::replaceAllValues(Http::StreamFilterCallbacks &callbacks,
absl::string_view value) const {
Expand Down
11 changes: 8 additions & 3 deletions source/extensions/filters/http/transformation/inja_transformer.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,16 @@ class Extractor : Logger::Loggable<Logger::Id::filter> {
const Http::RequestOrResponseHeaderMap &header_map,
GetBodyFunc &body) const;

// Matching enum with the protobuf definition
enum class Mode {
EXTRACT = 0,
SINGLE_REPLACE = 1,
REPLACE_ALL = 2
ben-taussig-solo marked this conversation as resolved.
Show resolved Hide resolved
};

private:
absl::string_view extractValue(Http::StreamFilterCallbacks &callbacks,
absl::string_view value) const;
absl::string_view replaceValue(Http::StreamFilterCallbacks &callbacks,
absl::string_view value) const;
absl::string_view replaceIndividualValue(Http::StreamFilterCallbacks &callbacks,
absl::string_view value) const;
absl::string_view replaceAllValues(Http::StreamFilterCallbacks &callbacks,
Expand All @@ -100,7 +105,7 @@ class Extractor : Logger::Loggable<Logger::Id::filter> {
const std::regex extract_regex_;
const bool has_replacement_text_;
const std::string replacement_text_;
ben-taussig-solo marked this conversation as resolved.
Show resolved Hide resolved
const bool replace_all_;
const Mode mode_;

ExtractionFunc extraction_func_;
mutable std::string replaced_value_;
ashishb-solo marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ TEST(Extraction, ExtractAndReplaceValueFromBodySubgroup) {
extractor.set_subgroup(1);
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::SINGLE_REPLACE);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
Expand All @@ -94,6 +95,7 @@ TEST(Extraction, ExtractAndReplaceValueFromFullBody) {
extractor.set_subgroup(0);
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::SINGLE_REPLACE);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
Expand All @@ -111,9 +113,9 @@ TEST(Extraction, ExtractAndReplaceAllFromFullBody) {
extractor.mutable_body();
extractor.set_regex(".*");
extractor.set_subgroup(0);
extractor.set_replace_all(true);
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::REPLACE_ALL);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
Expand All @@ -138,6 +140,7 @@ TEST(Extraction, AttemptReplaceFromPartialMatch) {
extractor.set_subgroup(0);
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::SINGLE_REPLACE);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
Expand All @@ -160,6 +163,7 @@ TEST(Extraction, AttemptReplaceFromPartialMatchNonNilSubgroup) {
extractor.set_subgroup(1);
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::SINGLE_REPLACE);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
Expand All @@ -181,6 +185,7 @@ TEST(Extraction, ReplaceFromFullLiteralMatch) {
extractor.set_subgroup(0);
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::SINGLE_REPLACE);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
Expand All @@ -199,14 +204,15 @@ TEST(Extraction, AttemptToReplaceFromInvalidSubgroup) {
extractor.set_subgroup(1);
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::SINGLE_REPLACE);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
GetBodyFunc bodyfunc = [&body]() -> const std::string & { return body; };
EXPECT_THROW_WITH_MESSAGE(Extractor(extractor).extract(callbacks, headers, bodyfunc), EnvoyException, "group 1 requested for regex with only 0 sub groups");
}

TEST(Extraction, NestedSubgroups) {
TEST(Extraction, ReplaceInNestedSubgroups) {
Http::TestRequestHeaderMapImpl headers{{":method", "GET"}, {":path", "/foo"}};

envoy::api::v2::filter::http::Extraction extractor;
Expand All @@ -215,6 +221,7 @@ TEST(Extraction, NestedSubgroups) {
extractor.set_subgroup(2);
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::SINGLE_REPLACE);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
Expand All @@ -224,7 +231,7 @@ TEST(Extraction, NestedSubgroups) {
EXPECT_EQ("not BAZ body", res);
}

TEST(Extraction, SubgroupUnset) {
TEST(Extraction, ReplaceWithSubgroupUnset) {
Http::TestRequestHeaderMapImpl headers{{":method", "GET"}, {":path", "/foo"}};

envoy::api::v2::filter::http::Extraction extractor;
Expand All @@ -233,6 +240,7 @@ TEST(Extraction, SubgroupUnset) {
// subgroup is unset
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::SINGLE_REPLACE);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
Expand All @@ -243,7 +251,7 @@ TEST(Extraction, SubgroupUnset) {
}

// In regular extractor, I expect that this will hit the "this should never happen" block
TEST(Extraction, NoMatch) {
TEST(Extraction, ReplaceNoMatch) {
Http::TestRequestHeaderMapImpl headers{{":method", "GET"}, {":path", "/foo"}};

envoy::api::v2::filter::http::Extraction extractor;
Expand All @@ -252,6 +260,7 @@ TEST(Extraction, NoMatch) {
extractor.set_subgroup(0);
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::SINGLE_REPLACE);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
Expand All @@ -270,6 +279,7 @@ TEST(Extraction, NilReplace) {
extractor.set_subgroup(1);
auto replacement_text = "";
extractor.mutable_replacement_text()->set_value(replacement_text);
ben-taussig-solo marked this conversation as resolved.
Show resolved Hide resolved
extractor.set_mode(envoy::api::v2::filter::http::Extraction::SINGLE_REPLACE);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
Expand All @@ -288,6 +298,7 @@ TEST(Extraction, NilReplaceWithSubgroupUnset) {
extractor.set_regex(".*(body)");
auto replacement_text = "";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::SINGLE_REPLACE);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
Expand All @@ -296,7 +307,7 @@ TEST(Extraction, NilReplaceWithSubgroupUnset) {
EXPECT_EQ("", res);
}

TEST(Extraction, HeaderHappyPath) {
TEST(Extraction, HeaderReplaceHappyPath) {
Http::TestRequestHeaderMapImpl headers{{":method", "GET"}, {":path", "/foo"}, {"foo", "bar"}};

envoy::api::v2::filter::http::Extraction extractor;
Expand All @@ -305,6 +316,7 @@ TEST(Extraction, HeaderHappyPath) {
extractor.set_subgroup(0);
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::SINGLE_REPLACE);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
Expand All @@ -322,7 +334,7 @@ TEST(Extraction, ReplaceAllWithReplacementTextUnset) {
extractor.mutable_body();
extractor.set_regex("bar");
extractor.set_subgroup(0);
extractor.set_replace_all(true);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::REPLACE_ALL);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("bar bar bar");
Expand All @@ -340,11 +352,11 @@ TEST(Extraction, ReplaceAllWithSubgroupSet) {
// Note that the regex contains enough capture groups
// that this (in theory) could be a valid subgroup
extractor.set_subgroup(1);
// However, subgroup needs to be unset (i.e., 0) for replace all to work
// so this config should be rejected
extractor.set_replace_all(true);
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
// However, subgroup needs to be unset (i.e., 0) for replace all to work
// so this config should be rejected
extractor.set_mode(envoy::api::v2::filter::http::Extraction::REPLACE_ALL);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("bar bar bar");
Expand All @@ -360,9 +372,9 @@ TEST(Extraction, ReplaceAllHappyPath) {
extractor.mutable_body();
extractor.set_regex("bar");
extractor.set_subgroup(0);
extractor.set_replace_all(true);
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::REPLACE_ALL);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("bar bar bar");
Expand All @@ -384,6 +396,7 @@ TEST(Extraction, IndividualReplaceIdentity) {
extractor.set_subgroup(1);
auto replacement_text = "bar";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::SINGLE_REPLACE);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("bar bar bar");
Expand All @@ -401,9 +414,9 @@ TEST(Extraction, ReplaceAllIdentity) {
extractor.mutable_body();
extractor.set_regex("bar");
extractor.set_subgroup(0);
extractor.set_replace_all(true);
auto replacement_text = "bar";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::REPLACE_ALL);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("bar bar bar");
Expand All @@ -423,9 +436,9 @@ TEST(Extraction, ReplaceAllNoMatch) {
extractor.mutable_body();
extractor.set_regex("this will not match the input string");
extractor.set_subgroup(0);
extractor.set_replace_all(true);
auto replacement_text = "BAZ";
extractor.mutable_replacement_text()->set_value(replacement_text);
extractor.set_mode(envoy::api::v2::filter::http::Extraction::REPLACE_ALL);

NiceMock<Http::MockStreamDecoderFilterCallbacks> callbacks;
std::string body("not json body");
Expand Down
Loading
Loading