From 857d9d2e6742377779bb89c2a882c349ea827a82 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Fri, 25 Oct 2024 18:51:52 +0800 Subject: [PATCH] Support invalid partToExtract for parse_url Signed-off-by: Haoyang Li --- integration_tests/src/main/python/url_test.py | 2 +- .../main/scala/com/nvidia/spark/rapids/GpuOverrides.scala | 2 +- .../scala/org/apache/spark/sql/rapids/GpuParseUrl.scala | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/integration_tests/src/main/python/url_test.py b/integration_tests/src/main/python/url_test.py index 9d601c72675..e1bf9c821a8 100644 --- a/integration_tests/src/main/python/url_test.py +++ b/integration_tests/src/main/python/url_test.py @@ -148,7 +148,7 @@ url_gen = StringGen(url_pattern) -supported_parts = ['PROTOCOL', 'HOST', 'QUERY', 'PATH'] +supported_parts = ['PROTOCOL', 'HOST', 'QUERY', 'PATH', 'invalid', 'path'] unsupported_parts = ['REF', 'FILE', 'AUTHORITY', 'USERINFO'] @pytest.mark.parametrize('data_gen', [url_gen, edge_cases_gen], ids=idfn) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index 7a01329fef1..19aff51664a 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -3379,7 +3379,7 @@ object GpuOverrides extends Logging { willNotWorkOnGpu("Fail on error is not supported on GPU when parsing urls.") } - extractStringLit(a.children(1)).map(_.toUpperCase) match { + extractStringLit(a.children(1)) match { // In Spark, the key in parse_url could act like a regex, but GPU will match the key // exactly. When key is literal, GPU will check if the key contains regex special and // fallbcak to CPU if it does, but we are not able to fallback when key is column. diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuParseUrl.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuParseUrl.scala index 20f0181c7e1..8874d2a1904 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuParseUrl.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuParseUrl.scala @@ -40,10 +40,10 @@ object GpuParseUrl { def isSupportedPart(part: String): Boolean = { part match { - case PROTOCOL | HOST | QUERY | PATH => - true - case _ => + case REF | FILE | AUTHORITY | USERINFO => false + case _ => // PROTOCOL, HOST, QUERY, PATH and invalid parts are supported + true } } } @@ -73,7 +73,7 @@ case class GpuParseUrl(children: Seq[Expression]) throw new UnsupportedOperationException(s"$this is not supported partToExtract=$part. " + s"Only PROTOCOL, HOST, QUERY and PATH are supported") case _ => - throw new IllegalArgumentException(s"Invalid partToExtract: $partToExtract") + return GpuColumnVector.columnVectorFromNull(url.getRowCount.toInt, StringType) } }