From ca541f4d56dba55d235fe52a0bba066cfd7367e5 Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Sat, 17 Jan 2026 01:07:54 +0530 Subject: [PATCH 1/6] feat: add CometRight serializer for RIGHT expression --- .../org/apache/comet/serde/strings.scala | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index 15f4b238f2..4ad9f335d8 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -21,13 +21,14 @@ package org.apache.comet.serde import java.util.Locale -import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Concat, Expression, InitCap, Length, Like, Literal, Lower, RegExpReplace, RLike, StringLPad, StringRepeat, StringRPad, Substring, Upper} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Concat, Expression, InitCap, Length, Like, Literal, Lower, RegExpReplace, Right, RLike, StringLPad, StringRepeat, StringRPad, Substring, Upper} import org.apache.spark.sql.types.{BinaryType, DataTypes, LongType, StringType} import org.apache.comet.CometConf import org.apache.comet.CometSparkSessionExtensions.withInfo import org.apache.comet.expressions.{CometCast, CometEvalMode, RegExp} import org.apache.comet.serde.ExprOuterClass.Expr +import org.apache.comet.serde.LiteralOuterClass import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} object CometStringRepeat extends CometExpressionSerde[StringRepeat] { @@ -113,6 +114,44 @@ object CometSubstring extends CometExpressionSerde[Substring] { } } +object CometRight extends CometExpressionSerde[Right] { + + override def convert(expr: Right, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = { + expr.len match { + case Literal(lenValue, _) => + val lenInt = lenValue.asInstanceOf[Int] + if (lenInt <= 0) { + val literalBuilder = LiteralOuterClass.Literal.newBuilder() + literalBuilder.setIsNull(false) + literalBuilder.setStringVal("") + Some(ExprOuterClass.Expr.newBuilder().setLiteral(literalBuilder).build()) + } else { + exprToProtoInternal(expr.str, inputs, binding) match { + case Some(strExpr) => + val builder = ExprOuterClass.Substring.newBuilder() + builder.setChild(strExpr) + builder.setStart(-lenInt) + builder.setLen(lenInt) + Some(ExprOuterClass.Expr.newBuilder().setSubstring(builder).build()) + case None => + withInfo(expr, expr.str) + None + } + } + case _ => + withInfo(expr, "RIGHT len must be a literal") + None + } + } + + override def getSupportLevel(expr: Right): SupportLevel = { + expr.str.dataType match { + case _: StringType => Compatible() + case _ => Unsupported(Some(s"RIGHT does not support ${expr.str.dataType}")) + } + } +} + object CometConcat extends CometScalarFunction[Concat]("concat") { val unsupportedReason = "CONCAT supports only string input parameters" From c37a72e7ceeded7d0dcca5df8ec4f94a5e39e4ef Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Sat, 17 Jan 2026 01:07:54 +0530 Subject: [PATCH 2/6] feat: register Right expression in QueryPlanSerde --- spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index e50b1d80e6..a5ce6ee522 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -170,6 +170,7 @@ object QueryPlanSerde extends Logging with CometExprShim { classOf[StringTrimBoth] -> CometScalarFunction("btrim"), classOf[StringTrimLeft] -> CometScalarFunction("ltrim"), classOf[StringTrimRight] -> CometScalarFunction("rtrim"), + classOf[Right] -> CometRight, classOf[Substring] -> CometSubstring, classOf[Upper] -> CometUpper) From 53486fd4d4a381013f45407123a3c6aa96054421 Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Sat, 17 Jan 2026 01:07:55 +0530 Subject: [PATCH 3/6] test: add comprehensive tests for RIGHT function --- .../apache/comet/CometExpressionSuite.scala | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 0352da7850..93a7c1b701 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -506,6 +506,52 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } + test("RIGHT function") { + withParquetTable((0 until 10).map(i => (s"test$i", i)), "tbl") { + checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 2) FROM tbl") + checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 4) FROM tbl") + checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM tbl") + checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, -1) FROM tbl") + checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 100) FROM tbl") + checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), 2) FROM tbl LIMIT 1") + } + } + + test("RIGHT function with unicode") { + val data = Seq("café", "hello世界", "😀emoji", "తెలుగు") + withParquetTable(data.zipWithIndex, "unicode_tbl") { + checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 2) FROM unicode_tbl") + checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 3) FROM unicode_tbl") + checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM unicode_tbl") + } + } + + test("RIGHT function equivalence with SUBSTRING negative pos") { + withParquetTable((0 until 20).map(i => Tuple1(s"test$i")), "equiv_tbl") { + val df = spark.sql(""" + SELECT _1, + RIGHT(_1, 3) as right_result, + SUBSTRING(_1, -3, 3) as substring_result + FROM equiv_tbl + """) + checkAnswer( + df.filter( + "right_result != substring_result OR " + + "(right_result IS NULL AND substring_result IS NOT NULL) OR " + + "(right_result IS NOT NULL AND substring_result IS NULL)"), + Seq.empty) + } + } + + test("RIGHT function with dictionary") { + val data = (0 until 1000) + .map(_ % 5) + .map(i => s"value$i") + withParquetTable(data.zipWithIndex, "dict_tbl") { + checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 3) FROM dict_tbl") + } + } + test("hour, minute, second") { Seq(true, false).foreach { dictionaryEnabled => withTempDir { dir => From 2783434cef0c4b7fa6f80e516aae18fb716fcf7a Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Wed, 21 Jan 2026 23:58:13 +0530 Subject: [PATCH 4/6] chore: remove unused LiteralOuterClass import --- spark/src/main/scala/org/apache/comet/serde/strings.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index 4ad9f335d8..02accf6865 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -28,7 +28,6 @@ import org.apache.comet.CometConf import org.apache.comet.CometSparkSessionExtensions.withInfo import org.apache.comet.expressions.{CometCast, CometEvalMode, RegExp} import org.apache.comet.serde.ExprOuterClass.Expr -import org.apache.comet.serde.LiteralOuterClass import org.apache.comet.serde.QueryPlanSerde.{createBinaryExpr, exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto} object CometStringRepeat extends CometExpressionSerde[StringRepeat] { From e6666d15ea0f9331c9b22b41826346531caaae8e Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Thu, 22 Jan 2026 00:07:32 +0530 Subject: [PATCH 5/6] docs: update generated configs for Right expression --- docs/source/user-guide/latest/compatibility.md | 9 +++------ docs/source/user-guide/latest/configs.md | 1 + 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/source/user-guide/latest/compatibility.md b/docs/source/user-guide/latest/compatibility.md index 0ca6f8ea97..48c3601390 100644 --- a/docs/source/user-guide/latest/compatibility.md +++ b/docs/source/user-guide/latest/compatibility.md @@ -105,7 +105,6 @@ Cast operations in Comet fall into three levels of support: **Notes:** - - **decimal -> string**: There can be formatting differences in some case due to Spark using scientific notation where Comet does not - **double -> decimal**: There can be rounding differences - **double -> string**: There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 @@ -113,7 +112,7 @@ Cast operations in Comet fall into three levels of support: - **float -> string**: There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 - **string -> date**: Only supports years between 262143 BC and 262142 AD - **string -> decimal**: Does not support fullwidth unicode digits (e.g \\uFF10) - or strings containing null bytes (e.g \\u0000) +or strings containing null bytes (e.g \\u0000) - **string -> timestamp**: Not all valid formats are supported @@ -140,7 +139,6 @@ Cast operations in Comet fall into three levels of support: **Notes:** - - **decimal -> string**: There can be formatting differences in some case due to Spark using scientific notation where Comet does not - **double -> decimal**: There can be rounding differences - **double -> string**: There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 @@ -148,7 +146,7 @@ Cast operations in Comet fall into three levels of support: - **float -> string**: There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 - **string -> date**: Only supports years between 262143 BC and 262142 AD - **string -> decimal**: Does not support fullwidth unicode digits (e.g \\uFF10) - or strings containing null bytes (e.g \\u0000) +or strings containing null bytes (e.g \\u0000) - **string -> timestamp**: Not all valid formats are supported @@ -175,7 +173,6 @@ Cast operations in Comet fall into three levels of support: **Notes:** - - **decimal -> string**: There can be formatting differences in some case due to Spark using scientific notation where Comet does not - **double -> decimal**: There can be rounding differences - **double -> string**: There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 @@ -183,7 +180,7 @@ Cast operations in Comet fall into three levels of support: - **float -> string**: There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 - **string -> date**: Only supports years between 262143 BC and 262142 AD - **string -> decimal**: Does not support fullwidth unicode digits (e.g \\uFF10) - or strings containing null bytes (e.g \\u0000) +or strings containing null bytes (e.g \\u0000) - **string -> timestamp**: ANSI mode not supported diff --git a/docs/source/user-guide/latest/configs.md b/docs/source/user-guide/latest/configs.md index 1a273ad033..4f11cca93a 100644 --- a/docs/source/user-guide/latest/configs.md +++ b/docs/source/user-guide/latest/configs.md @@ -297,6 +297,7 @@ These settings can be used to determine which parts of the plan are accelerated | `spark.comet.expression.RegExpReplace.enabled` | Enable Comet acceleration for `RegExpReplace` | true | | `spark.comet.expression.Remainder.enabled` | Enable Comet acceleration for `Remainder` | true | | `spark.comet.expression.Reverse.enabled` | Enable Comet acceleration for `Reverse` | true | +| `spark.comet.expression.Right.enabled` | Enable Comet acceleration for `Right` | true | | `spark.comet.expression.Round.enabled` | Enable Comet acceleration for `Round` | true | | `spark.comet.expression.ScalarSubquery.enabled` | Enable Comet acceleration for `ScalarSubquery` | true | | `spark.comet.expression.Second.enabled` | Enable Comet acceleration for `Second` | true | From dc39f924ed83ec6588b959d3fc04e82f57e285d4 Mon Sep 17 00:00:00 2001 From: shekharrajak Date: Thu, 22 Jan 2026 13:02:33 +0530 Subject: [PATCH 6/6] fix: RIGHT expression NULL handling for len <= 0 --- .../org/apache/comet/serde/strings.scala | 16 ++++++++---- .../apache/comet/CometExpressionSuite.scala | 25 +++++++++++++++++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index 02accf6865..f7cfec783f 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -21,8 +21,9 @@ package org.apache.comet.serde import java.util.Locale -import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Concat, Expression, InitCap, Length, Like, Literal, Lower, RegExpReplace, Right, RLike, StringLPad, StringRepeat, StringRPad, Substring, Upper} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Concat, Expression, If, InitCap, IsNull, Length, Like, Literal, Lower, RegExpReplace, Right, RLike, StringLPad, StringRepeat, StringRPad, Substring, Upper} import org.apache.spark.sql.types.{BinaryType, DataTypes, LongType, StringType} +import org.apache.spark.unsafe.types.UTF8String import org.apache.comet.CometConf import org.apache.comet.CometSparkSessionExtensions.withInfo @@ -120,10 +121,15 @@ object CometRight extends CometExpressionSerde[Right] { case Literal(lenValue, _) => val lenInt = lenValue.asInstanceOf[Int] if (lenInt <= 0) { - val literalBuilder = LiteralOuterClass.Literal.newBuilder() - literalBuilder.setIsNull(false) - literalBuilder.setStringVal("") - Some(ExprOuterClass.Expr.newBuilder().setLiteral(literalBuilder).build()) + // Match Spark's behavior: If(IsNull(str), NULL, "") + // This ensures NULL propagation: RIGHT(NULL, 0) -> NULL, RIGHT("hello", 0) -> "" + val isNullExpr = IsNull(expr.str) + val nullLiteral = Literal.create(null, StringType) + val emptyStringLiteral = Literal(UTF8String.EMPTY_UTF8, StringType) + val ifExpr = If(isNullExpr, nullLiteral, emptyStringLiteral) + + // Serialize the If expression using existing infrastructure + exprToProtoInternal(ifExpr, inputs, binding) } else { exprToProtoInternal(expr.str, inputs, binding) match { case Some(strExpr) => diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 93a7c1b701..7317abf985 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -552,6 +552,31 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } + test("RIGHT function NULL handling") { + // Test NULL propagation with len = 0 (critical edge case) + withParquetTable((0 until 5).map(i => (s"test$i", i)), "null_tbl") { + checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), 0) FROM null_tbl LIMIT 1") + checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), -1) FROM null_tbl LIMIT 1") + checkSparkAnswerAndOperator("SELECT RIGHT(CAST(NULL AS STRING), -5) FROM null_tbl LIMIT 1") + } + + // Test non-NULL strings with len <= 0 (should return empty string) + withParquetTable((0 until 5).map(i => (s"test$i", i)), "edge_tbl") { + checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, 0) FROM edge_tbl") + checkSparkAnswerAndOperator("SELECT _1, RIGHT(_1, -1) FROM edge_tbl") + } + + // Test mixed NULL and non-NULL values with a table + val table = "right_null_edge" + withTable(table) { + sql(s"create table $table(str string) using parquet") + sql(s"insert into $table values('hello'), (NULL), (''), ('world')") + checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, 0) FROM $table") + checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, -1) FROM $table") + checkSparkAnswerAndOperator(s"SELECT str, RIGHT(str, 2) FROM $table") + } + } + test("hour, minute, second") { Seq(true, false).foreach { dictionaryEnabled => withTempDir { dir =>