Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sqlglot/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7370,6 +7370,10 @@ class Median(AggFunc):
pass


class Mode(AggFunc):
Copy link
Collaborator

@geooo109 geooo109 Dec 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is supported by other dialects.

For example: Databricks mode supports up to 2 arguments:
mode(expr [, deterministic ])

We should verify if other databases support this function (from their docs online), and find a common representation for all of them. Then add parsing tests for each of them validate_identity or validate_all.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to support 3 patterns:

Pattern 1: Simple Aggregate

Mode(this=category, deterministic=None)
-> Snowflake: MODE(category)

Pattern 2: With Deterministic Parameter

Mode(this=category, deterministic=TRUE/FALSE)
-> Databricks/Spark: mode(category, true)

Pattern 3: Empty Function with WITHIN GROUP

WithinGroup(
this=Mode(this=None, deterministic=None),
expression=Order(expressions=[Ordered(this=category)])
)
-> PostgreSQL: mode() WITHIN GROUP (ORDER BY category)

arg_types = {"this": False, "deterministic": False}


class Min(AggFunc):
arg_types = {"this": True, "expressions": False}
is_var_len_args = True
Expand Down
1 change: 1 addition & 0 deletions sqlglot/typing/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ def _annotate_math_with_float_decfloat(
exp.DateTrunc,
exp.Floor,
exp.Left,
exp.Mode,
exp.Pad,
exp.Right,
exp.Round,
Expand Down
2 changes: 2 additions & 0 deletions tests/dialects/test_databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def test_databricks(self):
self.validate_identity("SELECT BITMAP_BUCKET_NUMBER(32769)")
self.validate_identity("SELECT BITMAP_CONSTRUCT_AGG(value)")
self.validate_identity("SELECT EXP(1)")
self.validate_identity("SELECT MODE(category)")
self.validate_identity("SELECT MODE(price, TRUE) AS deterministic_mode FROM products")
self.validate_identity("REGEXP_LIKE(x, y)")
self.validate_identity("SELECT CAST(NULL AS VOID)")
self.validate_identity("SELECT void FROM t")
Expand Down
1 change: 1 addition & 0 deletions tests/dialects/test_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def test_duckdb(self):
)
self.validate_identity("SELECT str[0:1]")
self.validate_identity("SELECT COSH(1.5)")
self.validate_identity("SELECT MODE(category)")
with self.assertRaises(ParseError):
parse_one("1 //", read="duckdb")

Expand Down
3 changes: 3 additions & 0 deletions tests/dialects/test_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ def test_postgres(self):
self.validate_identity("SELECT * FROM t GROUP BY ROLLUP (a || '^' || b)")
self.validate_identity("SELECT COSH(1.5)")
self.validate_identity("SELECT EXP(1)")
self.validate_identity(
"SELECT MODE() WITHIN GROUP (ORDER BY status DESC) AS most_common FROM orders"
)
self.validate_identity("SELECT ST_DISTANCE(gg1, gg2, FALSE) AS sphere_dist")
self.validate_identity("SHA384(x)")
self.validate_identity("1.x", "1. AS x")
Expand Down
2 changes: 2 additions & 0 deletions tests/dialects/test_snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ def test_snowflake(self):
self.validate_identity("SELECT MAX(x)")
self.validate_identity("SELECT COUNT(x)")
self.validate_identity("SELECT MIN(amount)")
self.validate_identity("SELECT MODE(x)")
self.validate_identity("SELECT MODE(status) OVER (PARTITION BY region) FROM orders")
self.validate_identity("SELECT TAN(x)")
self.validate_identity("SELECT COS(x)")
self.validate_identity("SELECT SINH(1.5)")
Expand Down
4 changes: 3 additions & 1 deletion tests/dialects/test_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ def test_ddl(self):
self.validate_identity("DAYOFMONTH(TO_DATE(x))")
self.validate_identity("DAYOFYEAR(TO_DATE(x))")
self.validate_identity("WEEKOFYEAR(TO_DATE(x))")

self.validate_identity("SELECT MODE(category)")
self.validate_identity("SELECT MODE(price, TRUE) AS deterministic_mode FROM products")
Comment on lines +17 to +18
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto. Also, instead of repeating tests in each file we can also put the common ones in identity_function.sql such as MODE(x)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the problem is that Postgres only seems to support semantics like "SELECT MODE() WITHIN GROUP (ORDER BY category) FROM table1", not "SELECT MODE(x)"

self.validate_identity("SELECT MODE() WITHIN GROUP (ORDER BY status) FROM orders")
self.validate_identity("DROP NAMESPACE my_catalog.my_namespace")
self.validate_identity("CREATE NAMESPACE my_catalog.my_namespace")
self.validate_identity("INSERT OVERWRITE TABLE db1.tb1 TABLE db2.tb2")
Expand Down
32 changes: 32 additions & 0 deletions tests/fixtures/optimizer/annotate_functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3984,6 +3984,38 @@ OBJECT;
XMLGET(PARSE_XML('<root><item>a</item><item>b</item></root>'), 'item', 1);
OBJECT;

# dialect: snowflake
MODE(tbl.double_col);
DOUBLE;

# dialect: snowflake
MODE(tbl.date_col);
DATE;

# dialect: snowflake
MODE(tbl.timestamp_col);
TIMESTAMP;

# dialect: snowflake
MODE(tbl.bool_col);
BOOLEAN;

# dialect: snowflake
MODE(CAST(100 AS DECIMAL(10,2)));
DECIMAL(10, 2);

# dialect: snowflake
MODE(tbl.bigint_col) OVER (PARTITION BY 1);
BIGINT;

# dialect: snowflake
MODE(CAST(NULL AS INT));
INT;

# dialect: snowflake
MODE(tbl.str_col) OVER (PARTITION BY tbl.int_col);
VARCHAR;

--------------------------------------
-- T-SQL
--------------------------------------
Expand Down