sqlglot.dialects.dialect

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from enum import Enum, auto
   6from functools import reduce
   7
   8from sqlglot import exp
   9from sqlglot.errors import ParseError
  10from sqlglot.generator import Generator
  11from sqlglot.helper import AutoName, flatten, is_int, seq_get
  12from sqlglot.jsonpath import parse as parse_json_path
  13from sqlglot.parser import Parser
  14from sqlglot.time import TIMEZONES, format_time
  15from sqlglot.tokens import Token, Tokenizer, TokenType
  16from sqlglot.trie import new_trie
  17
  18DATE_ADD_OR_DIFF = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateDiff, exp.TsOrDsDiff]
  19DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateSub]
  20JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar]
  21
  22
  23if t.TYPE_CHECKING:
  24    from sqlglot._typing import B, E, F
  25
  26logger = logging.getLogger("sqlglot")
  27
  28UNESCAPED_SEQUENCES = {
  29    "\\a": "\a",
  30    "\\b": "\b",
  31    "\\f": "\f",
  32    "\\n": "\n",
  33    "\\r": "\r",
  34    "\\t": "\t",
  35    "\\v": "\v",
  36    "\\\\": "\\",
  37}
  38
  39
  40class Dialects(str, Enum):
  41    """Dialects supported by SQLGLot."""
  42
  43    DIALECT = ""
  44
  45    ATHENA = "athena"
  46    BIGQUERY = "bigquery"
  47    CLICKHOUSE = "clickhouse"
  48    DATABRICKS = "databricks"
  49    DORIS = "doris"
  50    DRILL = "drill"
  51    DUCKDB = "duckdb"
  52    HIVE = "hive"
  53    MATERIALIZE = "materialize"
  54    MYSQL = "mysql"
  55    ORACLE = "oracle"
  56    POSTGRES = "postgres"
  57    PRESTO = "presto"
  58    PRQL = "prql"
  59    REDSHIFT = "redshift"
  60    RISINGWAVE = "risingwave"
  61    SNOWFLAKE = "snowflake"
  62    SPARK = "spark"
  63    SPARK2 = "spark2"
  64    SQLITE = "sqlite"
  65    STARROCKS = "starrocks"
  66    TABLEAU = "tableau"
  67    TERADATA = "teradata"
  68    TRINO = "trino"
  69    TSQL = "tsql"
  70
  71
  72class NormalizationStrategy(str, AutoName):
  73    """Specifies the strategy according to which identifiers should be normalized."""
  74
  75    LOWERCASE = auto()
  76    """Unquoted identifiers are lowercased."""
  77
  78    UPPERCASE = auto()
  79    """Unquoted identifiers are uppercased."""
  80
  81    CASE_SENSITIVE = auto()
  82    """Always case-sensitive, regardless of quotes."""
  83
  84    CASE_INSENSITIVE = auto()
  85    """Always case-insensitive, regardless of quotes."""
  86
  87
  88class _Dialect(type):
  89    classes: t.Dict[str, t.Type[Dialect]] = {}
  90
  91    def __eq__(cls, other: t.Any) -> bool:
  92        if cls is other:
  93            return True
  94        if isinstance(other, str):
  95            return cls is cls.get(other)
  96        if isinstance(other, Dialect):
  97            return cls is type(other)
  98
  99        return False
 100
 101    def __hash__(cls) -> int:
 102        return hash(cls.__name__.lower())
 103
 104    @classmethod
 105    def __getitem__(cls, key: str) -> t.Type[Dialect]:
 106        return cls.classes[key]
 107
 108    @classmethod
 109    def get(
 110        cls, key: str, default: t.Optional[t.Type[Dialect]] = None
 111    ) -> t.Optional[t.Type[Dialect]]:
 112        return cls.classes.get(key, default)
 113
 114    def __new__(cls, clsname, bases, attrs):
 115        klass = super().__new__(cls, clsname, bases, attrs)
 116        enum = Dialects.__members__.get(clsname.upper())
 117        cls.classes[enum.value if enum is not None else clsname.lower()] = klass
 118
 119        klass.TIME_TRIE = new_trie(klass.TIME_MAPPING)
 120        klass.FORMAT_TRIE = (
 121            new_trie(klass.FORMAT_MAPPING) if klass.FORMAT_MAPPING else klass.TIME_TRIE
 122        )
 123        klass.INVERSE_TIME_MAPPING = {v: k for k, v in klass.TIME_MAPPING.items()}
 124        klass.INVERSE_TIME_TRIE = new_trie(klass.INVERSE_TIME_MAPPING)
 125
 126        base = seq_get(bases, 0)
 127        base_tokenizer = (getattr(base, "tokenizer_class", Tokenizer),)
 128        base_parser = (getattr(base, "parser_class", Parser),)
 129        base_generator = (getattr(base, "generator_class", Generator),)
 130
 131        klass.tokenizer_class = klass.__dict__.get(
 132            "Tokenizer", type("Tokenizer", base_tokenizer, {})
 133        )
 134        klass.parser_class = klass.__dict__.get("Parser", type("Parser", base_parser, {}))
 135        klass.generator_class = klass.__dict__.get(
 136            "Generator", type("Generator", base_generator, {})
 137        )
 138
 139        klass.QUOTE_START, klass.QUOTE_END = list(klass.tokenizer_class._QUOTES.items())[0]
 140        klass.IDENTIFIER_START, klass.IDENTIFIER_END = list(
 141            klass.tokenizer_class._IDENTIFIERS.items()
 142        )[0]
 143
 144        def get_start_end(token_type: TokenType) -> t.Tuple[t.Optional[str], t.Optional[str]]:
 145            return next(
 146                (
 147                    (s, e)
 148                    for s, (e, t) in klass.tokenizer_class._FORMAT_STRINGS.items()
 149                    if t == token_type
 150                ),
 151                (None, None),
 152            )
 153
 154        klass.BIT_START, klass.BIT_END = get_start_end(TokenType.BIT_STRING)
 155        klass.HEX_START, klass.HEX_END = get_start_end(TokenType.HEX_STRING)
 156        klass.BYTE_START, klass.BYTE_END = get_start_end(TokenType.BYTE_STRING)
 157        klass.UNICODE_START, klass.UNICODE_END = get_start_end(TokenType.UNICODE_STRING)
 158
 159        if "\\" in klass.tokenizer_class.STRING_ESCAPES:
 160            klass.UNESCAPED_SEQUENCES = {
 161                **UNESCAPED_SEQUENCES,
 162                **klass.UNESCAPED_SEQUENCES,
 163            }
 164
 165        klass.ESCAPED_SEQUENCES = {v: k for k, v in klass.UNESCAPED_SEQUENCES.items()}
 166
 167        klass.SUPPORTS_COLUMN_JOIN_MARKS = "(+)" in klass.tokenizer_class.KEYWORDS
 168
 169        if enum not in ("", "bigquery"):
 170            klass.generator_class.SELECT_KINDS = ()
 171
 172        if enum not in ("", "athena", "presto", "trino"):
 173            klass.generator_class.TRY_SUPPORTED = False
 174            klass.generator_class.SUPPORTS_UESCAPE = False
 175
 176        if enum not in ("", "databricks", "hive", "spark", "spark2"):
 177            modifier_transforms = klass.generator_class.AFTER_HAVING_MODIFIER_TRANSFORMS.copy()
 178            for modifier in ("cluster", "distribute", "sort"):
 179                modifier_transforms.pop(modifier, None)
 180
 181            klass.generator_class.AFTER_HAVING_MODIFIER_TRANSFORMS = modifier_transforms
 182
 183        if enum not in ("", "doris", "mysql"):
 184            klass.parser_class.ID_VAR_TOKENS = klass.parser_class.ID_VAR_TOKENS | {
 185                TokenType.STRAIGHT_JOIN,
 186            }
 187            klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | {
 188                TokenType.STRAIGHT_JOIN,
 189            }
 190
 191        if not klass.SUPPORTS_SEMI_ANTI_JOIN:
 192            klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | {
 193                TokenType.ANTI,
 194                TokenType.SEMI,
 195            }
 196
 197        return klass
 198
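# [Editorial sketch, not part of the module] Creating a Dialect subclass is all
# it takes to register it: _Dialect.__new__ stores it in Dialect.classes under
# its Dialects enum value, or else under the lowercased class name. The dialect
# name below is hypothetical.
def _example_registry_sketch() -> None:
    class MyDialect(Dialect):  # registered as "mydialect" at class-creation time
        NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    assert Dialect.get("mydialect") is MyDialect
    assert Dialect.get_or_raise("mydialect") == MyDialect()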
 199
 200class Dialect(metaclass=_Dialect):
 201    INDEX_OFFSET = 0
 202    """The base index offset for arrays."""
 203
 204    WEEK_OFFSET = 0
 205    """First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday."""
 206
 207    UNNEST_COLUMN_ONLY = False
 208    """Whether `UNNEST` table aliases are treated as column aliases."""
 209
 210    ALIAS_POST_TABLESAMPLE = False
 211    """Whether the table alias comes after tablesample."""
 212
 213    TABLESAMPLE_SIZE_IS_PERCENT = False
 214    """Whether a size in the table sample clause represents percentage."""
 215
 216    NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE
 217    """Specifies the strategy according to which identifiers should be normalized."""
 218
 219    IDENTIFIERS_CAN_START_WITH_DIGIT = False
 220    """Whether an unquoted identifier can start with a digit."""
 221
 222    DPIPE_IS_STRING_CONCAT = True
 223    """Whether the DPIPE token (`||`) is a string concatenation operator."""
 224
 225    STRICT_STRING_CONCAT = False
 226    """Whether `CONCAT`'s arguments must be strings."""
 227
 228    SUPPORTS_USER_DEFINED_TYPES = True
 229    """Whether user-defined data types are supported."""
 230
 231    SUPPORTS_SEMI_ANTI_JOIN = True
 232    """Whether `SEMI` or `ANTI` joins are supported."""
 233
 234    SUPPORTS_COLUMN_JOIN_MARKS = False
 235    """Whether the old-style outer join (+) syntax is supported."""
 236
 237    COPY_PARAMS_ARE_CSV = True
 238    """Separator of COPY statement parameters."""
 239
 240    NORMALIZE_FUNCTIONS: bool | str = "upper"
 241    """
 242    Determines how function names are going to be normalized.
 243    Possible values:
 244        "upper" or True: Convert names to uppercase.
 245        "lower": Convert names to lowercase.
 246        False: Disables function name normalization.
 247    """
 248
 249    LOG_BASE_FIRST: t.Optional[bool] = True
 250    """
 251    Whether the base comes first in the `LOG` function.
 252    Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`)
 253    """
 254
 255    NULL_ORDERING = "nulls_are_small"
 256    """
 257    Default `NULL` ordering method to use if not explicitly set.
 258    Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"`
 259    """
 260
 261    TYPED_DIVISION = False
 262    """
 263    Whether the behavior of `a / b` depends on the types of `a` and `b`.
 264    False means `a / b` is always float division.
 265    True means `a / b` is integer division if both `a` and `b` are integers.
 266    """
 267
 268    SAFE_DIVISION = False
 269    """Whether division by zero throws an error (`False`) or returns NULL (`True`)."""
 270
 271    CONCAT_COALESCE = False
 272    """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string."""
 273
 274    HEX_LOWERCASE = False
 275    """Whether the `HEX` function returns a lowercase hexadecimal string."""
 276
 277    DATE_FORMAT = "'%Y-%m-%d'"
 278    DATEINT_FORMAT = "'%Y%m%d'"
 279    TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'"
 280
 281    TIME_MAPPING: t.Dict[str, str] = {}
 282    """Associates this dialect's time formats with their equivalent Python `strftime` formats."""
 283
 284    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time
 285    # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE
 286    FORMAT_MAPPING: t.Dict[str, str] = {}
 287    """
 288    Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`.
 289    If empty, the corresponding trie will be constructed off of `TIME_MAPPING`.
 290    """
 291
 292    UNESCAPED_SEQUENCES: t.Dict[str, str] = {}
 293    """Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`)."""
 294
 295    PSEUDOCOLUMNS: t.Set[str] = set()
 296    """
 297    Columns that are auto-generated by the engine corresponding to this dialect.
 298    For example, such columns may be excluded from `SELECT *` queries.
 299    """
 300
 301    PREFER_CTE_ALIAS_COLUMN = False
 302    """
 303    Some dialects, such as Snowflake, allow you to reference a CTE column alias in the
 304    HAVING clause of the CTE. This flag will cause the CTE alias columns to override
 305    any projection aliases in the subquery.
 306
 307    For example,
 308        WITH y(c) AS (
 309            SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
 310        ) SELECT c FROM y;
 311
 312        will be rewritten as
 313
 314        WITH y(c) AS (
 315            SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
 316        ) SELECT c FROM y;
 317    """
 318
 324    # --- Autofilled ---
 325
 326    tokenizer_class = Tokenizer
 327    parser_class = Parser
 328    generator_class = Generator
 329
 330    # A trie of the time_mapping keys
 331    TIME_TRIE: t.Dict = {}
 332    FORMAT_TRIE: t.Dict = {}
 333
 334    INVERSE_TIME_MAPPING: t.Dict[str, str] = {}
 335    INVERSE_TIME_TRIE: t.Dict = {}
 336
 337    ESCAPED_SEQUENCES: t.Dict[str, str] = {}
 338
 339    # Delimiters for string literals and identifiers
 340    QUOTE_START = "'"
 341    QUOTE_END = "'"
 342    IDENTIFIER_START = '"'
 343    IDENTIFIER_END = '"'
 344
 345    # Delimiters for bit, hex, byte and unicode literals
 346    BIT_START: t.Optional[str] = None
 347    BIT_END: t.Optional[str] = None
 348    HEX_START: t.Optional[str] = None
 349    HEX_END: t.Optional[str] = None
 350    BYTE_START: t.Optional[str] = None
 351    BYTE_END: t.Optional[str] = None
 352    UNICODE_START: t.Optional[str] = None
 353    UNICODE_END: t.Optional[str] = None
 354
 355    DATE_PART_MAPPING = {
 356        "Y": "YEAR",
 357        "YY": "YEAR",
 358        "YYY": "YEAR",
 359        "YYYY": "YEAR",
 360        "YR": "YEAR",
 361        "YEARS": "YEAR",
 362        "YRS": "YEAR",
 363        "MM": "MONTH",
 364        "MON": "MONTH",
 365        "MONS": "MONTH",
 366        "MONTHS": "MONTH",
 367        "D": "DAY",
 368        "DD": "DAY",
 369        "DAYS": "DAY",
 370        "DAYOFMONTH": "DAY",
 371        "DAY OF WEEK": "DAYOFWEEK",
 372        "WEEKDAY": "DAYOFWEEK",
 373        "DOW": "DAYOFWEEK",
 374        "DW": "DAYOFWEEK",
 375        "WEEKDAY_ISO": "DAYOFWEEKISO",
 376        "DOW_ISO": "DAYOFWEEKISO",
 377        "DW_ISO": "DAYOFWEEKISO",
 378        "DAY OF YEAR": "DAYOFYEAR",
 379        "DOY": "DAYOFYEAR",
 380        "DY": "DAYOFYEAR",
 381        "W": "WEEK",
 382        "WK": "WEEK",
 383        "WEEKOFYEAR": "WEEK",
 384        "WOY": "WEEK",
 385        "WY": "WEEK",
 386        "WEEK_ISO": "WEEKISO",
 387        "WEEKOFYEARISO": "WEEKISO",
 388        "WEEKOFYEAR_ISO": "WEEKISO",
 389        "Q": "QUARTER",
 390        "QTR": "QUARTER",
 391        "QTRS": "QUARTER",
 392        "QUARTERS": "QUARTER",
 393        "H": "HOUR",
 394        "HH": "HOUR",
 395        "HR": "HOUR",
 396        "HOURS": "HOUR",
 397        "HRS": "HOUR",
 398        "M": "MINUTE",
 399        "MI": "MINUTE",
 400        "MIN": "MINUTE",
 401        "MINUTES": "MINUTE",
 402        "MINS": "MINUTE",
 403        "S": "SECOND",
 404        "SEC": "SECOND",
 405        "SECONDS": "SECOND",
 406        "SECS": "SECOND",
 407        "MS": "MILLISECOND",
 408        "MSEC": "MILLISECOND",
 409        "MSECS": "MILLISECOND",
 410        "MSECOND": "MILLISECOND",
 411        "MSECONDS": "MILLISECOND",
 412        "MILLISEC": "MILLISECOND",
 413        "MILLISECS": "MILLISECOND",
 414        "MILLISECON": "MILLISECOND",
 415        "MILLISECONDS": "MILLISECOND",
 416        "US": "MICROSECOND",
 417        "USEC": "MICROSECOND",
 418        "USECS": "MICROSECOND",
 419        "MICROSEC": "MICROSECOND",
 420        "MICROSECS": "MICROSECOND",
 421        "USECOND": "MICROSECOND",
 422        "USECONDS": "MICROSECOND",
 423        "MICROSECONDS": "MICROSECOND",
 424        "NS": "NANOSECOND",
 425        "NSEC": "NANOSECOND",
 426        "NANOSEC": "NANOSECOND",
 427        "NSECOND": "NANOSECOND",
 428        "NSECONDS": "NANOSECOND",
 429        "NANOSECS": "NANOSECOND",
 430        "EPOCH_SECOND": "EPOCH",
 431        "EPOCH_SECONDS": "EPOCH",
 432        "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
 433        "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
 434        "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
 435        "TZH": "TIMEZONE_HOUR",
 436        "TZM": "TIMEZONE_MINUTE",
 437        "DEC": "DECADE",
 438        "DECS": "DECADE",
 439        "DECADES": "DECADE",
 440        "MIL": "MILLENIUM",
 441        "MILS": "MILLENIUM",
 442        "MILLENIA": "MILLENIUM",
 443        "C": "CENTURY",
 444        "CENT": "CENTURY",
 445        "CENTS": "CENTURY",
 446        "CENTURIES": "CENTURY",
 447    }
 448
 449    @classmethod
 450    def get_or_raise(cls, dialect: DialectType) -> Dialect:
 451        """
 452        Look up a dialect in the global dialect registry and return it if it exists.
 453
 454        Args:
 455            dialect: The target dialect. If this is a string, it can be optionally followed by
 456                additional key-value pairs that are separated by commas and are used to specify
 457                dialect settings, such as whether the dialect's identifiers are case-sensitive.
 458
 459        Example:
 460            >>> dialect = Dialect.get_or_raise("duckdb")
 461            >>> dialect = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
 462
 463        Returns:
 464            The corresponding Dialect instance.
 465        """
 466
 467        if not dialect:
 468            return cls()
 469        if isinstance(dialect, _Dialect):
 470            return dialect()
 471        if isinstance(dialect, Dialect):
 472            return dialect
 473        if isinstance(dialect, str):
 474            try:
 475                dialect_name, *kv_pairs = dialect.split(",")
 476                kwargs = {k.strip(): v.strip() for k, v in (kv.split("=") for kv in kv_pairs)}
 477            except ValueError:
 478                raise ValueError(
 479                    f"Invalid dialect format: '{dialect}'. "
 480                    "Please use the correct format: 'dialect [, k1 = v1 [, ...]]'."
 481                )
 482
 483            result = cls.get(dialect_name.strip())
 484            if not result:
 485                from difflib import get_close_matches
 486
 487                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
 488                if similar:
 489                    similar = f" Did you mean {similar}?"
 490
 491                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
 492
 493            return result(**kwargs)
 494
 495        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")
 496
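    # [Editorial sketch] A string lookup can carry settings; each trailing
    # `k = v` pair is forwarded to the resolved dialect's constructor:
    #
    #     >>> d = Dialect.get_or_raise("snowflake, normalization_strategy = case_sensitive")
    #     >>> d.normalization_strategy is NormalizationStrategy.CASE_SENSITIVE
    #     True
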
 497    @classmethod
 498    def format_time(
 499        cls, expression: t.Optional[str | exp.Expression]
 500    ) -> t.Optional[exp.Expression]:
 501        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
 502        if isinstance(expression, str):
 503            return exp.Literal.string(
 504                # the time formats are quoted
 505                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
 506            )
 507
 508        if expression and expression.is_string:
 509            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
 510
 511        return expression
 512
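    # [Editorial sketch] format_time rewrites a quoted time-format literal into
    # Python strftime tokens via TIME_MAPPING. Assuming Hive's mapping, where
    # yyyy -> %Y, MM -> %m and dd -> %d:
    #
    #     >>> Dialect["hive"].format_time("'yyyy-MM-dd'").sql()
    #     "'%Y-%m-%d'"
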
 513    def __init__(self, **kwargs) -> None:
 514        normalization_strategy = kwargs.pop("normalization_strategy", None)
 515
 516        if normalization_strategy is None:
 517            self.normalization_strategy = self.NORMALIZATION_STRATEGY
 518        else:
 519            self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper())
 520
 521        self.settings = kwargs
 522
 523    def __eq__(self, other: t.Any) -> bool:
 524        # Does not currently take dialect state into account
 525        return type(self) == other
 526
 527    def __hash__(self) -> int:
 528        # Does not currently take dialect state into account
 529        return hash(type(self))
 530
 531    def normalize_identifier(self, expression: E) -> E:
 532        """
 533        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
 534
 535        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
 536        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
 537        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
 538        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
 539
 540        There are also dialects like Spark, which are case-insensitive even when quotes are
 541        present, and dialects like MySQL, whose resolution rules match those employed by the
 542        underlying operating system, for example they may always be case-sensitive in Linux.
 543
 544        Finally, the normalization behavior of some engines can even be controlled through flags,
 545        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
 546
 547        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
 548        that it can analyze queries in the optimizer and successfully capture their semantics.
 549        """
 550        if (
 551            isinstance(expression, exp.Identifier)
 552            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
 553            and (
 554                not expression.quoted
 555                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
 556            )
 557        ):
 558            expression.set(
 559                "this",
 560                (
 561                    expression.this.upper()
 562                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
 563                    else expression.this.lower()
 564                ),
 565            )
 566
 567        return expression
 568
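    # [Editorial sketch] The same unquoted identifier resolves differently per
    # dialect: Postgres lowercases it, Snowflake uppercases it, and quoting it
    # would pin its case in both:
    #
    #     >>> Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO")).name
    #     'foo'
    #     >>> Dialect.get_or_raise("snowflake").normalize_identifier(exp.to_identifier("FoO")).name
    #     'FOO'
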
 569    def case_sensitive(self, text: str) -> bool:
 570        """Checks if text contains any case sensitive characters, based on the dialect's rules."""
 571        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
 572            return False
 573
 574        unsafe = (
 575            str.islower
 576            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
 577            else str.isupper
 578        )
 579        return any(unsafe(char) for char in text)
 580
 581    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
 582        """Checks if text can be identified given an identify option.
 583
 584        Args:
 585            text: The text to check.
 586            identify:
 587                `"always"` or `True`: Always returns `True`.
 588                `"safe"`: Only returns `True` if the identifier is case-insensitive.
 589
 590        Returns:
 591            Whether the given text can be identified.
 592        """
 593        if identify is True or identify == "always":
 594            return True
 595
 596        if identify == "safe":
 597            return not self.case_sensitive(text)
 598
 599        return False
 600
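    # [Editorial sketch] Under the "safe" policy, only identifiers that survive
    # this dialect's normalization unchanged can be left unquoted:
    #
    #     >>> Dialect.get_or_raise("postgres").can_identify("foo", "safe")
    #     True
    #     >>> Dialect.get_or_raise("postgres").can_identify("FoO", "safe")
    #     False
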
 601    def quote_identifier(self, expression: E, identify: bool = True) -> E:
 602        """
 603        Adds quotes to a given identifier.
 604
 605        Args:
 606            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
 607            identify: If set to `False`, the quotes will only be added if the identifier is deemed
 608                "unsafe", with respect to its characters and this dialect's normalization strategy.
 609        """
 610        if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func):
 611            name = expression.this
 612            expression.set(
 613                "quoted",
 614                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
 615            )
 616
 617        return expression
 618
 619    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
 620        if isinstance(path, exp.Literal):
 621            path_text = path.name
 622            if path.is_number:
 623                path_text = f"[{path_text}]"
 624
 625            try:
 626                return parse_json_path(path_text)
 627            except ParseError as e:
 628                logger.warning(f"Invalid JSON path syntax. {str(e)}")
 629
 630        return path
 631
 632    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
 633        return self.parser(**opts).parse(self.tokenize(sql), sql)
 634
 635    def parse_into(
 636        self, expression_type: exp.IntoType, sql: str, **opts
 637    ) -> t.List[t.Optional[exp.Expression]]:
 638        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
 639
 640    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
 641        return self.generator(**opts).generate(expression, copy=copy)
 642
 643    def transpile(self, sql: str, **opts) -> t.List[str]:
 644        return [
 645            self.generate(expression, copy=False, **opts) if expression else ""
 646            for expression in self.parse(sql)
 647        ]
 648
 649    def tokenize(self, sql: str) -> t.List[Token]:
 650        return self.tokenizer.tokenize(sql)
 651
 652    @property
 653    def tokenizer(self) -> Tokenizer:
 654        if not hasattr(self, "_tokenizer"):
 655            self._tokenizer = self.tokenizer_class(dialect=self)
 656        return self._tokenizer
 657
 658    def parser(self, **opts) -> Parser:
 659        return self.parser_class(dialect=self, **opts)
 660
 661    def generator(self, **opts) -> Generator:
 662        return self.generator_class(dialect=self, **opts)
 663
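# [Editorial sketch, not part of the module] A Dialect instance bundles its
# tokenizer, parser and generator; transpile() runs the whole pipeline within
# one dialect:
def _example_round_trip_sketch() -> None:
    duckdb = Dialect.get_or_raise("duckdb")
    (tree,) = duckdb.parse("SELECT 1 AS x")
    assert tree is not None
    assert duckdb.generate(tree) == duckdb.transpile("SELECT 1 AS x")[0] == "SELECT 1 AS x"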
 664
 665DialectType = t.Union[str, Dialect, t.Type[Dialect], None]
 666
 667
 668def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
 669    return lambda self, expression: self.func(name, *flatten(expression.args.values()))
 670
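# [Editorial sketch] rename_func builds a generator transform that re-emits an
# expression under a different function name; dialects typically install it in
# their Generator's TRANSFORMS, e.g. {exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT")}:
def _example_rename_func_sketch() -> None:
    gen = Dialect.get_or_raise("duckdb").generator()
    assert rename_func("MY_FUNC")(gen, exp.Sqrt(this=exp.column("x"))) == "MY_FUNC(x)"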
 671
 672def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
 673    if expression.args.get("accuracy"):
 674        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
 675    return self.func("APPROX_COUNT_DISTINCT", expression.this)
 676
 677
 678def if_sql(
 679    name: str = "IF", false_value: t.Optional[exp.Expression | str] = None
 680) -> t.Callable[[Generator, exp.If], str]:
 681    def _if_sql(self: Generator, expression: exp.If) -> str:
 682        return self.func(
 683            name,
 684            expression.this,
 685            expression.args.get("true"),
 686            expression.args.get("false") or false_value,
 687        )
 688
 689    return _if_sql
 690
 691
 692def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
 693    this = expression.this
 694    if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string:
 695        this.replace(exp.cast(this, exp.DataType.Type.JSON))
 696
 697    return self.binary(expression, "->" if isinstance(expression, exp.JSONExtract) else "->>")
 698
 699
 700def inline_array_sql(self: Generator, expression: exp.Array) -> str:
 701    return f"[{self.expressions(expression, dynamic=True, new_line=True, skip_first=True, skip_last=True)}]"
 702
 703
 704def inline_array_unless_query(self: Generator, expression: exp.Array) -> str:
 705    elem = seq_get(expression.expressions, 0)
 706    if isinstance(elem, exp.Expression) and elem.find(exp.Query):
 707        return self.func("ARRAY", elem)
 708    return inline_array_sql(self, expression)
 709
 710
 711def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
 712    return self.like_sql(
 713        exp.Like(
 714            this=exp.Lower(this=expression.this), expression=exp.Lower(this=expression.expression)
 715        )
 716    )
 717
 718
 719def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
 720    zone = self.sql(expression, "this")
 721    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"
 722
 723
 724def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
 725    if expression.args.get("recursive"):
 726        self.unsupported("Recursive CTEs are unsupported")
 727        expression.args["recursive"] = False
 728    return self.with_sql(expression)
 729
 730
 731def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
 732    n = self.sql(expression, "this")
 733    d = self.sql(expression, "expression")
 734    return f"IF(({d}) <> 0, ({n}) / ({d}), NULL)"
 735
 736
 737def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
 738    self.unsupported("TABLESAMPLE unsupported")
 739    return self.sql(expression.this)
 740
 741
 742def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
 743    self.unsupported("PIVOT unsupported")
 744    return ""
 745
 746
 747def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
 748    return self.cast_sql(expression)
 749
 750
 751def no_comment_column_constraint_sql(
 752    self: Generator, expression: exp.CommentColumnConstraint
 753) -> str:
 754    self.unsupported("CommentColumnConstraint unsupported")
 755    return ""
 756
 757
 758def no_map_from_entries_sql(self: Generator, expression: exp.MapFromEntries) -> str:
 759    self.unsupported("MAP_FROM_ENTRIES unsupported")
 760    return ""
 761
 762
 763def str_position_sql(
 764    self: Generator, expression: exp.StrPosition, generate_instance: bool = False
 765) -> str:
 766    this = self.sql(expression, "this")
 767    substr = self.sql(expression, "substr")
 768    position = self.sql(expression, "position")
 769    instance = expression.args.get("instance") if generate_instance else None
 770    position_offset = ""
 771
 772    if position:
 773        # Normalize third 'pos' argument into 'SUBSTR(..) + offset' across dialects
 774        this = self.func("SUBSTR", this, position)
 775        position_offset = f" + {position} - 1"
 776
 777    return self.func("STRPOS", this, substr, instance) + position_offset
 778
 779
 780def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
 781    return (
 782        f"{self.sql(expression, 'this')}.{self.sql(exp.to_identifier(expression.expression.name))}"
 783    )
 784
 785
 786def var_map_sql(
 787    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
 788) -> str:
 789    keys = expression.args["keys"]
 790    values = expression.args["values"]
 791
 792    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
 793        self.unsupported("Cannot convert array columns into map.")
 794        return self.func(map_func_name, keys, values)
 795
 796    args = []
 797    for key, value in zip(keys.expressions, values.expressions):
 798        args.append(self.sql(key))
 799        args.append(self.sql(value))
 800
 801    return self.func(map_func_name, *args)
 802
 803
 804def build_formatted_time(
 805    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
 806) -> t.Callable[[t.List], E]:
 807    """Helper used for time expressions.
 808
 809    Args:
 810        exp_class: the expression class to instantiate.
 811        dialect: target sql dialect.
 812        default: the default format; `True` means use the dialect's `TIME_FORMAT`.
 813
 814    Returns:
 815        A callable that can be used to return the appropriately formatted time expression.
 816    """
 817
 818    def _builder(args: t.List):
 819        return exp_class(
 820            this=seq_get(args, 0),
 821            format=Dialect[dialect].format_time(
 822                seq_get(args, 1)
 823                or (Dialect[dialect].TIME_FORMAT if default is True else default or None)
 824            ),
 825        )
 826
 827    return _builder
 828
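# [Editorial sketch] Parsers register this builder against function names, e.g.
# Hive maps "DATE_FORMAT" to build_formatted_time(exp.TimeToStr, "hive").
# Assuming Hive's TIME_MAPPING (yyyy -> %Y, MM -> %m, dd -> %d):
def _example_build_formatted_time_sketch() -> None:
    builder = build_formatted_time(exp.TimeToStr, "hive")
    node = builder([exp.column("x"), exp.Literal.string("yyyy-MM-dd")])
    assert node.args["format"].name == "%Y-%m-%d"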
 829
 830def time_format(
 831    dialect: DialectType = None,
 832) -> t.Callable[[Generator, exp.UnixToStr | exp.StrToUnix], t.Optional[str]]:
 833    def _time_format(self: Generator, expression: exp.UnixToStr | exp.StrToUnix) -> t.Optional[str]:
 834        """
 835        Returns the time format for a given expression, unless it's equivalent
 836        to the default time format of the dialect of interest.
 837        """
 838        time_format = self.format_time(expression)
 839        return time_format if time_format != Dialect.get_or_raise(dialect).TIME_FORMAT else None
 840
 841    return _time_format
 842
 843
 844def build_date_delta(
 845    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None
 846) -> t.Callable[[t.List], E]:
 847    def _builder(args: t.List) -> E:
 848        unit_based = len(args) == 3
 849        this = args[2] if unit_based else seq_get(args, 0)
 850        unit = args[0] if unit_based else exp.Literal.string("DAY")
 851        unit = exp.var(unit_mapping.get(unit.name.lower(), unit.name)) if unit_mapping else unit
 852        return exp_class(this=this, expression=seq_get(args, 1), unit=unit)
 853
 854    return _builder
 855
 856
 857def build_date_delta_with_interval(
 858    expression_class: t.Type[E],
 859) -> t.Callable[[t.List], t.Optional[E]]:
 860    def _builder(args: t.List) -> t.Optional[E]:
 861        if len(args) < 2:
 862            return None
 863
 864        interval = args[1]
 865
 866        if not isinstance(interval, exp.Interval):
 867            raise ParseError(f"INTERVAL expression expected but got '{interval}'")
 868
 869        expression = interval.this
 870        if expression and expression.is_string:
 871            expression = exp.Literal.number(expression.this)
 872
 873        return expression_class(this=args[0], expression=expression, unit=unit_to_str(interval))
 874
 875    return _builder
 876
 877
 878def date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
 879    unit = seq_get(args, 0)
 880    this = seq_get(args, 1)
 881
 882    if isinstance(this, exp.Cast) and this.is_type("date"):
 883        return exp.DateTrunc(unit=unit, this=this)
 884    return exp.TimestampTrunc(this=this, unit=unit)
 885
 886
 887def date_add_interval_sql(
 888    data_type: str, kind: str
 889) -> t.Callable[[Generator, exp.Expression], str]:
 890    def func(self: Generator, expression: exp.Expression) -> str:
 891        this = self.sql(expression, "this")
 892        interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression))
 893        return f"{data_type}_{kind}({this}, {self.sql(interval)})"
 894
 895    return func
 896
 897
 898def timestamptrunc_sql(zone: bool = False) -> t.Callable[[Generator, exp.TimestampTrunc], str]:
 899    def _timestamptrunc_sql(self: Generator, expression: exp.TimestampTrunc) -> str:
 900        args = [unit_to_str(expression), expression.this]
 901        if zone:
 902            args.append(expression.args.get("zone"))
 903        return self.func("DATE_TRUNC", *args)
 904
 905    return _timestamptrunc_sql
 906
 907
 908def no_timestamp_sql(self: Generator, expression: exp.Timestamp) -> str:
 909    if not expression.expression:
 910        from sqlglot.optimizer.annotate_types import annotate_types
 911
 912        target_type = annotate_types(expression).type or exp.DataType.Type.TIMESTAMP
 913        return self.sql(exp.cast(expression.this, target_type))
 914    if expression.text("expression").lower() in TIMEZONES:
 915        return self.sql(
 916            exp.AtTimeZone(
 917                this=exp.cast(expression.this, exp.DataType.Type.TIMESTAMP),
 918                zone=expression.expression,
 919            )
 920        )
 921    return self.func("TIMESTAMP", expression.this, expression.expression)
 922
 923
 924def locate_to_strposition(args: t.List) -> exp.Expression:
 925    return exp.StrPosition(
 926        this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2)
 927    )
 928
 929
 930def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
 931    return self.func(
 932        "LOCATE", expression.args.get("substr"), expression.this, expression.args.get("position")
 933    )
 934
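# [Editorial sketch] MySQL-style LOCATE(substr, str[, pos]) and exp.StrPosition
# swap their first two arguments; the two helpers above encode that in both
# directions:
def _example_locate_sketch() -> None:
    node = locate_to_strposition([exp.Literal.string("a"), exp.column("s")])
    gen = Dialect.get_or_raise("mysql").generator()
    assert strposition_to_locate_sql(gen, node) == "LOCATE('a', s)"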
 935
 936def left_to_substring_sql(self: Generator, expression: exp.Left) -> str:
 937    return self.sql(
 938        exp.Substring(
 939            this=expression.this, start=exp.Literal.number(1), length=expression.expression
 940        )
 941    )
 942
 943
 944def right_to_substring_sql(self: Generator, expression: exp.Right) -> str:
 945    return self.sql(
 946        exp.Substring(
 947            this=expression.this,
 948            start=exp.Length(this=expression.this) - exp.paren(expression.expression - 1),
 949        )
 950    )
 951
 952
 953def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
 954    return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
 955
 956
 957def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
 958    return self.sql(exp.cast(expression.this, exp.DataType.Type.DATE))
 959
 960
 961# Used for Presto and DuckDB, which use functions that don't support charset and assume UTF-8
 962def encode_decode_sql(
 963    self: Generator, expression: exp.Expression, name: str, replace: bool = True
 964) -> str:
 965    charset = expression.args.get("charset")
 966    if charset and charset.name.lower() != "utf-8":
 967        self.unsupported(f"Expected utf-8 character set, got {charset}.")
 968
 969    return self.func(name, expression.this, expression.args.get("replace") if replace else None)
 970
 971
 972def min_or_least(self: Generator, expression: exp.Min) -> str:
 973    name = "LEAST" if expression.expressions else "MIN"
 974    return rename_func(name)(self, expression)
 975
 976
 977def max_or_greatest(self: Generator, expression: exp.Max) -> str:
 978    name = "GREATEST" if expression.expressions else "MAX"
 979    return rename_func(name)(self, expression)
 980
 981
 982def count_if_to_sum(self: Generator, expression: exp.CountIf) -> str:
 983    cond = expression.this
 984
 985    if isinstance(expression.this, exp.Distinct):
 986        cond = expression.this.expressions[0]
 987        self.unsupported("DISTINCT is not supported when converting COUNT_IF to SUM")
 988
 989    return self.func("sum", exp.func("if", cond, 1, 0))
 990
 991
 992def trim_sql(self: Generator, expression: exp.Trim) -> str:
 993    target = self.sql(expression, "this")
 994    trim_type = self.sql(expression, "position")
 995    remove_chars = self.sql(expression, "expression")
 996    collation = self.sql(expression, "collation")
 997
 998    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
 999    if not remove_chars and not collation:
1000        return self.trim_sql(expression)
1001
1002    trim_type = f"{trim_type} " if trim_type else ""
1003    remove_chars = f"{remove_chars} " if remove_chars else ""
1004    from_part = "FROM " if trim_type or remove_chars else ""
1005    collation = f" COLLATE {collation}" if collation else ""
1006    return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
1007
1008
1009def str_to_time_sql(self: Generator, expression: exp.Expression) -> str:
1010    return self.func("STRPTIME", expression.this, self.format_time(expression))
1011
1012
1013def concat_to_dpipe_sql(self: Generator, expression: exp.Concat) -> str:
1014    return self.sql(reduce(lambda x, y: exp.DPipe(this=x, expression=y), expression.expressions))
1015
1016
1017def concat_ws_to_dpipe_sql(self: Generator, expression: exp.ConcatWs) -> str:
1018    delim, *rest_args = expression.expressions
1019    return self.sql(
1020        reduce(
1021            lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)),
1022            rest_args,
1023        )
1024    )
1025
1026
1027def regexp_extract_sql(self: Generator, expression: exp.RegexpExtract) -> str:
1028    bad_args = list(filter(expression.args.get, ("position", "occurrence", "parameters")))
1029    if bad_args:
1030        self.unsupported(f"REGEXP_EXTRACT does not support the following arg(s): {bad_args}")
1031
1032    return self.func(
1033        "REGEXP_EXTRACT", expression.this, expression.expression, expression.args.get("group")
1034    )
1035
1036
1037def regexp_replace_sql(self: Generator, expression: exp.RegexpReplace) -> str:
1038    bad_args = list(filter(expression.args.get, ("position", "occurrence", "modifiers")))
1039    if bad_args:
1040        self.unsupported(f"REGEXP_REPLACE does not support the following arg(s): {bad_args}")
1041
1042    return self.func(
1043        "REGEXP_REPLACE", expression.this, expression.expression, expression.args["replacement"]
1044    )
1045
1046
1047def pivot_column_names(aggregations: t.List[exp.Expression], dialect: DialectType) -> t.List[str]:
1048    names = []
1049    for agg in aggregations:
1050        if isinstance(agg, exp.Alias):
1051            names.append(agg.alias)
1052        else:
1053            """
1054            This case corresponds to aggregations without aliases being used as suffixes
1055            (e.g. col_avg(foo)). We need to unquote identifiers because they're going to
1056            be quoted in the base parser's `_parse_pivot` method, due to `to_identifier`.
1057            Otherwise, we'd end up with `col_avg(`foo`)` (note the quoted identifier).
1058            """
1059            agg_all_unquoted = agg.transform(
1060                lambda node: (
1061                    exp.Identifier(this=node.name, quoted=False)
1062                    if isinstance(node, exp.Identifier)
1063                    else node
1064                )
1065            )
1066            names.append(agg_all_unquoted.sql(dialect=dialect, normalize_functions="lower"))
1067
1068    return names
1069
1070
1071def binary_from_function(expr_type: t.Type[B]) -> t.Callable[[t.List], B]:
1072    return lambda args: expr_type(this=seq_get(args, 0), expression=seq_get(args, 1))
1073
1074
1075# Used to represent DATE_TRUNC in Doris, Postgres and Starrocks dialects
1076def build_timestamp_trunc(args: t.List) -> exp.TimestampTrunc:
1077    return exp.TimestampTrunc(this=seq_get(args, 1), unit=seq_get(args, 0))
1078
1079
1080def any_value_to_max_sql(self: Generator, expression: exp.AnyValue) -> str:
1081    return self.func("MAX", expression.this)
1082
1083
1084def bool_xor_sql(self: Generator, expression: exp.Xor) -> str:
1085    a = self.sql(expression.left)
1086    b = self.sql(expression.right)
1087    return f"({a} AND (NOT {b})) OR ((NOT {a}) AND {b})"
1088
1089
1090def is_parse_json(expression: exp.Expression) -> bool:
1091    return isinstance(expression, exp.ParseJSON) or (
1092        isinstance(expression, exp.Cast) and expression.is_type("json")
1093    )
1094
1095
1096def isnull_to_is_null(args: t.List) -> exp.Expression:
1097    return exp.Paren(this=exp.Is(this=seq_get(args, 0), expression=exp.null()))
1098
1099
1100def generatedasidentitycolumnconstraint_sql(
1101    self: Generator, expression: exp.GeneratedAsIdentityColumnConstraint
1102) -> str:
1103    start = self.sql(expression, "start") or "1"
1104    increment = self.sql(expression, "increment") or "1"
1105    return f"IDENTITY({start}, {increment})"
1106
1107
1108def arg_max_or_min_no_count(name: str) -> t.Callable[[Generator, exp.ArgMax | exp.ArgMin], str]:
1109    def _arg_max_or_min_sql(self: Generator, expression: exp.ArgMax | exp.ArgMin) -> str:
1110        if expression.args.get("count"):
1111            self.unsupported(f"Only two arguments are supported in function {name}.")
1112
1113        return self.func(name, expression.this, expression.expression)
1114
1115    return _arg_max_or_min_sql
1116
1117
1118def ts_or_ds_add_cast(expression: exp.TsOrDsAdd) -> exp.TsOrDsAdd:
1119    this = expression.this.copy()
1120
1121    return_type = expression.return_type
1122    if return_type.is_type(exp.DataType.Type.DATE):
1123        # If we need to cast to a DATE, we cast to TIMESTAMP first to make sure we
1124        # can truncate timestamp strings, because some dialects can't cast them to DATE
1125        this = exp.cast(this, exp.DataType.Type.TIMESTAMP)
1126
1127    expression.this.replace(exp.cast(this, return_type))
1128    return expression
1129
1130
1131def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE_ADD_OR_DIFF], str]:
1132    def _delta_sql(self: Generator, expression: DATE_ADD_OR_DIFF) -> str:
1133        if cast and isinstance(expression, exp.TsOrDsAdd):
1134            expression = ts_or_ds_add_cast(expression)
1135
1136        return self.func(
1137            name,
1138            unit_to_var(expression),
1139            expression.expression,
1140            expression.this,
1141        )
1142
1143    return _delta_sql
1144
1145
1146def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1147    unit = expression.args.get("unit")
1148
1149    if isinstance(unit, exp.Placeholder):
1150        return unit
1151    if unit:
1152        return exp.Literal.string(unit.name)
1153    return exp.Literal.string(default) if default else None
1154
1155
1156def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1157    unit = expression.args.get("unit")
1158
1159    if isinstance(unit, (exp.Var, exp.Placeholder)):
1160        return unit
1161    return exp.Var(this=default) if default else None
1162
1163
1164@t.overload
1165def map_date_part(part: exp.Expression, dialect: DialectType = Dialect) -> exp.Var:
1166    pass
1167
1168
1169@t.overload
1170def map_date_part(
1171    part: t.Optional[exp.Expression], dialect: DialectType = Dialect
1172) -> t.Optional[exp.Expression]:
1173    pass
1174
1175
1176def map_date_part(part, dialect: DialectType = Dialect):
1177    mapped = (
1178        Dialect.get_or_raise(dialect).DATE_PART_MAPPING.get(part.name.upper()) if part else None
1179    )
1180    return exp.var(mapped) if mapped else part
1181
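# [Editorial sketch] map_date_part canonicalizes date-part aliases through the
# dialect's DATE_PART_MAPPING, folding variants such as "YY" or "yrs" into YEAR:
def _example_map_date_part_sketch() -> None:
    assert map_date_part(exp.var("YY")).name == "YEAR"
    assert map_date_part(exp.var("yrs")).name == "YEAR"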
1182
1183def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str:
1184    trunc_curr_date = exp.func("date_trunc", "month", expression.this)
1185    plus_one_month = exp.func("date_add", trunc_curr_date, 1, "month")
1186    minus_one_day = exp.func("date_sub", plus_one_month, 1, "day")
1187
1188    return self.sql(exp.cast(minus_one_day, exp.DataType.Type.DATE))
1189
1190
1191def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str:
1192    """Remove table refs from columns in when statements."""
1193    alias = expression.this.args.get("alias")
1194
1195    def normalize(identifier: t.Optional[exp.Identifier]) -> t.Optional[str]:
1196        return self.dialect.normalize_identifier(identifier).name if identifier else None
1197
1198    targets = {normalize(expression.this.this)}
1199
1200    if alias:
1201        targets.add(normalize(alias.this))
1202
1203    for when in expression.expressions:
1204        when.transform(
1205            lambda node: (
1206                exp.column(node.this)
1207                if isinstance(node, exp.Column) and normalize(node.args.get("table")) in targets
1208                else node
1209            ),
1210            copy=False,
1211        )
1212
1213    return self.merge_sql(expression)
1214
1215
1216def build_json_extract_path(
1217    expr_type: t.Type[F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False
1218) -> t.Callable[[t.List], F]:
1219    def _builder(args: t.List) -> F:
1220        segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
1221        for arg in args[1:]:
1222            if not isinstance(arg, exp.Literal):
1223                # We use the fallback parser because we can't really transpile non-literals safely
1224                return expr_type.from_arg_list(args)
1225
1226            text = arg.name
1227            if is_int(text):
1228                index = int(text)
1229                segments.append(
1230                    exp.JSONPathSubscript(this=index if zero_based_indexing else index - 1)
1231                )
1232            else:
1233                segments.append(exp.JSONPathKey(this=text))
1234
1235        # This is done to avoid failing in the expression validator due to the arg count
1236        del args[2:]
1237        return expr_type(
1238            this=seq_get(args, 0),
1239            expression=exp.JSONPath(expressions=segments),
1240            only_json_types=arrow_req_json_type,
1241        )
1242
1243    return _builder
1244
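# [Editorial sketch] Literal arguments are folded into a single exp.JSONPath,
# integer segments becoming subscripts, while non-literal arguments fall back
# to the plain arg-list constructor:
def _example_json_extract_path_sketch() -> None:
    builder = build_json_extract_path(exp.JSONExtract)
    node = builder([exp.column("col"), exp.Literal.string("a"), exp.Literal.number(0)])
    assert node.expression.sql() == "$.a[0]"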
1245
1246def json_extract_segments(
1247    name: str, quoted_index: bool = True, op: t.Optional[str] = None
1248) -> t.Callable[[Generator, JSON_EXTRACT_TYPE], str]:
1249    def _json_extract_segments(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
1250        path = expression.expression
1251        if not isinstance(path, exp.JSONPath):
1252            return rename_func(name)(self, expression)
1253
1254        segments = []
1255        for segment in path.expressions:
1256            path = self.sql(segment)
1257            if path:
1258                if isinstance(segment, exp.JSONPathPart) and (
1259                    quoted_index or not isinstance(segment, exp.JSONPathSubscript)
1260                ):
1261                    path = f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}"
1262
1263                segments.append(path)
1264
1265        if op:
1266            return f" {op} ".join([self.sql(expression.this), *segments])
1267        return self.func(name, expression.this, *segments)
1268
1269    return _json_extract_segments
1270
1271
1272def json_path_key_only_name(self: Generator, expression: exp.JSONPathKey) -> str:
1273    if isinstance(expression.this, exp.JSONPathWildcard):
1274        self.unsupported("Unsupported wildcard in JSONPathKey expression")
1275
1276    return expression.name
1277
1278
1279def filter_array_using_unnest(self: Generator, expression: exp.ArrayFilter) -> str:
1280    cond = expression.expression
1281    if isinstance(cond, exp.Lambda) and len(cond.expressions) == 1:
1282        alias = cond.expressions[0]
1283        cond = cond.this
1284    elif isinstance(cond, exp.Predicate):
1285        alias = "_u"
1286    else:
1287        self.unsupported("Unsupported filter condition")
1288        return ""
1289
1290    unnest = exp.Unnest(expressions=[expression.this])
1291    filtered = exp.select(alias).from_(exp.alias_(unnest, None, table=[alias])).where(cond)
1292    return self.sql(exp.Array(expressions=[filtered]))
1293
1294
1295def to_number_with_nls_param(self: Generator, expression: exp.ToNumber) -> str:
1296    return self.func(
1297        "TO_NUMBER",
1298        expression.this,
1299        expression.args.get("format"),
1300        expression.args.get("nlsparam"),
1301    )
1302
1303
1304def build_default_decimal_type(
1305    precision: t.Optional[int] = None, scale: t.Optional[int] = None
1306) -> t.Callable[[exp.DataType], exp.DataType]:
1307    def _builder(dtype: exp.DataType) -> exp.DataType:
1308        if dtype.expressions or precision is None:
1309            return dtype
1310
1311        params = f"{precision}{f', {scale}' if scale is not None else ''}"
1312        return exp.DataType.build(f"DECIMAL({params})")
1313
1314    return _builder
1315
1316
1317def build_timestamp_from_parts(args: t.List) -> exp.Func:
1318    if len(args) == 2:
1319        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
1320        # so we parse this into Anonymous for now instead of introducing complexity
1321        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)
1322
1323    return exp.TimestampFromParts.from_arg_list(args)
1324
1325
1326def sha256_sql(self: Generator, expression: exp.SHA2) -> str:
1327    return self.func(f"SHA{expression.text('length') or '256'}", expression.this)
312
313        will be rewritten as
314
315        WITH y(c) AS (
316            SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
317        ) SELECT c FROM y;
318    """
319
320    COPY_PARAMS_ARE_CSV = True
321    """
322    Whether COPY statement parameters are separated by comma or whitespace
323    """
324
325    # --- Autofilled ---
326
327    tokenizer_class = Tokenizer
328    parser_class = Parser
329    generator_class = Generator
330
331    # A trie of the time_mapping keys
332    TIME_TRIE: t.Dict = {}
333    FORMAT_TRIE: t.Dict = {}
334
335    INVERSE_TIME_MAPPING: t.Dict[str, str] = {}
336    INVERSE_TIME_TRIE: t.Dict = {}
337
338    ESCAPED_SEQUENCES: t.Dict[str, str] = {}
339
340    # Delimiters for string literals and identifiers
341    QUOTE_START = "'"
342    QUOTE_END = "'"
343    IDENTIFIER_START = '"'
344    IDENTIFIER_END = '"'
345
346    # Delimiters for bit, hex, byte and unicode literals
347    BIT_START: t.Optional[str] = None
348    BIT_END: t.Optional[str] = None
349    HEX_START: t.Optional[str] = None
350    HEX_END: t.Optional[str] = None
351    BYTE_START: t.Optional[str] = None
352    BYTE_END: t.Optional[str] = None
353    UNICODE_START: t.Optional[str] = None
354    UNICODE_END: t.Optional[str] = None
355
356    DATE_PART_MAPPING = {
357        "Y": "YEAR",
358        "YY": "YEAR",
359        "YYY": "YEAR",
360        "YYYY": "YEAR",
361        "YR": "YEAR",
362        "YEARS": "YEAR",
363        "YRS": "YEAR",
364        "MM": "MONTH",
365        "MON": "MONTH",
366        "MONS": "MONTH",
367        "MONTHS": "MONTH",
368        "D": "DAY",
369        "DD": "DAY",
370        "DAYS": "DAY",
371        "DAYOFMONTH": "DAY",
372        "DAY OF WEEK": "DAYOFWEEK",
373        "WEEKDAY": "DAYOFWEEK",
374        "DOW": "DAYOFWEEK",
375        "DW": "DAYOFWEEK",
376        "WEEKDAY_ISO": "DAYOFWEEKISO",
377        "DOW_ISO": "DAYOFWEEKISO",
378        "DW_ISO": "DAYOFWEEKISO",
379        "DAY OF YEAR": "DAYOFYEAR",
380        "DOY": "DAYOFYEAR",
381        "DY": "DAYOFYEAR",
382        "W": "WEEK",
383        "WK": "WEEK",
384        "WEEKOFYEAR": "WEEK",
385        "WOY": "WEEK",
386        "WY": "WEEK",
387        "WEEK_ISO": "WEEKISO",
388        "WEEKOFYEARISO": "WEEKISO",
389        "WEEKOFYEAR_ISO": "WEEKISO",
390        "Q": "QUARTER",
391        "QTR": "QUARTER",
392        "QTRS": "QUARTER",
393        "QUARTERS": "QUARTER",
394        "H": "HOUR",
395        "HH": "HOUR",
396        "HR": "HOUR",
397        "HOURS": "HOUR",
398        "HRS": "HOUR",
399        "M": "MINUTE",
400        "MI": "MINUTE",
401        "MIN": "MINUTE",
402        "MINUTES": "MINUTE",
403        "MINS": "MINUTE",
404        "S": "SECOND",
405        "SEC": "SECOND",
406        "SECONDS": "SECOND",
407        "SECS": "SECOND",
408        "MS": "MILLISECOND",
409        "MSEC": "MILLISECOND",
410        "MSECS": "MILLISECOND",
411        "MSECOND": "MILLISECOND",
412        "MSECONDS": "MILLISECOND",
413        "MILLISEC": "MILLISECOND",
414        "MILLISECS": "MILLISECOND",
415        "MILLISECON": "MILLISECOND",
416        "MILLISECONDS": "MILLISECOND",
417        "US": "MICROSECOND",
418        "USEC": "MICROSECOND",
419        "USECS": "MICROSECOND",
420        "MICROSEC": "MICROSECOND",
421        "MICROSECS": "MICROSECOND",
422        "USECOND": "MICROSECOND",
423        "USECONDS": "MICROSECOND",
424        "MICROSECONDS": "MICROSECOND",
425        "NS": "NANOSECOND",
426        "NSEC": "NANOSECOND",
427        "NANOSEC": "NANOSECOND",
428        "NSECOND": "NANOSECOND",
429        "NSECONDS": "NANOSECOND",
430        "NANOSECS": "NANOSECOND",
431        "EPOCH_SECOND": "EPOCH",
432        "EPOCH_SECONDS": "EPOCH",
433        "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
434        "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
435        "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
436        "TZH": "TIMEZONE_HOUR",
437        "TZM": "TIMEZONE_MINUTE",
438        "DEC": "DECADE",
439        "DECS": "DECADE",
440        "DECADES": "DECADE",
441        "MIL": "MILLENIUM",
442        "MILS": "MILLENIUM",
443        "MILLENIA": "MILLENIUM",
444        "C": "CENTURY",
445        "CENT": "CENTURY",
446        "CENTS": "CENTURY",
447        "CENTURIES": "CENTURY",
448    }
449
450    @classmethod
451    def get_or_raise(cls, dialect: DialectType) -> Dialect:
452        """
453        Look up a dialect in the global dialect registry and return it if it exists.
454
455        Args:
456            dialect: The target dialect. If this is a string, it can be optionally followed by
457                additional key-value pairs that are separated by commas and are used to specify
458                dialect settings, such as whether the dialect's identifiers are case-sensitive.
459
460        Example:
461            >>> dialect = Dialect.get_or_raise("duckdb")
462            >>> dialect = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
463
464        Returns:
465            The corresponding Dialect instance.
466        """
467
468        if not dialect:
469            return cls()
470        if isinstance(dialect, _Dialect):
471            return dialect()
472        if isinstance(dialect, Dialect):
473            return dialect
474        if isinstance(dialect, str):
475            try:
476                dialect_name, *kv_pairs = dialect.split(",")
477                kwargs = {k.strip(): v.strip() for k, v in (kv.split("=") for kv in kv_pairs)}
478            except ValueError:
479                raise ValueError(
480                    f"Invalid dialect format: '{dialect}'. "
481                    "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'."
482                )
483
484            result = cls.get(dialect_name.strip())
485            if not result:
486                from difflib import get_close_matches
487
488                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
489                if similar:
490                    similar = f" Did you mean {similar}?"
491
492                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
493
494            return result(**kwargs)
495
496        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")
497
498    @classmethod
499    def format_time(
500        cls, expression: t.Optional[str | exp.Expression]
501    ) -> t.Optional[exp.Expression]:
502        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
503        if isinstance(expression, str):
504            return exp.Literal.string(
505                # the time formats are quoted
506                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
507            )
508
509        if expression and expression.is_string:
510            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
511
512        return expression
513
514    def __init__(self, **kwargs) -> None:
515        normalization_strategy = kwargs.pop("normalization_strategy", None)
516
517        if normalization_strategy is None:
518            self.normalization_strategy = self.NORMALIZATION_STRATEGY
519        else:
520            self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper())
521
522        self.settings = kwargs
523
524    def __eq__(self, other: t.Any) -> bool:
525        # Does not currently take dialect state into account
526        return type(self) == other
527
528    def __hash__(self) -> int:
529        # Does not currently take dialect state into account
530        return hash(type(self))
531
532    def normalize_identifier(self, expression: E) -> E:
533        """
534        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
535
536        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
537        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
538        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
539        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
540
541        There are also dialects like Spark, which are case-insensitive even when quotes are
542        present, and dialects like MySQL, whose resolution rules match those employed by the
543        underlying operating system, for example they may always be case-sensitive in Linux.
544
545        Finally, the normalization behavior of some engines can even be controlled through flags,
546        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
547
548        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
549        that it can analyze queries in the optimizer and successfully capture their semantics.
550        """
551        if (
552            isinstance(expression, exp.Identifier)
553            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
554            and (
555                not expression.quoted
556                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
557            )
558        ):
559            expression.set(
560                "this",
561                (
562                    expression.this.upper()
563                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
564                    else expression.this.lower()
565                ),
566            )
567
568        return expression
569
570    def case_sensitive(self, text: str) -> bool:
571        """Checks if text contains any case sensitive characters, based on the dialect's rules."""
572        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
573            return False
574
575        unsafe = (
576            str.islower
577            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
578            else str.isupper
579        )
580        return any(unsafe(char) for char in text)
581
582    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
583        """Checks if text can be identified given an identify option.
584
585        Args:
586            text: The text to check.
587            identify:
588                `"always"` or `True`: Always returns `True`.
589                `"safe"`: Only returns `True` if the identifier is case-insensitive.
590
591        Returns:
592            Whether the given text can be identified.
593        """
594        if identify is True or identify == "always":
595            return True
596
597        if identify == "safe":
598            return not self.case_sensitive(text)
599
600        return False
601
602    def quote_identifier(self, expression: E, identify: bool = True) -> E:
603        """
604        Adds quotes to a given identifier.
605
606        Args:
607            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
608            identify: If set to `False`, the quotes will only be added if the identifier is deemed
609                "unsafe", with respect to its characters and this dialect's normalization strategy.
610        """
611        if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func):
612            name = expression.this
613            expression.set(
614                "quoted",
615                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
616            )
617
618        return expression
619
620    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
621        if isinstance(path, exp.Literal):
622            path_text = path.name
623            if path.is_number:
624                path_text = f"[{path_text}]"
625
626            try:
627                return parse_json_path(path_text)
628            except ParseError as e:
629                logger.warning(f"Invalid JSON path syntax. {str(e)}")
630
631        return path
632
633    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
634        return self.parser(**opts).parse(self.tokenize(sql), sql)
635
636    def parse_into(
637        self, expression_type: exp.IntoType, sql: str, **opts
638    ) -> t.List[t.Optional[exp.Expression]]:
639        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
640
641    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
642        return self.generator(**opts).generate(expression, copy=copy)
643
644    def transpile(self, sql: str, **opts) -> t.List[str]:
645        return [
646            self.generate(expression, copy=False, **opts) if expression else ""
647            for expression in self.parse(sql)
648        ]
649
650    def tokenize(self, sql: str) -> t.List[Token]:
651        return self.tokenizer.tokenize(sql)
652
653    @property
654    def tokenizer(self) -> Tokenizer:
655        if not hasattr(self, "_tokenizer"):
656            self._tokenizer = self.tokenizer_class(dialect=self)
657        return self._tokenizer
658
659    def parser(self, **opts) -> Parser:
660        return self.parser_class(dialect=self, **opts)
661
662    def generator(self, **opts) -> Generator:
663        return self.generator_class(dialect=self, **opts)
Dialect(**kwargs)
514    def __init__(self, **kwargs) -> None:
515        normalization_strategy = kwargs.pop("normalization_strategy", None)
516
517        if normalization_strategy is None:
518            self.normalization_strategy = self.NORMALIZATION_STRATEGY
519        else:
520            self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper())
521
522        self.settings = kwargs
INDEX_OFFSET = 0

The base index offset for arrays.

WEEK_OFFSET = 0

First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.

UNNEST_COLUMN_ONLY = False

Whether UNNEST table aliases are treated as column aliases.

ALIAS_POST_TABLESAMPLE = False

Whether the table alias comes after tablesample.

TABLESAMPLE_SIZE_IS_PERCENT = False

Whether a size in the table sample clause represents percentage.

NORMALIZATION_STRATEGY = <NormalizationStrategy.LOWERCASE: 'LOWERCASE'>

Specifies the strategy according to which identifiers should be normalized.

IDENTIFIERS_CAN_START_WITH_DIGIT = False

Whether an unquoted identifier can start with a digit.

DPIPE_IS_STRING_CONCAT = True

Whether the DPIPE token (||) is a string concatenation operator.

STRICT_STRING_CONCAT = False

Whether CONCAT's arguments must be strings.

SUPPORTS_USER_DEFINED_TYPES = True

Whether user-defined data types are supported.

SUPPORTS_SEMI_ANTI_JOIN = True

Whether SEMI or ANTI joins are supported.

SUPPORTS_COLUMN_JOIN_MARKS = False

Whether the old-style outer join (+) syntax is supported.

COPY_PARAMS_ARE_CSV = True

Whether COPY statement parameters are separated by comma (True) or whitespace (False).

NORMALIZE_FUNCTIONS: bool | str = 'upper'

Determines how function names are going to be normalized.

Possible values:

  • "upper" or True: Convert names to uppercase.
  • "lower": Convert names to lowercase.
  • False: Disables function name normalization.

LOG_BASE_FIRST: Optional[bool] = True

Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG)

NULL_ORDERING = 'nulls_are_small'

Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"

TYPED_DIVISION = False

Whether the behavior of a / b depends on the types of a and b. False means a / b is always float division. True means a / b is integer division if both a and b are integers.

SAFE_DIVISION = False

Whether division by zero throws an error (False) or returns NULL (True).

CONCAT_COALESCE = False

A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.

HEX_LOWERCASE = False

Whether the HEX function returns a lowercase hexadecimal string.

DATE_FORMAT = "'%Y-%m-%d'"
DATEINT_FORMAT = "'%Y%m%d'"
TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'"
TIME_MAPPING: Dict[str, str] = {}

Associates this dialect's time formats with their equivalent Python strftime formats.

FORMAT_MAPPING: Dict[str, str] = {}

Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.

UNESCAPED_SEQUENCES: Dict[str, str] = {}

Mapping of an escaped sequence (\\n) to its unescaped version (\n).

PSEUDOCOLUMNS: Set[str] = set()

Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.

PREFER_CTE_ALIAS_COLUMN = False

Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.

For example,

WITH y(c) AS (
    SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
    SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
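
As a quick illustration (a sketch, not from the original docs): these class-level flags become defaults on instances, and dialect subclasses override them:

>>> from sqlglot.dialects.dialect import Dialect
>>> duckdb = Dialect.get_or_raise("duckdb")
>>> duckdb.INDEX_OFFSET  # DuckDB overrides the 0-based default with 1-based arrays
1
>>> Dialect.get_or_raise("postgres").NULL_ORDERING
'nulls_are_large'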
tokenizer_class = <class 'sqlglot.tokens.Tokenizer'>
parser_class = <class 'sqlglot.parser.Parser'>
generator_class = <class 'sqlglot.generator.Generator'>
TIME_TRIE: Dict = {}
FORMAT_TRIE: Dict = {}
INVERSE_TIME_MAPPING: Dict[str, str] = {}
INVERSE_TIME_TRIE: Dict = {}
ESCAPED_SEQUENCES: Dict[str, str] = {}
QUOTE_START = "'"
QUOTE_END = "'"
IDENTIFIER_START = '"'
IDENTIFIER_END = '"'
BIT_START: Optional[str] = None
BIT_END: Optional[str] = None
HEX_START: Optional[str] = None
HEX_END: Optional[str] = None
BYTE_START: Optional[str] = None
BYTE_END: Optional[str] = None
UNICODE_START: Optional[str] = None
UNICODE_END: Optional[str] = None
DATE_PART_MAPPING = {'Y': 'YEAR', 'YY': 'YEAR', 'YYY': 'YEAR', 'YYYY': 'YEAR', 'YR': 'YEAR', 'YEARS': 'YEAR', 'YRS': 'YEAR', 'MM': 'MONTH', 'MON': 'MONTH', 'MONS': 'MONTH', 'MONTHS': 'MONTH', 'D': 'DAY', 'DD': 'DAY', 'DAYS': 'DAY', 'DAYOFMONTH': 'DAY', 'DAY OF WEEK': 'DAYOFWEEK', 'WEEKDAY': 'DAYOFWEEK', 'DOW': 'DAYOFWEEK', 'DW': 'DAYOFWEEK', 'WEEKDAY_ISO': 'DAYOFWEEKISO', 'DOW_ISO': 'DAYOFWEEKISO', 'DW_ISO': 'DAYOFWEEKISO', 'DAY OF YEAR': 'DAYOFYEAR', 'DOY': 'DAYOFYEAR', 'DY': 'DAYOFYEAR', 'W': 'WEEK', 'WK': 'WEEK', 'WEEKOFYEAR': 'WEEK', 'WOY': 'WEEK', 'WY': 'WEEK', 'WEEK_ISO': 'WEEKISO', 'WEEKOFYEARISO': 'WEEKISO', 'WEEKOFYEAR_ISO': 'WEEKISO', 'Q': 'QUARTER', 'QTR': 'QUARTER', 'QTRS': 'QUARTER', 'QUARTERS': 'QUARTER', 'H': 'HOUR', 'HH': 'HOUR', 'HR': 'HOUR', 'HOURS': 'HOUR', 'HRS': 'HOUR', 'M': 'MINUTE', 'MI': 'MINUTE', 'MIN': 'MINUTE', 'MINUTES': 'MINUTE', 'MINS': 'MINUTE', 'S': 'SECOND', 'SEC': 'SECOND', 'SECONDS': 'SECOND', 'SECS': 'SECOND', 'MS': 'MILLISECOND', 'MSEC': 'MILLISECOND', 'MSECS': 'MILLISECOND', 'MSECOND': 'MILLISECOND', 'MSECONDS': 'MILLISECOND', 'MILLISEC': 'MILLISECOND', 'MILLISECS': 'MILLISECOND', 'MILLISECON': 'MILLISECOND', 'MILLISECONDS': 'MILLISECOND', 'US': 'MICROSECOND', 'USEC': 'MICROSECOND', 'USECS': 'MICROSECOND', 'MICROSEC': 'MICROSECOND', 'MICROSECS': 'MICROSECOND', 'USECOND': 'MICROSECOND', 'USECONDS': 'MICROSECOND', 'MICROSECONDS': 'MICROSECOND', 'NS': 'NANOSECOND', 'NSEC': 'NANOSECOND', 'NANOSEC': 'NANOSECOND', 'NSECOND': 'NANOSECOND', 'NSECONDS': 'NANOSECOND', 'NANOSECS': 'NANOSECOND', 'EPOCH_SECOND': 'EPOCH', 'EPOCH_SECONDS': 'EPOCH', 'EPOCH_MILLISECONDS': 'EPOCH_MILLISECOND', 'EPOCH_MICROSECONDS': 'EPOCH_MICROSECOND', 'EPOCH_NANOSECONDS': 'EPOCH_NANOSECOND', 'TZH': 'TIMEZONE_HOUR', 'TZM': 'TIMEZONE_MINUTE', 'DEC': 'DECADE', 'DECS': 'DECADE', 'DECADES': 'DECADE', 'MIL': 'MILLENIUM', 'MILS': 'MILLENIUM', 'MILLENIA': 'MILLENIUM', 'C': 'CENTURY', 'CENT': 'CENTURY', 'CENTS': 'CENTURY', 'CENTURIES': 'CENTURY'}
@classmethod
def get_or_raise( cls, dialect: Union[str, Dialect, Type[Dialect], NoneType]) -> Dialect:
450    @classmethod
451    def get_or_raise(cls, dialect: DialectType) -> Dialect:
452        """
453        Look up a dialect in the global dialect registry and return it if it exists.
454
455        Args:
456            dialect: The target dialect. If this is a string, it can be optionally followed by
457                additional key-value pairs that are separated by commas and are used to specify
458                dialect settings, such as whether the dialect's identifiers are case-sensitive.
459
460        Example:
461            >>> dialect = Dialect.get_or_raise("duckdb")
462            >>> dialect = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
463
464        Returns:
465            The corresponding Dialect instance.
466        """
467
468        if not dialect:
469            return cls()
470        if isinstance(dialect, _Dialect):
471            return dialect()
472        if isinstance(dialect, Dialect):
473            return dialect
474        if isinstance(dialect, str):
475            try:
476                dialect_name, *kv_pairs = dialect.split(",")
477                kwargs = {k.strip(): v.strip() for k, v in (kv.split("=") for kv in kv_pairs)}
478            except ValueError:
479                raise ValueError(
480                    f"Invalid dialect format: '{dialect}'. "
481                    "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'."
482                )
483
484            result = cls.get(dialect_name.strip())
485            if not result:
486                from difflib import get_close_matches
487
488                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
489                if similar:
490                    similar = f" Did you mean {similar}?"
491
492                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")
493
494            return result(**kwargs)
495
496        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")

Look up a dialect in the global dialect registry and return it if it exists.

Arguments:
  • dialect: The target dialect. If this is a string, it can be optionally followed by additional key-value pairs that are separated by commas and are used to specify dialect settings, such as whether the dialect's identifiers are case-sensitive.
Example:
>>> dialect = Dialect.get_or_raise("duckdb")
>>> dialect = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")
Returns:

The corresponding Dialect instance.
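
A brief sketch of the lookup behavior (error text approximate):

>>> from sqlglot.dialects.dialect import Dialect
>>> dialect = Dialect.get_or_raise("duckdb")  # instance of the registered DuckDB class
>>> Dialect.get_or_raise("duckbd")  # typos raise, with a close-match suggestion
Traceback (most recent call last):
  ...
ValueError: Unknown dialect 'duckbd'. Did you mean duckdb?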

@classmethod
def format_time( cls, expression: Union[str, sqlglot.expressions.Expression, NoneType]) -> Optional[sqlglot.expressions.Expression]:
498    @classmethod
499    def format_time(
500        cls, expression: t.Optional[str | exp.Expression]
501    ) -> t.Optional[exp.Expression]:
502        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
503        if isinstance(expression, str):
504            return exp.Literal.string(
505                # the time formats are quoted
506                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
507            )
508
509        if expression and expression.is_string:
510            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))
511
512        return expression

Converts a time format in this dialect to its equivalent Python strftime format.
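
For instance (a sketch assuming Hive's TIME_MAPPING, which maps formats like yyyy to %Y):

>>> from sqlglot.dialects.hive import Hive
>>> # string inputs are expected to be quoted, hence the surrounding quotes
>>> Hive.format_time("'yyyy-MM-dd'").name
'%Y-%m-%d'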

settings
def normalize_identifier(self, expression: ~E) -> ~E:
532    def normalize_identifier(self, expression: E) -> E:
533        """
534        Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
535
536        For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it
537        lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
538        it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive,
539        and so any normalization would be prohibited in order to avoid "breaking" the identifier.
540
541        There are also dialects like Spark, which are case-insensitive even when quotes are
542        present, and dialects like MySQL, whose resolution rules match those employed by the
543        underlying operating system, for example they may always be case-sensitive in Linux.
544
545        Finally, the normalization behavior of some engines can even be controlled through flags,
546        like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
547
548        SQLGlot aims to understand and handle all of these different behaviors gracefully, so
549        that it can analyze queries in the optimizer and successfully capture their semantics.
550        """
551        if (
552            isinstance(expression, exp.Identifier)
553            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
554            and (
555                not expression.quoted
556                or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
557            )
558        ):
559            expression.set(
560                "this",
561                (
562                    expression.this.upper()
563                    if self.normalization_strategy is NormalizationStrategy.UPPERCASE
564                    else expression.this.lower()
565                ),
566            )
567
568        return expression

Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
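
A short sketch of the behaviors described above:

>>> from sqlglot import exp
>>> from sqlglot.dialects.dialect import Dialect
>>> Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO")).this
'foo'
>>> Dialect.get_or_raise("snowflake").normalize_identifier(exp.to_identifier("FoO")).this
'FOO'
>>> # quoted identifiers are preserved under case-sensitive resolution
>>> Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO", quoted=True)).this
'FoO'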

def case_sensitive(self, text: str) -> bool:
570    def case_sensitive(self, text: str) -> bool:
571        """Checks if text contains any case sensitive characters, based on the dialect's rules."""
572        if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE:
573            return False
574
575        unsafe = (
576            str.islower
577            if self.normalization_strategy is NormalizationStrategy.UPPERCASE
578            else str.isupper
579        )
580        return any(unsafe(char) for char in text)

Checks if text contains any case sensitive characters, based on the dialect's rules.

def can_identify(self, text: str, identify: str | bool = 'safe') -> bool:
582    def can_identify(self, text: str, identify: str | bool = "safe") -> bool:
583        """Checks if text can be identified given an identify option.
584
585        Args:
586            text: The text to check.
587            identify:
588                `"always"` or `True`: Always returns `True`.
589                `"safe"`: Only returns `True` if the identifier is case-insensitive.
590
591        Returns:
592            Whether the given text can be identified.
593        """
594        if identify is True or identify == "always":
595            return True
596
597        if identify == "safe":
598            return not self.case_sensitive(text)
599
600        return False

Checks if text can be identified given an identify option.

Arguments:
  • text: The text to check.
  • identify: "always" or True: Always returns True. "safe": Only returns True if the identifier is case-insensitive.
Returns:

Whether the given text can be identified.
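
For example (a sketch; Snowflake uppercases unquoted identifiers, so lowercase characters are the "unsafe" ones):

>>> from sqlglot.dialects.dialect import Dialect
>>> snowflake = Dialect.get_or_raise("snowflake")
>>> snowflake.case_sensitive("FOO")  # already in the normalized case
False
>>> snowflake.can_identify("Foo", "safe")    # mixed case is not safe here
False
>>> snowflake.can_identify("Foo", "always")
True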

def quote_identifier(self, expression: ~E, identify: bool = True) -> ~E:
602    def quote_identifier(self, expression: E, identify: bool = True) -> E:
603        """
604        Adds quotes to a given identifier.
605
606        Args:
607            expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
608            identify: If set to `False`, the quotes will only be added if the identifier is deemed
609                "unsafe", with respect to its characters and this dialect's normalization strategy.
610        """
611        if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func):
612            name = expression.this
613            expression.set(
614                "quoted",
615                identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name),
616            )
617
618        return expression

Adds quotes to a given identifier.

Arguments:
  • expression: The expression of interest. If it's not an Identifier, this method is a no-op.
  • identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
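
A usage sketch:

>>> from sqlglot import exp
>>> from sqlglot.dialects.dialect import Dialect
>>> pg = Dialect.get_or_raise("postgres")
>>> pg.quote_identifier(exp.to_identifier("foo")).sql(dialect="postgres")
'"foo"'
>>> # with identify=False, only "unsafe" names get quoted
>>> pg.quote_identifier(exp.to_identifier("foo"), identify=False).sql(dialect="postgres")
'foo'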
def to_json_path( self, path: Optional[sqlglot.expressions.Expression]) -> Optional[sqlglot.expressions.Expression]:
620    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
621        if isinstance(path, exp.Literal):
622            path_text = path.name
623            if path.is_number:
624                path_text = f"[{path_text}]"
625
626            try:
627                return parse_json_path(path_text)
628            except ParseError as e:
629                logger.warning(f"Invalid JSON path syntax. {str(e)}")
630
631        return path
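
A sketch of the literal-to-JSON-path conversion (output approximate; assumes the default grammar in sqlglot.jsonpath):

>>> from sqlglot import exp
>>> from sqlglot.dialects.dialect import Dialect
>>> Dialect().to_json_path(exp.Literal.string("$.a[0].b")).sql()
'$.a[0].b'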
def parse(self, sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
633    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
634        return self.parser(**opts).parse(self.tokenize(sql), sql)
def parse_into( self, expression_type: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], sql: str, **opts) -> List[Optional[sqlglot.expressions.Expression]]:
636    def parse_into(
637        self, expression_type: exp.IntoType, sql: str, **opts
638    ) -> t.List[t.Optional[exp.Expression]]:
639        return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql)
def generate( self, expression: sqlglot.expressions.Expression, copy: bool = True, **opts) -> str:
641    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
642        return self.generator(**opts).generate(expression, copy=copy)
def transpile(self, sql: str, **opts) -> List[str]:
644    def transpile(self, sql: str, **opts) -> t.List[str]:
645        return [
646            self.generate(expression, copy=False, **opts) if expression else ""
647            for expression in self.parse(sql)
648        ]
def tokenize(self, sql: str) -> List[sqlglot.tokens.Token]:
650    def tokenize(self, sql: str) -> t.List[Token]:
651        return self.tokenizer.tokenize(sql)
tokenizer: sqlglot.tokens.Tokenizer
653    @property
654    def tokenizer(self) -> Tokenizer:
655        if not hasattr(self, "_tokenizer"):
656            self._tokenizer = self.tokenizer_class(dialect=self)
657        return self._tokenizer
def parser(self, **opts) -> sqlglot.parser.Parser:
659    def parser(self, **opts) -> Parser:
660        return self.parser_class(dialect=self, **opts)
def generator(self, **opts) -> sqlglot.generator.Generator:
662    def generator(self, **opts) -> Generator:
663        return self.generator_class(dialect=self, **opts)
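
Putting the pieces together (a sketch; output formatting may vary):

>>> from sqlglot.dialects.dialect import Dialect
>>> duckdb = Dialect.get_or_raise("duckdb")
>>> expression = duckdb.parse("select 1 as x")[0]
>>> duckdb.generate(expression)
'SELECT 1 AS x'
>>> duckdb.transpile("select 1 as x")  # tokenize -> parse -> generate
['SELECT 1 AS x']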
DialectType = typing.Union[str, Dialect, typing.Type[Dialect], NoneType]
def rename_func( name: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
669def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]:
670    return lambda self, expression: self.func(name, *flatten(expression.args.values()))
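
This helper is typically installed in a dialect Generator's TRANSFORMS mapping. A hypothetical sketch (MyDialect is illustrative, not part of sqlglot; defining the class registers it under 'mydialect' via the metaclass):

>>> import sqlglot
>>> from sqlglot import exp
>>> from sqlglot.dialects.dialect import rename_func
>>> from sqlglot.dialects.duckdb import DuckDB
>>> class MyDialect(DuckDB):
...     class Generator(DuckDB.Generator):
...         TRANSFORMS = {
...             **DuckDB.Generator.TRANSFORMS,
...             exp.ApproxDistinct: rename_func("APPROX_DISTINCT"),
...         }
>>> sqlglot.transpile("SELECT APPROX_COUNT_DISTINCT(a)", read="duckdb", write="mydialect")[0]
'SELECT APPROX_DISTINCT(a)'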
def approx_count_distinct_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ApproxDistinct) -> str:
673def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str:
674    if expression.args.get("accuracy"):
675        self.unsupported("APPROX_COUNT_DISTINCT does not support accuracy")
676    return self.func("APPROX_COUNT_DISTINCT", expression.this)
def if_sql( name: str = 'IF', false_value: Union[str, sqlglot.expressions.Expression, NoneType] = None) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.If], str]:
679def if_sql(
680    name: str = "IF", false_value: t.Optional[exp.Expression | str] = None
681) -> t.Callable[[Generator, exp.If], str]:
682    def _if_sql(self: Generator, expression: exp.If) -> str:
683        return self.func(
684            name,
685            expression.this,
686            expression.args.get("true"),
687            expression.args.get("false") or false_value,
688        )
689
690    return _if_sql
def arrow_json_extract_sql( self: sqlglot.generator.Generator, expression: Union[sqlglot.expressions.JSONExtract, sqlglot.expressions.JSONExtractScalar]) -> str:
693def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
694    this = expression.this
695    if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string:
696        this.replace(exp.cast(this, exp.DataType.Type.JSON))
697
698    return self.binary(expression, "->" if isinstance(expression, exp.JSONExtract) else "->>")
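
Dialects such as DuckDB and Postgres route JSONExtract and JSONExtractScalar through this helper, producing -> and ->> respectively. Roughly:

>>> import sqlglot
>>> sqlglot.transpile("SELECT JSON_EXTRACT(x, '$.a')", read="mysql", write="duckdb")[0]
"SELECT x -> '$.a'"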
def inline_array_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Array) -> str:
701def inline_array_sql(self: Generator, expression: exp.Array) -> str:
702    return f"[{self.expressions(expression, dynamic=True, new_line=True, skip_first=True, skip_last=True)}]"
def inline_array_unless_query( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Array) -> str:
705def inline_array_unless_query(self: Generator, expression: exp.Array) -> str:
706    elem = seq_get(expression.expressions, 0)
707    if isinstance(elem, exp.Expression) and elem.find(exp.Query):
708        return self.func("ARRAY", elem)
709    return inline_array_sql(self, expression)
def no_ilike_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ILike) -> str:
712def no_ilike_sql(self: Generator, expression: exp.ILike) -> str:
713    return self.like_sql(
714        exp.Like(
715            this=exp.Lower(this=expression.this), expression=exp.Lower(this=expression.expression)
716        )
717    )
def no_paren_current_date_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CurrentDate) -> str:
720def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str:
721    zone = self.sql(expression, "this")
722    return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE"
def no_recursive_cte_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.With) -> str:
725def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str:
726    if expression.args.get("recursive"):
727        self.unsupported("Recursive CTEs are unsupported")
728        expression.args["recursive"] = False
729    return self.with_sql(expression)
def no_safe_divide_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.SafeDivide) -> str:
732def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide) -> str:
733    n = self.sql(expression, "this")
734    d = self.sql(expression, "expression")
735    return f"IF(({d}) <> 0, ({n}) / ({d}), NULL)"
def no_tablesample_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TableSample) -> str:
738def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str:
739    self.unsupported("TABLESAMPLE unsupported")
740    return self.sql(expression.this)
def no_pivot_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Pivot) -> str:
743def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str:
744    self.unsupported("PIVOT unsupported")
745    return ""
def no_trycast_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TryCast) -> str:
748def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str:
749    return self.cast_sql(expression)
def no_comment_column_constraint_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CommentColumnConstraint) -> str:
752def no_comment_column_constraint_sql(
753    self: Generator, expression: exp.CommentColumnConstraint
754) -> str:
755    self.unsupported("CommentColumnConstraint unsupported")
756    return ""
def no_map_from_entries_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.MapFromEntries) -> str:
759def no_map_from_entries_sql(self: Generator, expression: exp.MapFromEntries) -> str:
760    self.unsupported("MAP_FROM_ENTRIES unsupported")
761    return ""
def str_position_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition, generate_instance: bool = False) -> str:
764def str_position_sql(
765    self: Generator, expression: exp.StrPosition, generate_instance: bool = False
766) -> str:
767    this = self.sql(expression, "this")
768    substr = self.sql(expression, "substr")
769    position = self.sql(expression, "position")
770    instance = expression.args.get("instance") if generate_instance else None
771    position_offset = ""
772
773    if position:
774        # Normalize third 'pos' argument into 'SUBSTR(..) + offset' across dialects
775        this = self.func("SUBSTR", this, position)
776        position_offset = f" + {position} - 1"
777
778    return self.func("STRPOS", this, substr, instance) + position_offset
def struct_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StructExtract) -> str:
781def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
782    return (
783        f"{self.sql(expression, 'this')}.{self.sql(exp.to_identifier(expression.expression.name))}"
784    )
def var_map_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Map | sqlglot.expressions.VarMap, map_func_name: str = 'MAP') -> str:
787def var_map_sql(
788    self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
789) -> str:
790    keys = expression.args["keys"]
791    values = expression.args["values"]
792
793    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
794        self.unsupported("Cannot convert array columns into map.")
795        return self.func(map_func_name, keys, values)
796
797    args = []
798    for key, value in zip(keys.expressions, values.expressions):
799        args.append(self.sql(key))
800        args.append(self.sql(value))
801
802    return self.func(map_func_name, *args)
def build_formatted_time( exp_class: Type[~E], dialect: str, default: Union[str, bool, NoneType] = None) -> Callable[[List], ~E]:
805def build_formatted_time(
806    exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
807) -> t.Callable[[t.List], E]:
808    """Helper used for time expressions.
809
810    Args:
811        exp_class: the expression class to instantiate.
812        dialect: target sql dialect.
813        default: the default format, True being time.
814
815    Returns:
816        A callable that can be used to return the appropriately formatted time expression.
817    """
818
819    def _builder(args: t.List):
820        return exp_class(
821            this=seq_get(args, 0),
822            format=Dialect[dialect].format_time(
823                seq_get(args, 1)
824                or (Dialect[dialect].TIME_FORMAT if default is True else default or None)
825            ),
826        )
827
828    return _builder

Helper used for time expressions.

Arguments:
  • exp_class: the expression class to instantiate.
  • dialect: target sql dialect.
  • default: the default format, True being time.
Returns:

A callable that can be used to return the appropriately formatted time expression.
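
A sketch of the returned builder (output approximate; the format literal is translated through the named dialect's TIME_MAPPING):

>>> from sqlglot import exp
>>> from sqlglot.dialects.dialect import build_formatted_time
>>> builder = build_formatted_time(exp.StrToTime, "hive")
>>> builder([exp.column("x"), exp.Literal.string("yyyy-MM-dd")]).sql()
"STR_TO_TIME(x, '%Y-%m-%d')"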

def time_format( dialect: Union[str, Dialect, Type[Dialect], NoneType] = None) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.UnixToStr | sqlglot.expressions.StrToUnix], Optional[str]]:
831def time_format(
832    dialect: DialectType = None,
833) -> t.Callable[[Generator, exp.UnixToStr | exp.StrToUnix], t.Optional[str]]:
834    def _time_format(self: Generator, expression: exp.UnixToStr | exp.StrToUnix) -> t.Optional[str]:
835        """
836        Returns the time format for a given expression, unless it's equivalent
837        to the default time format of the dialect of interest.
838        """
839        time_format = self.format_time(expression)
840        return time_format if time_format != Dialect.get_or_raise(dialect).TIME_FORMAT else None
841
842    return _time_format
def build_date_delta( exp_class: Type[~E], unit_mapping: Optional[Dict[str, str]] = None) -> Callable[[List], ~E]:
845def build_date_delta(
846    exp_class: t.Type[E], unit_mapping: t.Optional[t.Dict[str, str]] = None
847) -> t.Callable[[t.List], E]:
848    def _builder(args: t.List) -> E:
849        unit_based = len(args) == 3
850        this = args[2] if unit_based else seq_get(args, 0)
851        unit = args[0] if unit_based else exp.Literal.string("DAY")
852        unit = exp.var(unit_mapping.get(unit.name.lower(), unit.name)) if unit_mapping else unit
853        return exp_class(this=this, expression=seq_get(args, 1), unit=unit)
854
855    return _builder
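
A sketch of the unit-based argument handling (output approximate):

>>> from sqlglot import exp
>>> from sqlglot.dialects.dialect import build_date_delta
>>> builder = build_date_delta(exp.DateAdd)
>>> # three arguments are read as (unit, value, this), as in DATEADD(day, 1, d)
>>> builder([exp.var("day"), exp.Literal.number(1), exp.column("d")]).sql()
'DATE_ADD(d, 1, day)'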
def build_date_delta_with_interval(expression_class: Type[~E]) -> Callable[[List], Optional[~E]]:
858def build_date_delta_with_interval(
859    expression_class: t.Type[E],
860) -> t.Callable[[t.List], t.Optional[E]]:
861    def _builder(args: t.List) -> t.Optional[E]:
862        if len(args) < 2:
863            return None
864
865        interval = args[1]
866
867        if not isinstance(interval, exp.Interval):
868            raise ParseError(f"INTERVAL expression expected but got '{interval}'")
869
870        expression = interval.this
871        if expression and expression.is_string:
872            expression = exp.Literal.number(expression.this)
873
874        return expression_class(this=args[0], expression=expression, unit=unit_to_str(interval))
875
876    return _builder
def date_trunc_to_time( args: List) -> sqlglot.expressions.DateTrunc | sqlglot.expressions.TimestampTrunc:
879def date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
880    unit = seq_get(args, 0)
881    this = seq_get(args, 1)
882
883    if isinstance(this, exp.Cast) and this.is_type("date"):
884        return exp.DateTrunc(unit=unit, this=this)
885    return exp.TimestampTrunc(this=this, unit=unit)
def date_add_interval_sql( data_type: str, kind: str) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.Expression], str]:
888def date_add_interval_sql(
889    data_type: str, kind: str
890) -> t.Callable[[Generator, exp.Expression], str]:
891    def func(self: Generator, expression: exp.Expression) -> str:
892        this = self.sql(expression, "this")
893        interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression))
894        return f"{data_type}_{kind}({this}, {self.sql(interval)})"
895
896    return func
def timestamptrunc_sql( zone: bool = False) -> Callable[[sqlglot.generator.Generator, sqlglot.expressions.TimestampTrunc], str]:
899def timestamptrunc_sql(zone: bool = False) -> t.Callable[[Generator, exp.TimestampTrunc], str]:
900    def _timestamptrunc_sql(self: Generator, expression: exp.TimestampTrunc) -> str:
901        args = [unit_to_str(expression), expression.this]
902        if zone:
903            args.append(expression.args.get("zone"))
904        return self.func("DATE_TRUNC", *args)
905
906    return _timestamptrunc_sql
def no_timestamp_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Timestamp) -> str:
909def no_timestamp_sql(self: Generator, expression: exp.Timestamp) -> str:
910    if not expression.expression:
911        from sqlglot.optimizer.annotate_types import annotate_types
912
913        target_type = annotate_types(expression).type or exp.DataType.Type.TIMESTAMP
914        return self.sql(exp.cast(expression.this, target_type))
915    if expression.text("expression").lower() in TIMEZONES:
916        return self.sql(
917            exp.AtTimeZone(
918                this=exp.cast(expression.this, exp.DataType.Type.TIMESTAMP),
919                zone=expression.expression,
920            )
921        )
922    return self.func("TIMESTAMP", expression.this, expression.expression)
def locate_to_strposition(args: List) -> sqlglot.expressions.Expression:
925def locate_to_strposition(args: t.List) -> exp.Expression:
926    return exp.StrPosition(
927        this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2)
928    )
def strposition_to_locate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.StrPosition) -> str:
931def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str:
932    return self.func(
933        "LOCATE", expression.args.get("substr"), expression.this, expression.args.get("position")
934    )
def left_to_substring_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Left) -> str:
937def left_to_substring_sql(self: Generator, expression: exp.Left) -> str:
938    return self.sql(
939        exp.Substring(
940            this=expression.this, start=exp.Literal.number(1), length=expression.expression
941        )
942    )
def right_to_substring_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Right) -> str:
945def right_to_substring_sql(self: Generator, expression: exp.Right) -> str:
946    return self.sql(
947        exp.Substring(
948            this=expression.this,
949            start=exp.Length(this=expression.this) - exp.paren(expression.expression - 1),
950        )
951    )
def timestrtotime_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.TimeStrToTime) -> str:
954def timestrtotime_sql(self: Generator, expression: exp.TimeStrToTime) -> str:
955    return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
def datestrtodate_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.DateStrToDate) -> str:
958def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str:
959    return self.sql(exp.cast(expression.this, exp.DataType.Type.DATE))
def encode_decode_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Expression, name: str, replace: bool = True) -> str:
963def encode_decode_sql(
964    self: Generator, expression: exp.Expression, name: str, replace: bool = True
965) -> str:
966    charset = expression.args.get("charset")
967    if charset and charset.name.lower() != "utf-8":
968        self.unsupported(f"Expected utf-8 character set, got {charset}.")
969
970    return self.func(name, expression.this, expression.args.get("replace") if replace else None)
def min_or_least( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Min) -> str:
973def min_or_least(self: Generator, expression: exp.Min) -> str:
974    name = "LEAST" if expression.expressions else "MIN"
975    return rename_func(name)(self, expression)
def max_or_greatest( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Max) -> str:
978def max_or_greatest(self: Generator, expression: exp.Max) -> str:
979    name = "GREATEST" if expression.expressions else "MAX"
980    return rename_func(name)(self, expression)
def count_if_to_sum( self: sqlglot.generator.Generator, expression: sqlglot.expressions.CountIf) -> str:
983def count_if_to_sum(self: Generator, expression: exp.CountIf) -> str:
984    cond = expression.this
985
986    if isinstance(expression.this, exp.Distinct):
987        cond = expression.this.expressions[0]
988        self.unsupported("DISTINCT is not supported when converting COUNT_IF to SUM")
989
990    return self.func("sum", exp.func("if", cond, 1, 0))
def trim_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Trim) -> str:
 993def trim_sql(self: Generator, expression: exp.Trim) -> str:
 994    target = self.sql(expression, "this")
 995    trim_type = self.sql(expression, "position")
 996    remove_chars = self.sql(expression, "expression")
 997    collation = self.sql(expression, "collation")
 998
 999    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
1000    if not remove_chars and not collation:
1001        return self.trim_sql(expression)
1002
1003    trim_type = f"{trim_type} " if trim_type else ""
1004    remove_chars = f"{remove_chars} " if remove_chars else ""
1005    from_part = "FROM " if trim_type or remove_chars else ""
1006    collation = f" COLLATE {collation}" if collation else ""
1007    return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
def str_to_time_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Expression) -> str:
1010def str_to_time_sql(self: Generator, expression: exp.Expression) -> str:
1011    return self.func("STRPTIME", expression.this, self.format_time(expression))
def concat_to_dpipe_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.Concat) -> str:
1014def concat_to_dpipe_sql(self: Generator, expression: exp.Concat) -> str:
1015    return self.sql(reduce(lambda x, y: exp.DPipe(this=x, expression=y), expression.expressions))
def concat_ws_to_dpipe_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.ConcatWs) -> str:
1018def concat_ws_to_dpipe_sql(self: Generator, expression: exp.ConcatWs) -> str:
1019    delim, *rest_args = expression.expressions
1020    return self.sql(
1021        reduce(
1022            lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)),
1023            rest_args,
1024        )
1025    )
def regexp_extract_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.RegexpExtract) -> str:
1028def regexp_extract_sql(self: Generator, expression: exp.RegexpExtract) -> str:
1029    bad_args = list(filter(expression.args.get, ("position", "occurrence", "parameters")))
1030    if bad_args:
1031        self.unsupported(f"REGEXP_EXTRACT does not support the following arg(s): {bad_args}")
1032
1033    return self.func(
1034        "REGEXP_EXTRACT", expression.this, expression.expression, expression.args.get("group")
1035    )
def regexp_replace_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.RegexpReplace) -> str:
1038def regexp_replace_sql(self: Generator, expression: exp.RegexpReplace) -> str:
1039    bad_args = list(filter(expression.args.get, ("position", "occurrence", "modifiers")))
1040    if bad_args:
1041        self.unsupported(f"REGEXP_REPLACE does not support the following arg(s): {bad_args}")
1042
1043    return self.func(
1044        "REGEXP_REPLACE", expression.this, expression.expression, expression.args["replacement"]
1045    )
def pivot_column_names( aggregations: List[sqlglot.expressions.Expression], dialect: Union[str, Dialect, Type[Dialect], NoneType]) -> List[str]:
1048def pivot_column_names(aggregations: t.List[exp.Expression], dialect: DialectType) -> t.List[str]:
1049    names = []
1050    for agg in aggregations:
1051        if isinstance(agg, exp.Alias):
1052            names.append(agg.alias)
1053        else:
1054            """
1055            This case corresponds to aggregations without aliases being used as suffixes
1056            (e.g. col_avg(foo)). We need to unquote identifiers because they're going to
1057            be quoted in the base parser's `_parse_pivot` method, due to `to_identifier`.
1058            Otherwise, we'd end up with `col_avg(`foo`)` (notice the double quotes).
1059            """
1060            agg_all_unquoted = agg.transform(
1061                lambda node: (
1062                    exp.Identifier(this=node.name, quoted=False)
1063                    if isinstance(node, exp.Identifier)
1064                    else node
1065                )
1066            )
1067            names.append(agg_all_unquoted.sql(dialect=dialect, normalize_functions="lower"))
1068
1069    return names
def binary_from_function(expr_type: Type[~B]) -> Callable[[List], ~B]:
1072def binary_from_function(expr_type: t.Type[B]) -> t.Callable[[t.List], B]:
1073    return lambda args: expr_type(this=seq_get(args, 0), expression=seq_get(args, 1))
def build_timestamp_trunc(args: List) -> sqlglot.expressions.TimestampTrunc:
1077def build_timestamp_trunc(args: t.List) -> exp.TimestampTrunc:
1078    return exp.TimestampTrunc(this=seq_get(args, 1), unit=seq_get(args, 0))
def any_value_to_max_sql( self: sqlglot.generator.Generator, expression: sqlglot.expressions.AnyValue) -> str:
1081def any_value_to_max_sql(self: Generator, expression: exp.AnyValue) -> str:
1082    return self.func("MAX", expression.this)
1085def bool_xor_sql(self: Generator, expression: exp.Xor) -> str:
1086    a = self.sql(expression.left)
1087    b = self.sql(expression.right)
1088    return f"({a} AND (NOT {b})) OR ((NOT {a}) AND {b})"
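That is, XOR is expanded into its AND/OR/NOT equivalent for dialects without a boolean XOR operator. For instance:

    from sqlglot import parse_one
    from sqlglot.dialects.dialect import Dialect, bool_xor_sql

    gen = Dialect().generator()
    xor = parse_one("a XOR b", read="mysql")
    print(bool_xor_sql(gen, xor))  # (a AND (NOT b)) OR ((NOT a) AND b)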
1091def is_parse_json(expression: exp.Expression) -> bool:
1092    return isinstance(expression, exp.ParseJSON) or (
1093        isinstance(expression, exp.Cast) and expression.is_type("json")
1094    )
1097def isnull_to_is_null(args: t.List) -> exp.Expression:
1098    return exp.Paren(this=exp.Is(this=seq_get(args, 0), expression=exp.null()))
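A quick sketch of the rewrite:

    from sqlglot import exp
    from sqlglot.dialects.dialect import isnull_to_is_null

    print(isnull_to_is_null([exp.column("x")]).sql())  # (x IS NULL)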
1101def generatedasidentitycolumnconstraint_sql(
1102    self: Generator, expression: exp.GeneratedAsIdentityColumnConstraint
1103) -> str:
1104    start = self.sql(expression, "start") or "1"
1105    increment = self.sql(expression, "increment") or "1"
1106    return f"IDENTITY({start}, {increment})"
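The defaults kick in when start/increment are omitted, e.g. (illustrative direct call):

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect, generatedasidentitycolumnconstraint_sql

    gen = Dialect().generator()
    constraint = exp.GeneratedAsIdentityColumnConstraint(
        start=exp.Literal.number(10), increment=exp.Literal.number(5)
    )
    print(generatedasidentitycolumnconstraint_sql(gen, constraint))  # IDENTITY(10, 5)

    empty = exp.GeneratedAsIdentityColumnConstraint()
    print(generatedasidentitycolumnconstraint_sql(gen, empty))  # IDENTITY(1, 1)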
1109def arg_max_or_min_no_count(name: str) -> t.Callable[[Generator, exp.ArgMax | exp.ArgMin], str]:
1110    def _arg_max_or_min_sql(self: Generator, expression: exp.ArgMax | exp.ArgMin) -> str:
1111        if expression.args.get("count"):
1112            self.unsupported(f"Only two arguments are supported in function {name}.")
1113
1114        return self.func(name, expression.this, expression.expression)
1115
1116    return _arg_max_or_min_sql
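Usage sketch:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect, arg_max_or_min_no_count

    gen = Dialect().generator()
    max_by_sql = arg_max_or_min_no_count("MAX_BY")
    node = exp.ArgMax(this=exp.column("x"), expression=exp.column("y"))
    print(max_by_sql(gen, node))  # MAX_BY(x, y)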
1119def ts_or_ds_add_cast(expression: exp.TsOrDsAdd) -> exp.TsOrDsAdd:
1120    this = expression.this.copy()
1121
1122    return_type = expression.return_type
1123    if return_type.is_type(exp.DataType.Type.DATE):
1124        # If we need to cast to a DATE, we cast to TIMESTAMP first to make sure we
1125        # can truncate timestamp strings, because some dialects can't cast them to DATE
1126        this = exp.cast(this, exp.DataType.Type.TIMESTAMP)
1127
1128    expression.this.replace(exp.cast(this, return_type))
1129    return expression
1132def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE_ADD_OR_DIFF], str]:
1133    def _delta_sql(self: Generator, expression: DATE_ADD_OR_DIFF) -> str:
1134        if cast and isinstance(expression, exp.TsOrDsAdd):
1135            expression = ts_or_ds_add_cast(expression)
1136
1137        return self.func(
1138            name,
1139            unit_to_var(expression),
1140            expression.expression,
1141            expression.this,
1142        )
1143
1144    return _delta_sql
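A sketch combining the two helpers: with cast=True, a TS_OR_DS_ADD over a string gets the double cast described above (output shown for the base dialect):

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect, date_delta_sql

    gen = Dialect().generator()
    dateadd = date_delta_sql("DATEADD", cast=True)
    node = exp.TsOrDsAdd(
        this=exp.Literal.string("2024-01-01"),
        expression=exp.Literal.number(1),
        unit=exp.var("DAY"),
        return_type=exp.DataType.build("DATE"),
    )
    print(dateadd(gen, node))
    # DATEADD(DAY, 1, CAST(CAST('2024-01-01' AS TIMESTAMP) AS DATE))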
1147def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1148    unit = expression.args.get("unit")
1149
1150    if isinstance(unit, exp.Placeholder):
1151        return unit
1152    if unit:
1153        return exp.Literal.string(unit.name)
1154    return exp.Literal.string(default) if default else None
1157def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
1158    unit = expression.args.get("unit")
1159
1160    if isinstance(unit, (exp.Var, exp.Placeholder)):
1161        return unit
1162    return exp.Var(this=default) if default else None
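The difference between the two is just the node type produced, e.g.:

    from sqlglot import exp
    from sqlglot.dialects.dialect import unit_to_str, unit_to_var

    node = exp.DateAdd(
        this=exp.column("x"), expression=exp.Literal.number(1), unit=exp.var("MONTH")
    )
    print(unit_to_str(node).sql())  # 'MONTH' -- a string literal
    print(unit_to_var(node).sql())  # MONTH -- a bare variable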
1177def map_date_part(part, dialect: DialectType = Dialect):
1178    mapped = (
1179        Dialect.get_or_raise(dialect).DATE_PART_MAPPING.get(part.name.upper()) if part else None
1180    )
1181    return exp.var(mapped) if mapped else part
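For example, assuming the base DATE_PART_MAPPING contains the usual Snowflake-style abbreviations:

    from sqlglot import exp
    from sqlglot.dialects.dialect import map_date_part

    print(map_date_part(exp.var("yy")).sql())  # YEAR -- mapped via DATE_PART_MAPPING
    print(map_date_part(exp.var("year")).sql())  # year -- returned as-is when not in the mapping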
1184def no_last_day_sql(self: Generator, expression: exp.LastDay) -> str:
1185    trunc_curr_date = exp.func("date_trunc", "month", expression.this)
1186    plus_one_month = exp.func("date_add", trunc_curr_date, 1, "month")
1187    minus_one_day = exp.func("date_sub", plus_one_month, 1, "day")
1188
1189    return self.sql(exp.cast(minus_one_day, exp.DataType.Type.DATE))
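So LAST_DAY(d) becomes: truncate to the month, add a month, subtract a day. Roughly (base-dialect rendering; spelling varies by dialect):

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect, no_last_day_sql

    gen = Dialect().generator()
    print(no_last_day_sql(gen, exp.LastDay(this=exp.column("d"))))
    # roughly: CAST(DATE_SUB(DATE_ADD(DATE_TRUNC('month', d), 1, 'month'), 1, 'day') AS DATE)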
1192def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str:
1193    """Remove table refs from columns in when statements."""
1194    alias = expression.this.args.get("alias")
1195
1196    def normalize(identifier: t.Optional[exp.Identifier]) -> t.Optional[str]:
1197        return self.dialect.normalize_identifier(identifier).name if identifier else None
1198
1199    targets = {normalize(expression.this.this)}
1200
1201    if alias:
1202        targets.add(normalize(alias.this))
1203
1204    for when in expression.expressions:
1205        when.transform(
1206            lambda node: (
1207                exp.column(node.this)
1208                if isinstance(node, exp.Column) and normalize(node.args.get("table")) in targets
1209                else node
1210            ),
1211            copy=False,
1212        )
1213
1214    return self.merge_sql(expression)
1217def build_json_extract_path(
1218    expr_type: t.Type[F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False
1219) -> t.Callable[[t.List], F]:
1220    def _builder(args: t.List) -> F:
1221        segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
1222        for arg in args[1:]:
1223            if not isinstance(arg, exp.Literal):
1224                # We use the fallback parser because we can't really transpile non-literals safely
1225                return expr_type.from_arg_list(args)
1226
1227            text = arg.name
1228            if is_int(text):
1229                index = int(text)
1230                segments.append(
1231                    exp.JSONPathSubscript(this=index if zero_based_indexing else index - 1)
1232                )
1233            else:
1234                segments.append(exp.JSONPathKey(this=text))
1235
1236        # This is done to avoid failing in the expression validator due to the arg count
1237        del args[2:]
1238        return expr_type(
1239            this=seq_get(args, 0),
1240            expression=exp.JSONPath(expressions=segments),
1241            only_json_types=arrow_req_json_type,
1242        )
1243
1244    return _builder
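A sketch of the builder in action; literal arguments past the first are folded into a structured JSONPath (the commented paths assume the default single-quoted rendering):

    from sqlglot import exp
    from sqlglot.dialects.dialect import build_json_extract_path

    builder = build_json_extract_path(exp.JSONExtract)
    node = builder([exp.column("doc"), exp.Literal.string("a"), exp.Literal.number(0)])
    print(node.expression.sql())  # '$.a[0]'

    # With one-based indexing, literal indices are shifted down by one:
    one_based = build_json_extract_path(exp.JSONExtract, zero_based_indexing=False)
    node = one_based([exp.column("doc"), exp.Literal.string("a"), exp.Literal.number(1)])
    print(node.expression.sql())  # '$.a[0]'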
1247def json_extract_segments(
1248    name: str, quoted_index: bool = True, op: t.Optional[str] = None
1249) -> t.Callable[[Generator, JSON_EXTRACT_TYPE], str]:
1250    def _json_extract_segments(self: Generator, expression: JSON_EXTRACT_TYPE) -> str:
1251        path = expression.expression
1252        if not isinstance(path, exp.JSONPath):
1253            return rename_func(name)(self, expression)
1254
1255        segments = []
1256        for segment in path.expressions:
1257            path = self.sql(segment)
1258            if path:
1259                if isinstance(segment, exp.JSONPathPart) and (
1260                    quoted_index or not isinstance(segment, exp.JSONPathSubscript)
1261                ):
1262                    path = f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}"
1263
1264                segments.append(path)
1265
1266        if op:
1267            return f" {op} ".join([self.sql(expression.this), *segments])
1268        return self.func(name, expression.this, *segments)
1269
1270    return _json_extract_segments
1273def json_path_key_only_name(self: Generator, expression: exp.JSONPathKey) -> str:
1274    if isinstance(expression.this, exp.JSONPathWildcard):
1275        self.unsupported("Unsupported wildcard in JSONPathKey expression")
1276
1277    return expression.name
1280def filter_array_using_unnest(self: Generator, expression: exp.ArrayFilter) -> str:
1281    cond = expression.expression
1282    if isinstance(cond, exp.Lambda) and len(cond.expressions) == 1:
1283        alias = cond.expressions[0]
1284        cond = cond.this
1285    elif isinstance(cond, exp.Predicate):
1286        alias = "_u"
1287    else:
1288        self.unsupported("Unsupported filter condition")
1289        return ""
1290
1291    unnest = exp.Unnest(expressions=[expression.this])
1292    filtered = exp.select(alias).from_(exp.alias_(unnest, None, table=[alias])).where(cond)
1293    return self.sql(exp.Array(expressions=[filtered]))
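An illustrative direct call; the lambda's parameter becomes the unnested column alias (the exact ARRAY spelling is dialect-specific, so the output is only sketched in the comment):

    from sqlglot import parse_one
    from sqlglot.dialects.dialect import Dialect, filter_array_using_unnest

    gen = Dialect().generator()
    node = parse_one("FILTER(arr, x -> x > 0)")
    # Prints an ARRAY wrapper around: SELECT x FROM UNNEST(arr) ... WHERE x > 0
    print(filter_array_using_unnest(gen, node))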
1296def to_number_with_nls_param(self: Generator, expression: exp.ToNumber) -> str:
1297    return self.func(
1298        "TO_NUMBER",
1299        expression.this,
1300        expression.args.get("format"),
1301        expression.args.get("nlsparam"),
1302    )
1305def build_default_decimal_type(
1306    precision: t.Optional[int] = None, scale: t.Optional[int] = None
1307) -> t.Callable[[exp.DataType], exp.DataType]:
1308    def _builder(dtype: exp.DataType) -> exp.DataType:
1309        if dtype.expressions or precision is None:
1310            return dtype
1311
1312        params = f"{precision}{f', {scale}' if scale is not None else ''}"
1313        return exp.DataType.build(f"DECIMAL({params})")
1314
1315    return _builder
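For example, a dialect whose bare DECIMAL defaults to (38, 9):

    from sqlglot import exp
    from sqlglot.dialects.dialect import build_default_decimal_type

    builder = build_default_decimal_type(precision=38, scale=9)
    print(builder(exp.DataType.build("DECIMAL")).sql())  # DECIMAL(38, 9)
    print(builder(exp.DataType.build("DECIMAL(10, 2)")).sql())  # DECIMAL(10, 2) -- kept as-is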
1318def build_timestamp_from_parts(args: t.List) -> exp.Func:
1319    if len(args) == 2:
1320        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
1321        # so we parse this into Anonymous for now instead of introducing complexity
1322        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)
1323
1324    return exp.TimestampFromParts.from_arg_list(args)
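Sketch of both branches:

    from sqlglot import exp
    from sqlglot.dialects.dialect import build_timestamp_from_parts

    two = build_timestamp_from_parts([exp.column("d"), exp.column("t")])
    print(two.sql())  # TIMESTAMP_FROM_PARTS(d, t) -- stays Anonymous

    six = build_timestamp_from_parts([exp.Literal.number(n) for n in (2024, 1, 1, 0, 0, 0)])
    print(type(six).__name__)  # TimestampFromParts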
1327def sha256_sql(self: Generator, expression: exp.SHA2) -> str:
1328    return self.func(f"SHA{expression.text('length') or '256'}", expression.this)
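The length argument selects the function name, defaulting to 256:

    from sqlglot import parse_one
    from sqlglot.dialects.dialect import Dialect, sha256_sql

    gen = Dialect().generator()
    print(sha256_sql(gen, parse_one("SHA2(x, 256)")))  # SHA256(x)
    print(sha256_sql(gen, parse_one("SHA2(x, 512)")))  # SHA512(x)
    print(sha256_sql(gen, parse_one("SHA2(x)")))  # SHA256(x)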