Source code for jsonpolars.base_expr

# -*- coding: utf-8 -*-

import typing as T
import enum
import dataclasses

import polars as pl

from .arg import REQ, rm_na, T_KWARGS
from .model import BaseModel

if T.TYPE_CHECKING:  # pragma: no cover
    from .expr.api import T_EXPR


class ExprEnum(str, enum.Enum):
    """
    String identifiers for every expression kind known to this module.

    Each member is also a ``str``, so it compares equal to its raw value.
    The members are grouped by the section comments below; sections that
    have a header but no members are kept as placeholders.
    """

    # --- Aggregation ---
    agg = "agg"
    agg_groups = "agg_groups"
    arg_max = "arg_max"
    arg_min = "arg_min"
    agg_count = "agg_count"
    agg_first = "agg_first"
    agg_implode = "agg_implode"
    agg_last = "agg_last"
    agg_len = "agg_len"
    agg_max = "agg_max"
    agg_mean = "agg_mean"
    agg_median = "agg_median"
    agg_min = "agg_min"
    agg_nan_max = "agg_nan_max"
    agg_nan_min = "agg_nan_min"
    agg_product = "agg_product"
    agg_quantile = "agg_quantile"
    agg_std = "agg_std"
    agg_sum = "agg_sum"
    agg_var = "agg_var"

    # --- Array ---
    arr = "arr"
    arr_max = "arr_max"
    arr_min = "arr_min"
    arr_median = "arr_median"
    arr_sum = "arr_sum"
    arr_std = "arr_std"
    arr_to_list = "arr_to_list"
    arr_unique = "arr_unique"
    arr_n_unique = "arr_n_unique"
    arr_var = "arr_var"
    arr_all = "arr_all"
    arr_any = "arr_any"
    arr_sort = "arr_sort"
    arr_reverse = "arr_reverse"
    arr_arg_min = "arr_arg_min"
    arr_arg_max = "arr_arg_max"
    arr_get = "arr_get"
    arr_first = "arr_first"
    arr_last = "arr_last"
    arr_join = "arr_join"
    arr_explode = "arr_explode"
    arr_contains = "arr_contains"
    arr_count_matches = "arr_count_matches"
    arr_to_struct = "arr_to_struct"
    arr_shift = "arr_shift"

    # --- Binary ---
    binary = "binary"
    binary_contains = "binary_contains"
    binary_decode = "binary_decode"
    binary_encode = "binary_encode"
    binary_ends_with = "binary_ends_with"
    binary_size = "binary_size"
    binary_starts_with = "binary_starts_with"

    # --- Boolean ---

    # --- Categories ---

    # --- Columns / names ---
    column = "column"
    alias = "alias"

    # --- Computation ---

    # --- Functions ---
    func_all = "func_all"
    func_all_horizontal = "func_all_horizontal"
    func_any = "func_any"
    func_any_horizontal = "func_any_horizontal"
    func_approx_n_unique = "func_approx_n_unique"
    func_arange = "func_arange"
    func_arctan2 = "func_arctan2"
    func_arctan2d = "func_arctan2d"
    func_arg_sort_by = "func_arg_sort_by"
    func_arg_where = "func_arg_where"
    func_business_day_count = "func_business_day_count"
    func_coalesce = "func_coalesce"
    func_concat_list = "func_concat_list"
    func_concat_str = "func_concat_str"
    func_corr = "func_corr"
    func_count = "func_count"
    func_cov = "func_cov"
    func_cum_count = "func_cum_count"
    func_cum_fold = "func_cum_fold"
    func_cum_reduce = "func_cum_reduce"
    func_cum_sum = "func_cum_sum"
    func_cum_sum_horizontal = "func_cum_sum_horizontal"
    func_date = "func_date"
    func_datetime = "func_datetime"
    func_date_range = "func_date_range"
    func_date_ranges = "func_date_ranges"
    func_datetime_range = "func_datetime_range"
    func_datetime_ranges = "func_datetime_ranges"
    func_duration = "func_duration"
    func_element = "func_element"
    func_exclude = "func_exclude"
    func_first = "func_first"
    func_fold = "func_fold"
    func_format = "func_format"
    func_from_epoch = "func_from_epoch"
    func_groups = "func_groups"
    func_head = "func_head"
    func_implode = "func_implode"
    func_int_range = "func_int_range"
    func_int_ranges = "func_int_ranges"
    func_last = "func_last"
    func_len = "func_len"
    func_lit = "func_lit"
    func_map_batches = "func_map_batches"
    func_map_groups = "func_map_groups"
    func_max = "func_max"
    func_max_horizontal = "func_max_horizontal"
    func_mean = "func_mean"
    func_mean_horizontal = "func_mean_horizontal"
    func_median = "func_median"
    func_min = "func_min"
    func_min_horizontal = "func_min_horizontal"
    func_n_unique = "func_n_unique"
    func_nth = "func_nth"
    func_ones = "func_ones"
    func_quantile = "func_quantile"
    func_reduce = "func_reduce"
    func_repeat = "func_repeat"
    func_rolling_corr = "func_rolling_corr"
    func_rolling_cov = "func_rolling_cov"
    func_select = "func_select"
    func_std = "func_std"
    func_struct = "func_struct"
    func_sum = "func_sum"
    func_sum_horizontal = "func_sum_horizontal"
    func_sql = "func_sql"
    func_sql_expr = "func_sql_expr"
    func_tail = "func_tail"
    func_time = "func_time"
    func_time_range = "func_time_range"
    func_time_ranges = "func_time_ranges"
    func_var = "func_var"
    func_when = "func_when"
    func_zeros = "func_zeros"
    # plus = "plus"
    # minus = "minus"
    # multiple = "multiple"
    # divide = "divide"

    # --- List ---
    list = "list"
    list_all = "list_all"
    list_any = "list_any"
    list_drop_nulls = "list_drop_nulls"
    list_arg_max = "list_arg_max"
    list_arg_min = "list_arg_min"
    list_concat = "list_concat"
    list_contains = "list_contains"
    list_count_matches = "list_count_matches"
    list_diff = "list_diff"
    list_eval = "list_eval"
    list_explode = "list_explode"
    list_first = "list_first"
    list_gather = "list_gather"
    list_get = "list_get"
    list_head = "list_head"
    list_join = "list_join"
    list_last = "list_last"
    list_len = "list_len"
    list_max = "list_max"
    list_mean = "list_mean"
    list_median = "list_median"
    list_min = "list_min"
    list_reverse = "list_reverse"
    list_sample = "list_sample"
    list_set_difference = "list_set_difference"
    list_set_intersection = "list_set_intersection"
    list_set_symmetric_difference = "list_set_symmetric_difference"
    list_set_union = "list_set_union"
    list_shift = "list_shift"
    list_slice = "list_slice"
    list_sort = "list_sort"
    list_std = "list_std"
    list_sum = "list_sum"
    list_tail = "list_tail"
    list_to_array = "list_to_array"
    list_to_struct = "list_to_struct"
    list_unique = "list_unique"
    list_n_unique = "list_n_unique"
    list_var = "list_var"
    list_gather_every = "list_gather_every"

    # --- Manipulation / selection ---
    append = "append"
    arg_sort = "arg_sort"
    arg_true = "arg_true"
    backward_fill = "backward_fill"
    bottom_k = "bottom_k"
    bottom_k_by = "bottom_k_by"
    cast = "cast"
    ceil = "ceil"
    clip = "clip"
    cut = "cut"
    drop_nans = "drop_nans"
    drop_nulls = "drop_nulls"
    explode = "explode"
    extend_constant = "extend_constant"
    fill_nan = "fill_nan"
    fill_null = "fill_null"
    filter = "filter"
    flatten = "flatten"
    floor = "floor"
    forward_fill = "forward_fill"
    gather = "gather"
    gather_every = "gather_every"
    get = "get"
    head = "head"
    inspect = "inspect"
    interpolate = "interpolate"
    interpolate_by = "interpolate_by"
    limit = "limit"
    lower_bound = "lower_bound"
    pipe = "pipe"
    qcut = "qcut"
    rechunk = "rechunk"
    reinterpret = "reinterpret"
    repeat_by = "repeat_by"
    replace = "replace"
    replace_strict = "replace_strict"
    reshape = "reshape"
    reverse = "reverse"
    rle = "rle"
    rle_id = "rle_id"
    round = "round"
    round_sig_figs = "round_sig_figs"
    sample = "sample"
    shift = "shift"
    shrink_dtype = "shrink_dtype"
    shuffle = "shuffle"
    slice = "slice"
    sort = "sort"
    sort_by = "sort_by"
    tail = "tail"
    to_physical = "to_physical"
    top_k = "top_k"
    top_k_by = "top_k_by"
    upper_bound = "upper_bound"
    where = "where"

    # --- Meta ---

    # --- Miscellaneous ---

    # --- Name ---

    # --- Operators ---
    # ``and`` / ``or`` are Python keywords, hence the trailing underscore on
    # the member names; the stored values are the plain words.
    and_ = "and"
    or_ = "or"
    eq = "eq"
    eq_missing = "eq_missing"
    ge = "ge"
    gt = "gt"
    le = "le"
    lt = "lt"
    ne = "ne"
    ne_missing = "ne_missing"
    add = "add"
    floordiv = "floordiv"
    mod = "mod"
    mul = "mul"
    neg = "neg"
    sub = "sub"
    truediv = "truediv"
    pow = "pow"
    xor = "xor"

    # --- String ---
    string = "string"
    str_concat = "str_concat"
    str_contains = "str_contains"
    str_contains_any = "str_contains_any"
    str_count_matches = "str_count_matches"
    str_decode = "str_decode"
    str_encode = "str_encode"
    str_ends_with = "str_ends_with"
    str_explode = "str_explode"
    str_extract = "str_extract"
    str_extract_all = "str_extract_all"
    str_extract_groups = "str_extract_groups"
    str_extract_many = "str_extract_many"
    str_find = "str_find"
    str_head = "str_head"
    str_join = "str_join"
    str_json_decode = "str_json_decode"
    str_json_path_match = "str_json_path_match"
    str_len_bytes = "str_len_bytes"
    str_len_chars = "str_len_chars"
    str_pad_end = "str_pad_end"
    str_pad_start = "str_pad_start"
    str_replace = "str_replace"
    str_replace_all = "str_replace_all"
    str_replace_many = "str_replace_many"
    str_reverse = "str_reverse"
    str_slice = "str_slice"
    str_split = "str_split"
    str_split_exact = "str_split_exact"
    str_splitn = "str_splitn"
    str_starts_with = "str_starts_with"
    str_strip_chars = "str_strip_chars"
    str_strip_chars_start = "str_strip_chars_start"
    str_strip_chars_end = "str_strip_chars_end"
    str_strip_prefix = "str_strip_prefix"
    str_strip_suffix = "str_strip_suffix"
    str_strptime = "str_strptime"
    str_tail = "str_tail"
    str_to_date = "str_to_date"
    str_to_datetime = "str_to_datetime"
    str_to_decimal = "str_to_decimal"
    str_to_integer = "str_to_integer"
    str_to_lowercase = "str_to_lowercase"
    str_to_titlecase = "str_to_titlecase"
    str_to_time = "str_to_time"
    str_to_uppercase = "str_to_uppercase"
    str_zfill = "str_zfill"

    # --- Struct ---
    struct = "struct"
    func_field = "func_field"
    struct_field = "struct_field"
    struct_json_encode = "struct_json_encode"
    struct_rename_fields = "struct_rename_fields"
    struct_with_fields = "struct_with_fields"

    # --- Temporal ---
    # NOTE: the value of ``dt`` is "datetime", not "dt".
    dt = "datetime"
    dt_add_business_days = "dt_add_business_days"
    dt_base_utc_offset = "dt_base_utc_offset"
    dt_cast_time_unit = "dt_cast_time_unit"
    dt_century = "dt_century"
    dt_combine = "dt_combine"
    dt_convert_time_zone = "dt_convert_time_zone"
    dt_date = "dt_date"
    dt_datetime = "dt_datetime"
    dt_day = "dt_day"
    dt_dst_offset = "dt_dst_offset"
    dt_epoch = "dt_epoch"
    dt_hour = "dt_hour"
    dt_is_leap_year = "dt_is_leap_year"
    dt_iso_year = "dt_iso_year"
    dt_microsecond = "dt_microsecond"
    dt_millennium = "dt_millennium"
    dt_millisecond = "dt_millisecond"
    dt_minute = "dt_minute"
    dt_month = "dt_month"
    dt_month_end = "dt_month_end"
    dt_month_start = "dt_month_start"
    dt_nanosecond = "dt_nanosecond"
    dt_offset_by = "dt_offset_by"
    dt_ordinal_day = "dt_ordinal_day"
    dt_quarter = "dt_quarter"
    dt_replace_time_zone = "dt_replace_time_zone"
    dt_round = "dt_round"
    dt_second = "dt_second"
    dt_strftime = "dt_strftime"
    dt_time = "dt_time"
    dt_timestamp = "dt_timestamp"
    dt_to_string = "dt_to_string"
    dt_total_days = "dt_total_days"
    dt_total_hours = "dt_total_hours"
    dt_total_microseconds = "dt_total_microseconds"
    dt_total_milliseconds = "dt_total_milliseconds"
    dt_total_minutes = "dt_total_minutes"
    dt_total_nanoseconds = "dt_total_nanoseconds"
    dt_total_seconds = "dt_total_seconds"
    dt_truncate = "dt_truncate"
    dt_week = "dt_week"
    dt_weekday = "dt_weekday"
    dt_with_time_unit = "dt_with_time_unit"
    dt_year = "dt_year"

    # --- Window ---
def to_dict(inst) -> T_KWARGS:
    """
    Convert an instance of ``BaseExpr`` to a dict.

    The resulting dict can be fed to the ``BaseExpr.from_dict`` method to
    create an identical instance of the original ``BaseExpr`` instance.

    The conversion is recursive:

    - a ``BaseExpr`` is converted through its own ``to_dict`` method;
    - a ``tuple`` or ``list`` is rebuilt as the same container type with
      every item converted;
    - a ``dict`` has every value converted and is then passed through
      ``rm_na``;
    - anything else is returned unchanged.
    """
    if isinstance(inst, BaseExpr):
        return inst.to_dict()

    if isinstance(inst, (tuple, list)):
        converted_items = [to_dict(item) for item in inst]
        # Rebuild with the incoming container type (tuple stays tuple, ...).
        return type(inst)(converted_items)

    if isinstance(inst, dict):
        converted_map = {key: to_dict(value) for key, value in inst.items()}
        return rm_na(**converted_map)

    # Plain value: nothing to convert.
    return inst
@dataclasses.dataclass
class BaseExpr(BaseModel):
    """
    Base dataclass for expression models.

    Provides dict round-tripping (``to_dict`` / ``from_dict``) and declares
    ``to_polars``, which raises ``NotImplementedError`` here.
    """

    # Expression type identifier; defaults to the ``REQ`` sentinel.
    type: str = dataclasses.field(default=REQ)

    def to_dict(self) -> T_KWARGS:
        """
        Serialize every dataclass field of this instance into a dict.

        Each field value is converted with the module-level ``to_dict``
        function, and the collected mapping is passed through ``rm_na``.
        """
        serialized = {
            spec.name: to_dict(getattr(self, spec.name))
            for spec in dataclasses.fields(self.__class__)
        }
        return rm_na(**serialized)

    @classmethod
    def from_dict(cls, dct: T_KWARGS):
        """
        Create an instance of ``BaseExpr`` from either a human created dict,
        or a dict created by the ``BaseExpr.to_dict`` method.
        """
        required, optional = cls._split_req_opt(dct)
        cleaned_optional = rm_na(**optional)
        return cls(**required, **cleaned_optional)

    def to_polars(self) -> pl.Expr:
        """
        Return the polars expression for this model.

        Not implemented on the base class.
        """
        raise NotImplementedError()
# Registry from an expression "type" string to the class handling it.
# It starts empty here; entries are presumably registered by the expression
# modules when they are imported (verify against jsonpolars.expr).
expr_enum_to_klass_mapping: T.Dict[str, T.Type["T_EXPR"]] = {}
def parse_expr(dct: T.Dict[str, T.Any]) -> "T_EXPR":
    """
    Build an expression object from its dict representation.

    The value under the ``"type"`` key selects the class from
    ``expr_enum_to_klass_mapping``; that class's ``from_dict`` then receives
    the whole dict.

    Note: you have to import everything in the :mod:`jsonpolars.expr` module
    to make this work.
    """
    expr_type = dct["type"]
    klass = expr_enum_to_klass_mapping[expr_type]
    return klass.from_dict(dct)