# Source code for jsonpolars.base_expr
# -*- coding: utf-8 -*-
import typing as T
import enum
import dataclasses
import polars as pl
from .vendor.better_dataclasses import DataClass
from .sentinel import NOTHING, REQUIRED, OPTIONAL
if T.TYPE_CHECKING: # pragma: no cover
from .expr.api import T_EXPR
[docs]class ExprEnum(str, enum.Enum):
# Aggregation
agg = "agg"
agg_groups = "agg_groups"
arg_max = "arg_max"
arg_min = "arg_min"
agg_count = "agg_count"
agg_first = "agg_first"
agg_implode = "agg_implode"
agg_last = "agg_last"
agg_len = "agg_len"
agg_max = "agg_max"
agg_mean = "agg_mean"
agg_median = "agg_median"
agg_min = "agg_min"
agg_nan_max = "agg_nan_max"
agg_nan_min = "agg_nan_min"
agg_product = "agg_product"
agg_quantile = "agg_quantile"
agg_std = "agg_std"
agg_sum = "agg_sum"
agg_var = "agg_var"
# Array
arr = "arr"
arr_max = "arr_max"
arr_min = "arr_min"
arr_median = "arr_median"
arr_sum = "arr_sum"
arr_std = "arr_std"
arr_to_list = "arr_to_list"
arr_unique = "arr_unique"
arr_n_unique = "arr_n_unique"
arr_var = "arr_var"
arr_all = "arr_all"
arr_any = "arr_any"
arr_sort = "arr_sort"
arr_reverse = "arr_reverse"
arr_arg_min = "arr_arg_min"
arr_arg_max = "arr_arg_max"
arr_get = "arr_get"
arr_first = "arr_first"
arr_last = "arr_last"
arr_join = "arr_join"
arr_explode = "arr_explode"
arr_contains = "arr_contains"
arr_count_matches = "arr_count_matches"
arr_to_struct = "arr_to_struct"
arr_shift = "arr_shift"
# Binary
binary = "binary"
binary_contains = "binary_contains"
binary_decode = "binary_decode"
binary_encode = "binary_encode"
binary_ends_with = "binary_ends_with"
binary_size = "binary_size"
binary_starts_with = "binary_starts_with"
# Boolean
# Categories
# Columns / names
column = "column"
alias = "alias"
# Computation
# Functions
lit = "lit"
plus = "plus"
minus = "minus"
multiple = "multiple"
divide = "divide"
# List
list = "list"
list_all = "list_all"
list_any = "list_any"
list_drop_nulls = "list_drop_nulls"
list_arg_max = "list_arg_max"
list_arg_min = "list_arg_min"
list_concat = "list_concat"
list_contains = "list_contains"
list_count_matches = "list_count_matches"
list_diff = "list_diff"
list_eval = "list_eval"
list_explode = "list_explode"
list_first = "list_first"
list_gather = "list_gather"
list_get = "list_get"
list_head = "list_head"
list_join = "list_join"
list_last = "list_last"
list_len = "list_len"
list_max = "list_max"
list_mean = "list_mean"
list_median = "list_median"
list_min = "list_min"
list_reverse = "list_reverse"
list_sample = "list_sample"
list_set_difference = "list_set_difference"
list_set_intersection = "list_set_intersection"
list_set_symmetric_difference = "list_set_symmetric_difference"
list_set_union = "list_set_union"
list_shift = "list_shift"
list_slice = "list_slice"
list_sort = "list_sort"
list_std = "list_std"
list_sum = "list_sum"
list_tail = "list_tail"
list_to_array = "list_to_array"
list_to_struct = "list_to_struct"
list_unique = "list_unique"
list_n_unique = "list_n_unique"
list_var = "list_var"
list_gather_every = "list_gather_every"
# Manipulation / selection
append = "append"
arg_sort = "arg_sort"
arg_true = "arg_true"
backward_fill = "backward_fill"
bottom_k = "bottom_k"
bottom_k_by = "bottom_k_by"
cast = "cast"
ceil = "ceil"
clip = "clip"
cut = "cut"
drop_nans = "drop_nans"
drop_nulls = "drop_nulls"
explode = "explode"
extend_constant = "extend_constant"
fill_nan = "fill_nan"
fill_null = "fill_null"
filter = "filter"
flatten = "flatten"
floor = "floor"
forward_fill = "forward_fill"
gather = "gather"
gather_every = "gather_every"
get = "get"
head = "head"
inspect = "inspect"
interpolate = "interpolate"
interpolate_by = "interpolate_by"
limit = "limit"
lower_bound = "lower_bound"
pipe = "pipe"
qcut = "qcut"
rechunk = "rechunk"
reinterpret = "reinterpret"
repeat_by = "repeat_by"
replace = "replace"
replace_strict = "replace_strict"
reshape = "reshape"
reverse = "reverse"
rle = "rle"
rle_id = "rle_id"
round = "round"
round_sig_figs = "round_sig_figs"
sample = "sample"
shift = "shift"
shrink_dtype = "shrink_dtype"
shuffle = "shuffle"
slice = "slice"
sort = "sort"
sort_by = "sort_by"
tail = "tail"
to_physical = "to_physical"
top_k = "top_k"
top_k_by = "top_k_by"
upper_bound = "upper_bound"
where = "where"
# Meta
# Miscellaneous
# Name
# Operators
and_ = "and"
or_ = "or"
eq = "eq"
eq_missing = "eq_missing"
ge = "ge"
gt = "gt"
le = "le"
lt = "lt"
ne = "ne"
ne_missing = "ne_missing"
add = "add"
floordiv = "floordiv"
mod = "mod"
mul = "mul"
neg = "neg"
sub = "sub"
truediv = "truediv"
pow = "pow"
xor = "xor"
# String
string = "string"
str_concat = "str_concat"
str_contains = "str_contains"
str_contains_any = "str_contains_any"
str_count_matches = "str_count_matches"
str_decode = "str_decode"
str_encode = "str_encode"
str_ends_with = "str_ends_with"
str_explode = "str_explode"
str_extract = "str_extract"
str_extract_all = "str_extract_all"
str_extract_groups = "str_extract_groups"
str_extract_many = "str_extract_many"
str_find = "str_find"
str_head = "str_head"
str_join = "str_join"
str_json_decode = "str_json_decode"
str_json_path_match = "str_json_path_match"
str_len_bytes = "str_len_bytes"
str_len_chars = "str_len_chars"
str_pad_end = "str_pad_end"
str_pad_start = "str_pad_start"
str_replace = "str_replace"
str_replace_all = "str_replace_all"
str_replace_many = "str_replace_many"
str_reverse = "str_reverse"
str_slice = "str_slice"
str_split = "str_split"
str_split_exact = "str_split_exact"
str_splitn = "str_splitn"
str_starts_with = "str_starts_with"
str_strip_chars = "str_strip_chars"
str_strip_chars_start = "str_strip_chars_start"
str_strip_chars_end = "str_strip_chars_end"
str_strip_prefix = "str_strip_prefix"
str_strip_suffix = "str_strip_suffix"
str_strptime = "str_strptime"
str_tail = "str_tail"
str_to_date = "str_to_date"
str_to_datetime = "str_to_datetime"
str_to_decimal = "str_to_decimal"
str_to_integer = "str_to_integer"
str_to_lowercase = "str_to_lowercase"
str_to_titlecase = "str_to_titlecase"
str_to_time = "str_to_time"
str_to_uppercase = "str_to_uppercase"
str_zfill = "str_zfill"
# Struct
struct = "struct"
struct_field = "struct_field"
struct_json_encode = "struct_json_encode"
struct_rename_fields = "struct_rename_fields"
struct_with_fields = "struct_with_fields"
# Temporal
dt = "datetime"
dt_add_business_days = "dt_add_business_days"
dt_base_utc_offset = "dt_base_utc_offset"
dt_cast_time_unit = "dt_cast_time_unit"
dt_century = "dt_century"
dt_combine = "dt_combine"
dt_convert_time_zone = "dt_convert_time_zone"
dt_date = "dt_date"
dt_datetime = "dt_datetime"
dt_day = "dt_day"
dt_dst_offset = "dt_dst_offset"
dt_epoch = "dt_epoch"
dt_hour = "dt_hour"
dt_is_leap_year = "dt_is_leap_year"
dt_iso_year = "dt_iso_year"
dt_microsecond = "dt_microsecond"
dt_millennium = "dt_millennium"
dt_millisecond = "dt_millisecond"
dt_minute = "dt_minute"
dt_month = "dt_month"
dt_month_end = "dt_month_end"
dt_month_start = "dt_month_start"
dt_nanosecond = "dt_nanosecond"
dt_offset_by = "dt_offset_by"
dt_ordinal_day = "dt_ordinal_day"
dt_quarter = "dt_quarter"
dt_replace_time_zone = "dt_replace_time_zone"
dt_round = "dt_round"
dt_second = "dt_second"
dt_strftime = "dt_strftime"
dt_time = "dt_time"
dt_timestamp = "dt_timestamp"
dt_to_string = "dt_to_string"
dt_total_days = "dt_total_days"
dt_total_hours = "dt_total_hours"
dt_total_microseconds = "dt_total_microseconds"
dt_total_milliseconds = "dt_total_milliseconds"
dt_total_minutes = "dt_total_minutes"
dt_total_nanoseconds = "dt_total_nanoseconds"
dt_total_seconds = "dt_total_seconds"
dt_truncate = "dt_truncate"
dt_week = "dt_week"
dt_weekday = "dt_weekday"
dt_with_time_unit = "dt_with_time_unit"
dt_year = "dt_year"
# Window
[docs]@dataclasses.dataclass
class BaseExpr(DataClass):
type: str = dataclasses.field(default=REQUIRED)
def _validate(self):
for k, v in dataclasses.asdict(self).items():
if v is REQUIRED: # pragma: no cover
raise ValueError(f"Field {k!r} is required for {self.__class__}.")
def __post_init__(self):
self._validate()
def to_polars(self) -> pl.Expr:
raise NotImplementedError()
expr_enum_to_klass_mapping: T.Dict[str, T.Type["T_EXPR"]] = dict()
[docs]def parse_expr(dct: T.Dict[str, T.Any]) -> "T_EXPR":
"""
Note: you have to import everything in the :mod:`jsonpolars.expr` module
to make this work.
"""
return expr_enum_to_klass_mapping[dct["type"]].from_dict(dct)