>>> import ibis
>>> t = ibis.table(dict(a="str"), name="t")
>>> expr = t.a.length().name("len").as_table()
>>> expected = t.select(len=t.a.length())
>>> expr.equals(expected)
True
Scalars and columns of any element type.
Value(self, arg)
Base class for a data generating expression having a known type.
Name | Description |
---|---|
as_table | Promote the expression to a Table. |
asc | Sort an expression ascending. |
between | Check if this expression is between lower and upper , inclusive. |
case | Create a SimpleCaseBuilder to chain multiple if-else statements. |
cases | Create a case expression in one shot. |
cast | Cast expression to indicated data type. |
coalesce | Return the first non-null value from args . |
collect | Aggregate this expression’s elements into an array. |
desc | Sort an expression descending. |
fillna | Replace any null values with the indicated fill value. |
group_concat | Concatenate values using the indicated separator to produce a string. |
hash | Compute an integer hash value. |
identical_to | Return whether this expression is identical to other. |
isin | Check whether this expression’s values are in values . |
isnull | Return whether this expression is NULL. |
name | Rename an expression to name . |
notin | Check whether this expression’s values are not in values . |
notnull | Return whether this expression is not NULL. |
nullif | Set values to null if they equal the values null_if_expr . |
over | Construct a window expression. |
substitute | Replace values given in values with replacement . |
to_pandas | Convert a column expression to a pandas Series or scalar object. |
try_cast | Try cast expression to indicated data type. |
type | Return the DataType of self . |
typeof | Return the string name of the datatype of self. |
as_table()
Promote the expression to a Table.
Type | Description |
---|---|
Table |
A table expression |
asc()
Sort an expression ascending.
between(lower, upper)
Check if this expression is between lower
and upper
, inclusive.
Name | Type | Description | Default |
---|---|---|---|
lower |
Value | Lower bound, inclusive | required |
upper |
Value | Upper bound, inclusive | required |
Type | Description |
---|---|
BooleanValue |
Expression indicating membership in the provided range |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch().limit(5)
>>> t.bill_length_mm.between(35, 38)
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Between(bill_length_mm, 35, 38) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ boolean │ ├─────────────────────────────────┤ │ False │ │ False │ │ False │ │ NULL │ │ True │ └─────────────────────────────────┘
case()
Create a SimpleCaseBuilder to chain multiple if-else statements.
Add new search expressions with the .when()
method. These must be comparable with this column expression. Conclude by calling .end()
.
Type | Description |
---|---|
SimpleCaseBuilder |
A case builder |
>>> import ibis
>>> ibis.options.interactive = True
>>> x = ibis.examples.penguins.fetch().head(5)["sex"]
>>> x
┏━━━━━━━━┓ ┃ sex ┃ ┡━━━━━━━━┩ │ string │ ├────────┤ │ male │ │ female │ │ female │ │ NULL │ │ female │ └────────┘
┏━━━━━━━━━━━━━━━━━━━━━━┓ ┃ SimpleCase(sex, 'U') ┃ ┡━━━━━━━━━━━━━━━━━━━━━━┩ │ string │ ├──────────────────────┤ │ M │ │ F │ │ F │ │ U │ │ F │ └──────────────────────┘
Cases not given result in the ELSE case
┏━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ SimpleCase(sex, 'OTHER') ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ string │ ├──────────────────────────┤ │ M │ │ OTHER │ │ OTHER │ │ OTHER │ │ OTHER │ └──────────────────────────┘
If you don’t supply an ELSE, then NULL is used
cases(case_result_pairs, default=None)
Create a case expression in one shot.
Name | Type | Description | Default |
---|---|---|---|
case_result_pairs |
Iterable[tuple[ir .BooleanValue , Value]] |
Conditional-result pairs | required |
default |
Value | None | Value to return if none of the case conditions are true | None |
Type | Description |
---|---|
Value | Value expression |
cast(target_type)
Cast expression to indicated data type.
Similar to pandas.Series.astype
.
Name | Type | Description | Default |
---|---|---|---|
target_type |
Any | Type to cast to. Anything accepted by ibis.dtype() |
required |
Type | Description |
---|---|
Value | Casted expression |
>>> import ibis
>>> ibis.options.interactive = True
>>> x = ibis.examples.penguins.fetch()["bill_depth_mm"]
>>> x
┏━━━━━━━━━━━━━━━┓ ┃ bill_depth_mm ┃ ┡━━━━━━━━━━━━━━━┩ │ float64 │ ├───────────────┤ │ 18.7 │ │ 17.4 │ │ 18.0 │ │ NULL │ │ 19.3 │ │ 20.6 │ │ 17.8 │ │ 19.6 │ │ 18.1 │ │ 20.2 │ │ … │ └───────────────┘
python’s built-in types can be used
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Cast(bill_depth_mm, int64) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ int64 │ ├────────────────────────────┤ │ 19 │ │ 17 │ │ 18 │ │ NULL │ │ 19 │ │ 21 │ │ 18 │ │ 20 │ │ 18 │ │ 20 │ │ … │ └────────────────────────────┘
or string names
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Cast(bill_depth_mm, uint16) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ uint16 │ ├─────────────────────────────┤ │ 19 │ │ 17 │ │ 18 │ │ NULL │ │ 19 │ │ 21 │ │ 18 │ │ 20 │ │ 18 │ │ 20 │ │ … │ └─────────────────────────────┘
If you make an illegal cast, you won’t know until the backend actually executes it. Consider .try_cast()
.
coalesce(*args)
Return the first non-null value from args
.
Name | Type | Description | Default |
---|---|---|---|
args |
Value | Arguments from which to choose the first non-null value | () |
Type | Description |
---|---|
Value | Coalesced expression |
collect(where=None)
Aggregate this expression’s elements into an array.
This function is called array_agg
, list_agg
, or list
in other systems.
Name | Type | Description | Default |
---|---|---|---|
where |
ir .BooleanValue | None |
Filter to apply before aggregation | None |
Type | Description |
---|---|
ArrayScalar |
Collected array |
Basic collect usage
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"key": list("aaabb"), "value": [1, 2, 3, 4, 5]})
>>> t
┏━━━━━━━━┳━━━━━━━┓ ┃ key ┃ value ┃ ┡━━━━━━━━╇━━━━━━━┩ │ string │ int64 │ ├────────┼───────┤ │ a │ 1 │ │ a │ 2 │ │ a │ 3 │ │ b │ 4 │ │ b │ 5 │ └────────┴───────┘
Collect elements per group
┏━━━━━━━━┳━━━━━━━━━━━━━━━━┓ ┃ key ┃ v ┃ ┡━━━━━━━━╇━━━━━━━━━━━━━━━━┩ │ string │ array<int64> │ ├────────┼────────────────┤ │ a │ [1, 2, ... +1] │ │ b │ [4, 5] │ └────────┴────────────────┘
Collect elements per group using a filter
desc()
Sort an expression descending.
fillna(fill_value)
Replace any null values with the indicated fill value.
Name | Type | Description | Default |
---|---|---|---|
fill_value |
Scalar | Value with which to replace NA values in self |
required |
Type | Description |
---|---|
Value | self filled with fill_value where it is NA |
group_concat(sep=',', where=None)
Concatenate values using the indicated separator to produce a string.
Name | Type | Description | Default |
---|---|---|---|
sep |
str | Separator will be used to join strings | ',' |
where |
ir .BooleanValue | None |
Filter expression | None |
Type | Description |
---|---|
StringScalar |
Concatenated string expression |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch().limit(5)
>>> t[["bill_length_mm", "bill_depth_mm"]]
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ bill_length_mm ┃ bill_depth_mm ┃ ┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ float64 │ float64 │ ├────────────────┼───────────────┤ │ 39.1 │ 18.7 │ │ 39.5 │ 17.4 │ │ 40.3 │ 18.0 │ │ NULL │ NULL │ │ 36.7 │ 19.3 │ └────────────────┴───────────────┘
hash()
Compute an integer hash value.
Type | Description |
---|---|
IntegerValue |
The hash value of self |
identical_to(other)
Return whether this expression is identical to other.
Corresponds to IS NOT DISTINCT FROM
in SQL.
Name | Type | Description | Default |
---|---|---|---|
other |
Value | Expression to compare to | required |
Type | Description |
---|---|
BooleanValue |
Whether this expression is not distinct from other |
isin(values)
Check whether this expression’s values are in values
.
Name | Type | Description | Default |
---|---|---|---|
values |
Value | Sequence[Value] | Values or expression to check for membership | required |
Type | Description |
---|---|
BooleanValue |
Expression indicating membership |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"a": [1, 2, 3], "b": [2, 3, 4]})
>>> t
┏━━━━━━━┳━━━━━━━┓ ┃ a ┃ b ┃ ┡━━━━━━━╇━━━━━━━┩ │ int64 │ int64 │ ├───────┼───────┤ │ 1 │ 2 │ │ 2 │ 3 │ │ 3 │ 4 │ └───────┴───────┘
Check against a literal sequence of values
┏━━━━━━━━━━━━━┓ ┃ InValues(a) ┃ ┡━━━━━━━━━━━━━┩ │ boolean │ ├─────────────┤ │ True │ │ True │ │ False │ └─────────────┘
Check against a derived expression
┏━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ InColumn(a, Add(b, 1)) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━┩ │ boolean │ ├────────────────────────┤ │ False │ │ False │ │ True │ └────────────────────────┘
Check against a column from a different table
isnull()
Return whether this expression is NULL.
name(name)
Rename an expression to name
.
Name | Type | Description | Default |
---|---|---|---|
name |
The new name of the expression | required |
Type | Description |
---|---|
Value | self with name name |
notin(values)
Check whether this expression’s values are not in values
.
Opposite of Value.isin()
.
Name | Type | Description | Default |
---|---|---|---|
values |
Value | Sequence[Value] | Values or expression to check for lack of membership | required |
Type | Description |
---|---|
BooleanValue |
Whether self ’s values are not contained in values |
notnull()
Return whether this expression is not NULL.
nullif(null_if_expr)
Set values to null if they equal the values null_if_expr
.
Commonly used to avoid divide-by-zero problems by replacing zero with NULL
in the divisor.
Equivalent to (self == null_if_expr).ifelse(ibis.null(), self)
.
Name | Type | Description | Default |
---|---|---|---|
null_if_expr |
Value | Expression indicating what values should be NULL | required |
Type | Description |
---|---|
Value | Value expression |
over(window=None, *, rows=None, range=None, group_by=None, order_by=None)
Construct a window expression.
Name | Type | Description | Default |
---|---|---|---|
window |
Window specification | None |
|
rows |
Whether to use the ROWS window clause |
None |
|
range |
Whether to use the RANGE window clause |
None |
|
group_by |
Grouping key | None |
|
order_by |
Ordering key | None |
Type | Description |
---|---|
Value | A window function expression |
substitute(value, replacement=None, else_=None)
Replace values given in values
with replacement
.
This is similar to the pandas replace
method.
Name | Type | Description | Default |
---|---|---|---|
value |
Value | dict | Expression or dict. | required |
replacement |
Value | None | If an expression is passed to value, this must be passed. | None |
else_ |
Value | None | If an original value does not match value , then else_ is used. The default of None means leave the original value unchanged. |
None |
Type | Description |
---|---|
Value | Replaced values |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch()
>>> t.island.value_counts().order_by("island")
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ ┃ island ┃ island_count ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━┩ │ string │ int64 │ ├───────────┼──────────────┤ │ Biscoe │ 168 │ │ Dream │ 124 │ │ Torgersen │ 52 │ └───────────┴──────────────┘
>>> t.island.substitute({"Torgersen": "torg", "Biscoe": "bisc"}).name(
... "island"
... ).value_counts().order_by("island")
┏━━━━━━━━┳━━━━━━━━━━━━━━┓ ┃ island ┃ island_count ┃ ┡━━━━━━━━╇━━━━━━━━━━━━━━┩ │ string │ int64 │ ├────────┼──────────────┤ │ Dream │ 124 │ │ bisc │ 168 │ │ torg │ 52 │ └────────┴──────────────┘
to_pandas(**kwargs)
Convert a column expression to a pandas Series or scalar object.
Name | Type | Description | Default |
---|---|---|---|
kwargs |
Same as keyword arguments to execute |
{} |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch().limit(5)
>>> t.to_pandas()
species | island | bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | sex | year | |
---|---|---|---|---|---|---|---|---|
0 | Adelie | Torgersen | 39.1 | 18.7 | 181.0 | 3750.0 | male | 2007 |
1 | Adelie | Torgersen | 39.5 | 17.4 | 186.0 | 3800.0 | female | 2007 |
2 | Adelie | Torgersen | 40.3 | 18.0 | 195.0 | 3250.0 | female | 2007 |
3 | Adelie | Torgersen | NaN | NaN | NaN | NaN | None | 2007 |
4 | Adelie | Torgersen | 36.7 | 19.3 | 193.0 | 3450.0 | female | 2007 |
try_cast(target_type)
Try cast expression to indicated data type.
If the cast fails for a row, the value is returned as null or NaN depending on target_type and backend behavior.
Name | Type | Description | Default |
---|---|---|---|
target_type |
Any | Type to try cast to. Anything accepted by ibis.dtype() |
required |
Type | Description |
---|---|
Value | Casted expression |
>>> import ibis
>>> from ibis import _
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"numbers": [1, 2, 3, 4], "strings": ["1.0", "2", "hello", "world"]})
>>> t
┏━━━━━━━━━┳━━━━━━━━━┓ ┃ numbers ┃ strings ┃ ┡━━━━━━━━━╇━━━━━━━━━┩ │ int64 │ string │ ├─────────┼─────────┤ │ 1 │ 1.0 │ │ 2 │ 2 │ │ 3 │ hello │ │ 4 │ world │ └─────────┴─────────┘
>>> t = t.mutate(numbers_to_strings=_.numbers.try_cast("string"))
>>> t = t.mutate(strings_to_numbers=_.strings.try_cast("int"))
>>> t
┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ ┃ numbers ┃ strings ┃ numbers_to_strings ┃ strings_to_numbers ┃ ┡━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩ │ int64 │ string │ string │ int64 │ ├─────────┼─────────┼────────────────────┼────────────────────┤ │ 1 │ 1.0 │ 1 │ 1 │ │ 2 │ 2 │ 2 │ 2 │ │ 3 │ hello │ 3 │ NULL │ │ 4 │ world │ 4 │ NULL │ └─────────┴─────────┴────────────────────┴────────────────────┘
type()
Return the DataType of self
.
typeof()
Return the string name of the datatype of self.
The values of the returned strings are necessarily backend dependent. e.g. duckdb may say “DOUBLE”, while sqlite may say “real”.
Type | Description |
---|---|
StringValue |
A string indicating the type of the value |
>>> import ibis
>>> ibis.options.interactive = True
>>> vals = ibis.examples.penguins.fetch().head(5).bill_length_mm
>>> vals
┏━━━━━━━━━━━━━━━━┓ ┃ bill_length_mm ┃ ┡━━━━━━━━━━━━━━━━┩ │ float64 │ ├────────────────┤ │ 39.1 │ │ 39.5 │ │ 40.3 │ │ NULL │ │ 36.7 │ └────────────────┘
┏━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ TypeOf(bill_length_mm) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━┩ │ string │ ├────────────────────────┤ │ DOUBLE │ │ DOUBLE │ │ DOUBLE │ │ DOUBLE │ │ DOUBLE │ └────────────────────────┘
Different backends have different names for their native types
Column(self, arg)
Name | Description |
---|---|
approx_median | Return an approximate of the median of self . |
approx_nunique | Return the approximate number of distinct elements in self . |
arbitrary | Select an arbitrary value in a column. |
argmax | Return the value of self that maximizes key . |
argmin | Return the value of self that minimizes key . |
count | Compute the number of rows in an expression. |
cume_dist | Return the cumulative distribution over a window. |
cummax | Return the cumulative max over a window. |
cummin | Return the cumulative min over a window. |
dense_rank | Position of first element within each group of equal values. |
first | Return the first value of a column. |
lag | Return the row located at offset rows before the current row. |
last | Return the last value of a column. |
lead | Return the row located at offset rows after the current row. |
max | Return the maximum of a column. |
median | Return the median of the column. |
min | Return the minimum of a column. |
mode | Return the mode of a column. |
nth | Return the n th value (0-indexed) over a window. |
ntile | Return the integer number of a partitioning of the column values. |
nunique | Compute the number of distinct rows in an expression. |
percent_rank | Return the relative rank of the values in the column. |
quantile | Return value at the given quantile. |
rank | Compute position of first element within each equal-value group in sorted order. |
topk | Return a “top k” expression. |
value_counts | Compute a frequency table. |
approx_median(where=None)
Return an approximate of the median of self
.
Whether the result is an approximation depends on the backend.
Name | Type | Description | Default |
---|---|---|---|
where |
ir .BooleanValue | None |
Filter in values when where is True |
None |
Type | Description |
---|---|
Scalar | An approximation of the median of self |
approx_nunique(where=None)
Return the approximate number of distinct elements in self
.
Whether the result is an approximation depends on the backend.
Name | Type | Description | Default |
---|---|---|---|
where |
ir .BooleanValue | None |
Filter in values when where is True |
None |
Type | Description |
---|---|
Scalar | An approximate count of the distinct elements of self |
arbitrary(where=None, how='first')
Select an arbitrary value in a column.
Name | Type | Description | Default |
---|---|---|---|
where |
ir .BooleanValue | None |
A filter expression | None |
how |
Literal[‘first’, ‘last’, ‘heavy’] | The method to use for selecting the element. * "first" : Select the first non-NULL element * "last" : Select the last non-NULL element * "heavy" : Select a frequently occurring value using the heavy hitters algorithm. "heavy" is only supported by Clickhouse backend. |
'first' |
Type | Description |
---|---|
Scalar | An expression |
argmax(key, where=None)
Return the value of self
that maximizes key
.
Name | Type | Description | Default |
---|---|---|---|
key |
ir .Value |
Key to use for max computation. |
required |
where |
ir .BooleanValue | None |
Keep values when where is True |
None |
Type | Description |
---|---|
Scalar | The value of self that maximizes key |
argmin(key, where=None)
Return the value of self
that minimizes key
.
Name | Type | Description | Default |
---|---|---|---|
key |
ir .Value |
Key to use for min computation. |
required |
where |
ir .BooleanValue | None |
Keep values when where is True |
None |
Type | Description |
---|---|
Scalar | The value of self that minimizes key |
count(where=None)
Compute the number of rows in an expression.
Name | Type | Description | Default |
---|---|---|---|
where |
ir .BooleanValue | None |
Filter expression | None |
Type | Description |
---|---|
IntegerScalar |
Number of elements in an expression |
cume_dist()
Return the cumulative distribution over a window.
cummax(where=None, group_by=None, order_by=None)
Return the cumulative max over a window.
cummin(where=None, group_by=None, order_by=None)
Return the cumulative min over a window.
dense_rank()
Position of first element within each group of equal values.
Values are returned in sorted order and duplicate values are ignored.
Equivalent to SQL’s DENSE_RANK()
.
Type | Description |
---|---|
IntegerColumn |
The rank |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"values": [1, 2, 1, 2, 3, 2]})
>>> t.mutate(rank=t.values.dense_rank())
┏━━━━━━━━┳━━━━━━━┓ ┃ values ┃ rank ┃ ┡━━━━━━━━╇━━━━━━━┩ │ int64 │ int64 │ ├────────┼───────┤ │ 1 │ 0 │ │ 1 │ 0 │ │ 2 │ 1 │ │ 2 │ 1 │ │ 2 │ 1 │ │ 3 │ 2 │ └────────┴───────┘
first(where=None)
Return the first value of a column.
lag(offset=None, default=None)
Return the row located at offset
rows before the current row.
Name | Type | Description | Default |
---|---|---|---|
offset |
int | ir .IntegerValue | None |
Index of row to select | None |
default |
Value | None | Value used if no row exists at offset |
None |
last(where=None)
Return the last value of a column.
lead(offset=None, default=None)
Return the row located at offset
rows after the current row.
Name | Type | Description | Default |
---|---|---|---|
offset |
int | ir .IntegerValue | None |
Index of row to select | None |
default |
Value | None | Value used if no row exists at offset |
None |
max(where=None)
Return the maximum of a column.
Name | Type | Description | Default |
---|---|---|---|
where |
ir .BooleanValue | None |
Filter in values when where is True |
None |
Type | Description |
---|---|
Scalar | The maximum value in self |
median(where=None)
Return the median of the column.
Name | Type | Description | Default |
---|---|---|---|
where |
ir .BooleanValue | None |
Optional boolean expression. If given, only the values where where evaluates to true will be considered for the median. |
None |
Type | Description |
---|---|
Scalar | Median of the column |
Compute the median of bill_depth_mm
>>> t.group_by(t.species).agg(median_bill_depth=t.bill_depth_mm.median()).order_by(
... ibis.desc("median_bill_depth")
... )
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓ ┃ species ┃ median_bill_depth ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩ │ string │ float64 │ ├───────────┼───────────────────┤ │ Chinstrap │ 18.45 │ │ Adelie │ 18.40 │ │ Gentoo │ 15.00 │ └───────────┴───────────────────┘
In addition to numeric types, any orderable non-numeric types such as strings and dates work with median
.
>>> t.group_by(t.island).agg(median_species=t.species.median()).order_by(
... ibis.desc("median_species")
... )
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓ ┃ island ┃ median_species ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩ │ string │ string │ ├───────────┼────────────────┤ │ Biscoe │ Gentoo │ │ Dream │ Chinstrap │ │ Torgersen │ Adelie │ └───────────┴────────────────┘
min(where=None)
Return the minimum of a column.
Name | Type | Description | Default |
---|---|---|---|
where |
ir .BooleanValue | None |
Filter in values when where is True |
None |
Type | Description |
---|---|
Scalar | The minimum value in self |
mode(where=None)
Return the mode of a column.
Name | Type | Description | Default |
---|---|---|---|
where |
ir .BooleanValue | None |
Filter in values when where is True |
None |
Type | Description |
---|---|
Scalar | The mode of self |
nth(n)
Return the n
th value (0-indexed) over a window.
.nth(0)
is equivalent to .first()
. Negative will result in NULL
. If the value of n
is greater than the number of rows in the window, NULL
will be returned.
Name | Type | Description | Default |
---|---|---|---|
n |
int | ir .IntegerValue |
Desired rank value | required |
Type | Description |
---|---|
Column | The nth value over a window |
ntile(buckets)
Return the integer number of a partitioning of the column values.
Name | Type | Description | Default |
---|---|---|---|
buckets |
int | ir .IntegerValue |
Number of buckets to partition into | required |
nunique(where=None)
Compute the number of distinct rows in an expression.
Name | Type | Description | Default |
---|---|---|---|
where |
ir .BooleanValue | None |
Filter expression | None |
Type | Description |
---|---|
IntegerScalar |
Number of distinct elements in an expression |
percent_rank()
Return the relative rank of the values in the column.
quantile(quantile, where=None)
Return value at the given quantile.
The output of this method is a continuous quantile if the input is numeric, otherwise the output is a discrete quantile.
Name | Type | Description | Default |
---|---|---|---|
quantile |
float | ir .NumericValue | Sequence[ir .NumericValue | float] |
0 <= quantile <= 1 , or an array of such values indicating the quantile or quantiles to compute |
required |
where |
ir .BooleanValue | None |
Boolean filter for input values | None |
Type | Description |
---|---|
Scalar | Quantile of the input |
Compute the 99th percentile of bill_depth
>>> t.group_by(t.species).agg(p99_bill_depth=t.bill_depth_mm.quantile(0.99)).order_by(
... ibis.desc("p99_bill_depth")
... )
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓ ┃ species ┃ p99_bill_depth ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩ │ string │ float64 │ ├───────────┼────────────────┤ │ Adelie │ 21.200 │ │ Chinstrap │ 20.733 │ │ Gentoo │ 17.256 │ └───────────┴────────────────┘
In addition to numeric types, any orderable non-numeric types such as strings and dates work with quantile
.
Let’s compute the 99th percentile of the species
column
>>> t.group_by(t.island).agg(p99_species=t.species.quantile(0.99)).order_by(
... ibis.desc("p99_species")
... )
┏━━━━━━━━━━━┳━━━━━━━━━━━━━┓ ┃ island ┃ p99_species ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━┩ │ string │ string │ ├───────────┼─────────────┤ │ Biscoe │ Gentoo │ │ Dream │ Chinstrap │ │ Torgersen │ Adelie │ └───────────┴─────────────┘
rank()
Compute position of first element within each equal-value group in sorted order.
Equivalent to SQL’s RANK()
window function.
Type | Description |
---|---|
Int64Column |
The min rank |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"values": [1, 2, 1, 2, 3, 2]})
>>> t.mutate(rank=t.values.rank())
┏━━━━━━━━┳━━━━━━━┓ ┃ values ┃ rank ┃ ┡━━━━━━━━╇━━━━━━━┩ │ int64 │ int64 │ ├────────┼───────┤ │ 1 │ 0 │ │ 1 │ 0 │ │ 2 │ 2 │ │ 2 │ 2 │ │ 2 │ 2 │ │ 3 │ 5 │ └────────┴───────┘
topk(k, by=None)
Return a “top k” expression.
Name | Type | Description | Default |
---|---|---|---|
k |
int | Return this number of rows | required |
by |
ir .Value | None |
An expression. Defaults to count . |
None |
Type | Description |
---|---|
TableExpr |
A top-k expression |
value_counts()
Compute a frequency table.
Type | Description |
---|---|
Table |
Frequency table expression |
Scalar(self, arg)
Name | Description |
---|---|
as_table | Promote the scalar expression to a table. |
as_table()
Promote the scalar expression to a table.
Type | Description |
---|---|
Table |
A table expression |
Promote an aggregation to a table
>>> import ibis
>>> import ibis.expr.types as ir
>>> t = ibis.table(dict(a="str"), name="t")
>>> expr = t.a.length().sum().name("len").as_table()
>>> isinstance(expr, ir.Table)
True
Promote a literal value to a table
ibis.literal(value, type=None)
Create a scalar expression from a Python value.
Ibis supports literal construction of arrays using the following functions:
Constructing these types using literal
will be deprecated in a future release.
Name | Type | Description | Default |
---|---|---|---|
value |
Any | A Python value | required |
type |
dt .DataType | str | None |
An instance of DataType or a string indicating the ibis type of value . This parameter can be used in cases where ibis’s type inference isn’t sufficient for discovering the type of value . |
None |
Type | Description |
---|---|
Scalar | An expression representing a literal value |
Construct an integer literal
Construct a float64
literal from an int
Ibis checks for invalid types
ibis.param(type)
Create a deferred parameter of a given type.
Name | Type | Description | Default |
---|---|---|---|
type |
dt .DataType |
The type of the unbound parameter, e.g., double, int64, date, etc. | required |
Type | Description |
---|---|
Scalar |
A scalar expression backend by a parameter |
>>> from datetime import date
>>> import ibis
>>> start = ibis.param("date")
>>> t = ibis.memtable(
... {
... "date_col": [date(2013, 1, 1), date(2013, 1, 2), date(2013, 1, 3)],
... "value": [1.0, 2.0, 3.0],
... },
... )
>>> expr = t.filter(t.date_col >= start).value.sum()
>>> expr.execute(params={start: date(2013, 1, 1)})
6.0
ibis.expr.api.NA
The NULL scalar.
ibis.null()
Create a NULL/NA scalar.
ibis.coalesce(*args)
Return the first non-null value from args
.
Name | Type | Description | Default |
---|---|---|---|
args |
Any | Arguments from which to choose the first non-null value | () |
Type | Description |
---|---|
Value |
Coalesced expression |
ibis.least(*args)
Compute the smallest value among the supplied arguments.
Name | Type | Description | Default |
---|---|---|---|
args |
Any | Arguments to choose from | () |
Type | Description |
---|---|
Value |
Minimum of the passed arguments |
ibis.greatest(*args)
Compute the largest value among the supplied arguments.
Name | Type | Description | Default |
---|---|---|---|
args |
Any | Arguments to choose from | () |
Type | Description |
---|---|
Value |
Maximum of the passed arguments |
ibis.asc(expr)
Create a ascending sort key from asc
or column name.
Name | Type | Description | Default |
---|---|---|---|
expr |
ir .Column | str |
The expression or column name to use for sorting | required |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch()
>>> t[["species", "year"]].order_by(ibis.asc("year")).head()
┏━━━━━━━━━┳━━━━━━━┓ ┃ species ┃ year ┃ ┡━━━━━━━━━╇━━━━━━━┩ │ string │ int64 │ ├─────────┼───────┤ │ Adelie │ 2007 │ │ Adelie │ 2007 │ │ Adelie │ 2007 │ │ Adelie │ 2007 │ │ Adelie │ 2007 │ └─────────┴───────┘
Type | Description |
---|---|
ir .ValueExpr |
An expression |
ibis.desc(expr)
Create a descending sort key from expr
or column name.
Name | Type | Description | Default |
---|---|---|---|
expr |
ir .Column | str |
The expression or column name to use for sorting | required |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch()
>>> t[["species", "year"]].order_by(ibis.desc("year")).head()
┏━━━━━━━━━┳━━━━━━━┓ ┃ species ┃ year ┃ ┡━━━━━━━━━╇━━━━━━━┩ │ string │ int64 │ ├─────────┼───────┤ │ Adelie │ 2009 │ │ Adelie │ 2009 │ │ Adelie │ 2009 │ │ Adelie │ 2009 │ │ Adelie │ 2009 │ └─────────┴───────┘
Type | Description |
---|---|
ir .ValueExpr |
An expression |
ibis.ifelse(condition, true_expr, false_expr)
Construct a ternary conditional expression.
Name | Type | Description | Default |
---|---|---|---|
condition |
Any | A boolean expression | required |
true_expr |
Any | Expression to return if condition evaluates to True |
required |
false_expr |
Any | Expression to return if condition evaluates to False or NULL |
required |
Type | Description |
---|---|
ir .Value |
The value of true_expr if condition is True else false_expr |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"condition": [True, False, True, None]})
>>> ibis.ifelse(t.condition, "yes", "no")
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ IfElse(condition, 'yes', 'no') ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ string │ ├────────────────────────────────┤ │ yes │ │ no │ │ yes │ │ no │ └────────────────────────────────┘
ibis.case()
Begin constructing a case expression.
Use the .when
method on the resulting object followed by .end
to create a complete case expression.
Type | Description |
---|---|
SearchedCaseBuilder |
A builder object to use for constructing a case expression. |
>>> import ibis
>>> from ibis import _
>>> ibis.options.interactive = True
>>> t = ibis.memtable(
... {
... "left": [1, 2, 3, 4],
... "symbol": ["+", "-", "*", "/"],
... "right": [5, 6, 7, 8],
... }
... )
>>> t.mutate(
... result=(
... ibis.case()
... .when(_.symbol == "+", _.left + _.right)
... .when(_.symbol == "-", _.left - _.right)
... .when(_.symbol == "*", _.left * _.right)
... .when(_.symbol == "/", _.left / _.right)
... .end()
... )
... )
┏━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━━┓ ┃ left ┃ symbol ┃ right ┃ result ┃ ┡━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━━┩ │ int64 │ string │ int64 │ float64 │ ├───────┼────────┼───────┼─────────┤ │ 1 │ + │ 5 │ 6.0 │ │ 2 │ - │ 6 │ -4.0 │ │ 3 │ * │ 7 │ 21.0 │ │ 4 │ / │ 8 │ 0.5 │ └───────┴────────┴───────┴─────────┘
ibis.to_sql(expr, dialect=None, **kwargs)
Return the formatted SQL string for an expression.
Name | Type | Description | Default |
---|---|---|---|
expr |
ir .Expr |
Ibis expression. | required |
dialect |
str | None | SQL dialect to use for compilation. | None |
kwargs |
Scalar parameters | {} |
Type | Description |
---|---|
str | Formatted SQL string |