>>> import ibis
>>> t = ibis.table(dict(a="str"), name="t")
>>> expr = t.a.length().name("len").as_table()
>>> expected = t.select(len=t.a.length())
>>> expr.equals(expected)True
Scalars and columns of any element type.
Value(self, arg)
Base class for a data generating expression having a known type.
| Name | Description |
|---|---|
| as_table | Promote the expression to a Table. |
| asc | Sort an expression ascending. |
| between | Check if this expression is between lower and upper, inclusive. |
| case | Create a SimpleCaseBuilder to chain multiple if-else statements. |
| cases | Create a case expression in one shot. |
| cast | Cast expression to indicated data type. |
| coalesce | Return the first non-null value from args. |
| collect | Aggregate this expression’s elements into an array. |
| desc | Sort an expression descending. |
| fillna | Replace any null values with the indicated fill value. |
| group_concat | Concatenate values using the indicated separator to produce a string. |
| hash | Compute an integer hash value. |
| identical_to | Return whether this expression is identical to other. |
| isin | Check whether this expression’s values are in values. |
| isnull | Return whether this expression is NULL. |
| name | Rename an expression to name. |
| notin | Check whether this expression’s values are not in values. |
| notnull | Return whether this expression is not NULL. |
| nullif | Set values to null if they equal the values null_if_expr. |
| over | Construct a window expression. |
| substitute | Replace values given in values with replacement. |
| to_pandas | Convert a column expression to a pandas Series or scalar object. |
| try_cast | Try cast expression to indicated data type. |
| type | Return the DataType of self. |
| typeof | Return the string name of the datatype of self. |
as_table()
Promote the expression to a Table.
| Type | Description |
|---|---|
Table |
A table expression |
asc()
Sort an expression ascending.
between(lower, upper)
Check if this expression is between lower and upper, inclusive.
| Name | Type | Description | Default |
|---|---|---|---|
lower |
Value | Lower bound, inclusive | required |
upper |
Value | Upper bound, inclusive | required |
| Type | Description |
|---|---|
BooleanValue |
Expression indicating membership in the provided range |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch().limit(5)
>>> t.bill_length_mm.between(35, 38)┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Between(bill_length_mm, 35, 38) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ boolean │ ├─────────────────────────────────┤ │ False │ │ False │ │ False │ │ NULL │ │ True │ └─────────────────────────────────┘
case()
Create a SimpleCaseBuilder to chain multiple if-else statements.
Add new search expressions with the .when() method. These must be comparable with this column expression. Conclude by calling .end().
| Type | Description |
|---|---|
SimpleCaseBuilder |
A case builder |
>>> import ibis
>>> ibis.options.interactive = True
>>> x = ibis.examples.penguins.fetch().head(5)["sex"]
>>> x┏━━━━━━━━┓ ┃ sex ┃ ┡━━━━━━━━┩ │ string │ ├────────┤ │ male │ │ female │ │ female │ │ NULL │ │ female │ └────────┘
┏━━━━━━━━━━━━━━━━━━━━━━┓ ┃ SimpleCase(sex, 'U') ┃ ┡━━━━━━━━━━━━━━━━━━━━━━┩ │ string │ ├──────────────────────┤ │ M │ │ F │ │ F │ │ U │ │ F │ └──────────────────────┘
Cases not given result in the ELSE case
┏━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ SimpleCase(sex, 'OTHER') ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ string │ ├──────────────────────────┤ │ M │ │ OTHER │ │ OTHER │ │ OTHER │ │ OTHER │ └──────────────────────────┘
If you don’t supply an ELSE, then NULL is used
cases(case_result_pairs, default=None)
Create a case expression in one shot.
| Name | Type | Description | Default |
|---|---|---|---|
case_result_pairs |
Iterable[tuple[ir.BooleanValue, Value]] |
Conditional-result pairs | required |
default |
Value | None | Value to return if none of the case conditions are true | None |
| Type | Description |
|---|---|
| Value | Value expression |
cast(target_type)
Cast expression to indicated data type.
Similar to pandas.Series.astype.
| Name | Type | Description | Default |
|---|---|---|---|
target_type |
Any | Type to cast to. Anything accepted by ibis.dtype() |
required |
| Type | Description |
|---|---|
| Value | Casted expression |
>>> import ibis
>>> ibis.options.interactive = True
>>> x = ibis.examples.penguins.fetch()["bill_depth_mm"]
>>> x┏━━━━━━━━━━━━━━━┓ ┃ bill_depth_mm ┃ ┡━━━━━━━━━━━━━━━┩ │ float64 │ ├───────────────┤ │ 18.7 │ │ 17.4 │ │ 18.0 │ │ NULL │ │ 19.3 │ │ 20.6 │ │ 17.8 │ │ 19.6 │ │ 18.1 │ │ 20.2 │ │ … │ └───────────────┘
python’s built-in types can be used
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Cast(bill_depth_mm, int64) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ int64 │ ├────────────────────────────┤ │ 19 │ │ 17 │ │ 18 │ │ NULL │ │ 19 │ │ 21 │ │ 18 │ │ 20 │ │ 18 │ │ 20 │ │ … │ └────────────────────────────┘
or string names
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Cast(bill_depth_mm, uint16) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ uint16 │ ├─────────────────────────────┤ │ 19 │ │ 17 │ │ 18 │ │ NULL │ │ 19 │ │ 21 │ │ 18 │ │ 20 │ │ 18 │ │ 20 │ │ … │ └─────────────────────────────┘
If you make an illegal cast, you won’t know until the backend actually executes it. Consider .try_cast().
coalesce(*args)
Return the first non-null value from args.
| Name | Type | Description | Default |
|---|---|---|---|
args |
Value | Arguments from which to choose the first non-null value | () |
| Type | Description |
|---|---|
| Value | Coalesced expression |
collect(where=None)
Aggregate this expression’s elements into an array.
This function is called array_agg, list_agg, or list in other systems.
| Name | Type | Description | Default |
|---|---|---|---|
where |
ir.BooleanValue | None |
Filter to apply before aggregation | None |
| Type | Description |
|---|---|
ArrayScalar |
Collected array |
Basic collect usage
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"key": list("aaabb"), "value": [1, 2, 3, 4, 5]})
>>> t┏━━━━━━━━┳━━━━━━━┓ ┃ key ┃ value ┃ ┡━━━━━━━━╇━━━━━━━┩ │ string │ int64 │ ├────────┼───────┤ │ a │ 1 │ │ a │ 2 │ │ a │ 3 │ │ b │ 4 │ │ b │ 5 │ └────────┴───────┘
Collect elements per group
┏━━━━━━━━┳━━━━━━━━━━━━━━━━┓ ┃ key ┃ v ┃ ┡━━━━━━━━╇━━━━━━━━━━━━━━━━┩ │ string │ array<int64> │ ├────────┼────────────────┤ │ a │ [1, 2, ... +1] │ │ b │ [4, 5] │ └────────┴────────────────┘
Collect elements per group using a filter
desc()
Sort an expression descending.
fillna(fill_value)
Replace any null values with the indicated fill value.
| Name | Type | Description | Default |
|---|---|---|---|
fill_value |
Scalar | Value with which to replace NA values in self |
required |
| Type | Description |
|---|---|
| Value | self filled with fill_value where it is NA |
group_concat(sep=',', where=None)
Concatenate values using the indicated separator to produce a string.
| Name | Type | Description | Default |
|---|---|---|---|
sep |
str | Separator will be used to join strings | ',' |
where |
ir.BooleanValue | None |
Filter expression | None |
| Type | Description |
|---|---|
StringScalar |
Concatenated string expression |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch().limit(5)
>>> t[["bill_length_mm", "bill_depth_mm"]]┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ bill_length_mm ┃ bill_depth_mm ┃ ┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ float64 │ float64 │ ├────────────────┼───────────────┤ │ 39.1 │ 18.7 │ │ 39.5 │ 17.4 │ │ 40.3 │ 18.0 │ │ NULL │ NULL │ │ 36.7 │ 19.3 │ └────────────────┴───────────────┘
hash()
Compute an integer hash value.
| Type | Description |
|---|---|
IntegerValue |
The hash value of self |
identical_to(other)
Return whether this expression is identical to other.
Corresponds to IS NOT DISTINCT FROM in SQL.
| Name | Type | Description | Default |
|---|---|---|---|
other |
Value | Expression to compare to | required |
| Type | Description |
|---|---|
BooleanValue |
Whether this expression is not distinct from other |
isin(values)
Check whether this expression’s values are in values.
| Name | Type | Description | Default |
|---|---|---|---|
values |
Value | Sequence[Value] | Values or expression to check for membership | required |
| Type | Description |
|---|---|
BooleanValue |
Expression indicating membership |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"a": [1, 2, 3], "b": [2, 3, 4]})
>>> t┏━━━━━━━┳━━━━━━━┓ ┃ a ┃ b ┃ ┡━━━━━━━╇━━━━━━━┩ │ int64 │ int64 │ ├───────┼───────┤ │ 1 │ 2 │ │ 2 │ 3 │ │ 3 │ 4 │ └───────┴───────┘
Check against a literal sequence of values
┏━━━━━━━━━━━━━┓ ┃ InValues(a) ┃ ┡━━━━━━━━━━━━━┩ │ boolean │ ├─────────────┤ │ True │ │ True │ │ False │ └─────────────┘
Check against a derived expression
┏━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ InColumn(a, Add(b, 1)) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━┩ │ boolean │ ├────────────────────────┤ │ False │ │ False │ │ True │ └────────────────────────┘
Check against a column from a different table
isnull()
Return whether this expression is NULL.
name(name)
Rename an expression to name.
| Name | Type | Description | Default |
|---|---|---|---|
name |
The new name of the expression | required |
| Type | Description |
|---|---|
| Value | self with name name |
notin(values)
Check whether this expression’s values are not in values.
Opposite of Value.isin().
| Name | Type | Description | Default |
|---|---|---|---|
values |
Value | Sequence[Value] | Values or expression to check for lack of membership | required |
| Type | Description |
|---|---|
BooleanValue |
Whether self’s values are not contained in values |
notnull()
Return whether this expression is not NULL.
nullif(null_if_expr)
Set values to null if they equal the values null_if_expr.
Commonly used to avoid divide-by-zero problems by replacing zero with NULL in the divisor.
Equivalent to (self == null_if_expr).ifelse(ibis.null(), self).
| Name | Type | Description | Default |
|---|---|---|---|
null_if_expr |
Value | Expression indicating what values should be NULL | required |
| Type | Description |
|---|---|
| Value | Value expression |
over(window=None, *, rows=None, range=None, group_by=None, order_by=None)
Construct a window expression.
| Name | Type | Description | Default |
|---|---|---|---|
window |
Window specification | None |
|
rows |
Whether to use the ROWS window clause |
None |
|
range |
Whether to use the RANGE window clause |
None |
|
group_by |
Grouping key | None |
|
order_by |
Ordering key | None |
| Type | Description |
|---|---|
| Value | A window function expression |
substitute(value, replacement=None, else_=None)
Replace values given in values with replacement.
This is similar to the pandas replace method.
| Name | Type | Description | Default |
|---|---|---|---|
value |
Value | dict | Expression or dict. | required |
replacement |
Value | None | If an expression is passed to value, this must be passed. | None |
else_ |
Value | None | If an original value does not match value, then else_ is used. The default of None means leave the original value unchanged. |
None |
| Type | Description |
|---|---|
| Value | Replaced values |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch()
>>> t.island.value_counts().order_by("island")┏━━━━━━━━━━━┳━━━━━━━━━━━━━━┓ ┃ island ┃ island_count ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━┩ │ string │ int64 │ ├───────────┼──────────────┤ │ Biscoe │ 168 │ │ Dream │ 124 │ │ Torgersen │ 52 │ └───────────┴──────────────┘
>>> t.island.substitute({"Torgersen": "torg", "Biscoe": "bisc"}).name(
... "island"
... ).value_counts().order_by("island")┏━━━━━━━━┳━━━━━━━━━━━━━━┓ ┃ island ┃ island_count ┃ ┡━━━━━━━━╇━━━━━━━━━━━━━━┩ │ string │ int64 │ ├────────┼──────────────┤ │ Dream │ 124 │ │ bisc │ 168 │ │ torg │ 52 │ └────────┴──────────────┘
to_pandas(**kwargs)
Convert a column expression to a pandas Series or scalar object.
| Name | Type | Description | Default |
|---|---|---|---|
kwargs |
Same as keyword arguments to execute |
{} |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch().limit(5)
>>> t.to_pandas()| species | island | bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | sex | year | |
|---|---|---|---|---|---|---|---|---|
| 0 | Adelie | Torgersen | 39.1 | 18.7 | 181.0 | 3750.0 | male | 2007 |
| 1 | Adelie | Torgersen | 39.5 | 17.4 | 186.0 | 3800.0 | female | 2007 |
| 2 | Adelie | Torgersen | 40.3 | 18.0 | 195.0 | 3250.0 | female | 2007 |
| 3 | Adelie | Torgersen | NaN | NaN | NaN | NaN | None | 2007 |
| 4 | Adelie | Torgersen | 36.7 | 19.3 | 193.0 | 3450.0 | female | 2007 |
try_cast(target_type)
Try cast expression to indicated data type.
If the cast fails for a row, the value is returned as null or NaN depending on target_type and backend behavior.
| Name | Type | Description | Default |
|---|---|---|---|
target_type |
Any | Type to try cast to. Anything accepted by ibis.dtype() |
required |
| Type | Description |
|---|---|
| Value | Casted expression |
>>> import ibis
>>> from ibis import _
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"numbers": [1, 2, 3, 4], "strings": ["1.0", "2", "hello", "world"]})
>>> t┏━━━━━━━━━┳━━━━━━━━━┓ ┃ numbers ┃ strings ┃ ┡━━━━━━━━━╇━━━━━━━━━┩ │ int64 │ string │ ├─────────┼─────────┤ │ 1 │ 1.0 │ │ 2 │ 2 │ │ 3 │ hello │ │ 4 │ world │ └─────────┴─────────┘
>>> t = t.mutate(numbers_to_strings=_.numbers.try_cast("string"))
>>> t = t.mutate(strings_to_numbers=_.strings.try_cast("int"))
>>> t┏━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓ ┃ numbers ┃ strings ┃ numbers_to_strings ┃ strings_to_numbers ┃ ┡━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩ │ int64 │ string │ string │ int64 │ ├─────────┼─────────┼────────────────────┼────────────────────┤ │ 1 │ 1.0 │ 1 │ 1 │ │ 2 │ 2 │ 2 │ 2 │ │ 3 │ hello │ 3 │ NULL │ │ 4 │ world │ 4 │ NULL │ └─────────┴─────────┴────────────────────┴────────────────────┘
type()
Return the DataType of self.
typeof()
Return the string name of the datatype of self.
The values of the returned strings are necessarily backend dependent. e.g. duckdb may say “DOUBLE”, while sqlite may say “real”.
| Type | Description |
|---|---|
StringValue |
A string indicating the type of the value |
>>> import ibis
>>> ibis.options.interactive = True
>>> vals = ibis.examples.penguins.fetch().head(5).bill_length_mm
>>> vals┏━━━━━━━━━━━━━━━━┓ ┃ bill_length_mm ┃ ┡━━━━━━━━━━━━━━━━┩ │ float64 │ ├────────────────┤ │ 39.1 │ │ 39.5 │ │ 40.3 │ │ NULL │ │ 36.7 │ └────────────────┘
┏━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ TypeOf(bill_length_mm) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━┩ │ string │ ├────────────────────────┤ │ DOUBLE │ │ DOUBLE │ │ DOUBLE │ │ DOUBLE │ │ DOUBLE │ └────────────────────────┘
Different backends have different names for their native types
Column(self, arg)
| Name | Description |
|---|---|
| approx_median | Return an approximate of the median of self. |
| approx_nunique | Return the approximate number of distinct elements in self. |
| arbitrary | Select an arbitrary value in a column. |
| argmax | Return the value of self that maximizes key. |
| argmin | Return the value of self that minimizes key. |
| count | Compute the number of rows in an expression. |
| cume_dist | Return the cumulative distribution over a window. |
| cummax | Return the cumulative max over a window. |
| cummin | Return the cumulative min over a window. |
| dense_rank | Position of first element within each group of equal values. |
| first | Return the first value of a column. |
| lag | Return the row located at offset rows before the current row. |
| last | Return the last value of a column. |
| lead | Return the row located at offset rows after the current row. |
| max | Return the maximum of a column. |
| median | Return the median of the column. |
| min | Return the minimum of a column. |
| mode | Return the mode of a column. |
| nth | Return the nth value (0-indexed) over a window. |
| ntile | Return the integer number of a partitioning of the column values. |
| nunique | Compute the number of distinct rows in an expression. |
| percent_rank | Return the relative rank of the values in the column. |
| quantile | Return value at the given quantile. |
| rank | Compute position of first element within each equal-value group in sorted order. |
| topk | Return a “top k” expression. |
| value_counts | Compute a frequency table. |
approx_median(where=None)
Return an approximate of the median of self.
Whether the result is an approximation depends on the backend.
| Name | Type | Description | Default |
|---|---|---|---|
where |
ir.BooleanValue | None |
Filter in values when where is True |
None |
| Type | Description |
|---|---|
| Scalar | An approximation of the median of self |
approx_nunique(where=None)
Return the approximate number of distinct elements in self.
Whether the result is an approximation depends on the backend.
| Name | Type | Description | Default |
|---|---|---|---|
where |
ir.BooleanValue | None |
Filter in values when where is True |
None |
| Type | Description |
|---|---|
| Scalar | An approximate count of the distinct elements of self |
arbitrary(where=None, how='first')
Select an arbitrary value in a column.
| Name | Type | Description | Default |
|---|---|---|---|
where |
ir.BooleanValue | None |
A filter expression | None |
how |
Literal[‘first’, ‘last’, ‘heavy’] | The method to use for selecting the element. * "first": Select the first non-NULL element * "last": Select the last non-NULL element * "heavy": Select a frequently occurring value using the heavy hitters algorithm. "heavy" is only supported by Clickhouse backend. |
'first' |
| Type | Description |
|---|---|
| Scalar | An expression |
argmax(key, where=None)
Return the value of self that maximizes key.
| Name | Type | Description | Default |
|---|---|---|---|
key |
ir.Value |
Key to use for max computation. |
required |
where |
ir.BooleanValue | None |
Keep values when where is True |
None |
| Type | Description |
|---|---|
| Scalar | The value of self that maximizes key |
argmin(key, where=None)
Return the value of self that minimizes key.
| Name | Type | Description | Default |
|---|---|---|---|
key |
ir.Value |
Key to use for min computation. |
required |
where |
ir.BooleanValue | None |
Keep values when where is True |
None |
| Type | Description |
|---|---|
| Scalar | The value of self that minimizes key |
count(where=None)
Compute the number of rows in an expression.
| Name | Type | Description | Default |
|---|---|---|---|
where |
ir.BooleanValue | None |
Filter expression | None |
| Type | Description |
|---|---|
IntegerScalar |
Number of elements in an expression |
cume_dist()
Return the cumulative distribution over a window.
cummax(where=None, group_by=None, order_by=None)
Return the cumulative max over a window.
cummin(where=None, group_by=None, order_by=None)
Return the cumulative min over a window.
dense_rank()
Position of first element within each group of equal values.
Values are returned in sorted order and duplicate values are ignored.
Equivalent to SQL’s DENSE_RANK().
| Type | Description |
|---|---|
IntegerColumn |
The rank |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"values": [1, 2, 1, 2, 3, 2]})
>>> t.mutate(rank=t.values.dense_rank())┏━━━━━━━━┳━━━━━━━┓ ┃ values ┃ rank ┃ ┡━━━━━━━━╇━━━━━━━┩ │ int64 │ int64 │ ├────────┼───────┤ │ 1 │ 0 │ │ 1 │ 0 │ │ 2 │ 1 │ │ 2 │ 1 │ │ 2 │ 1 │ │ 3 │ 2 │ └────────┴───────┘
first(where=None)
Return the first value of a column.
lag(offset=None, default=None)
Return the row located at offset rows before the current row.
| Name | Type | Description | Default |
|---|---|---|---|
offset |
int | ir.IntegerValue | None |
Index of row to select | None |
default |
Value | None | Value used if no row exists at offset |
None |
last(where=None)
Return the last value of a column.
lead(offset=None, default=None)
Return the row located at offset rows after the current row.
| Name | Type | Description | Default |
|---|---|---|---|
offset |
int | ir.IntegerValue | None |
Index of row to select | None |
default |
Value | None | Value used if no row exists at offset |
None |
max(where=None)
Return the maximum of a column.
| Name | Type | Description | Default |
|---|---|---|---|
where |
ir.BooleanValue | None |
Filter in values when where is True |
None |
| Type | Description |
|---|---|
| Scalar | The maximum value in self |
median(where=None)
Return the median of the column.
| Name | Type | Description | Default |
|---|---|---|---|
where |
ir.BooleanValue | None |
Optional boolean expression. If given, only the values where where evaluates to true will be considered for the median. |
None |
| Type | Description |
|---|---|
| Scalar | Median of the column |
Compute the median of bill_depth_mm
>>> t.group_by(t.species).agg(median_bill_depth=t.bill_depth_mm.median()).order_by(
... ibis.desc("median_bill_depth")
... )┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓ ┃ species ┃ median_bill_depth ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩ │ string │ float64 │ ├───────────┼───────────────────┤ │ Chinstrap │ 18.45 │ │ Adelie │ 18.40 │ │ Gentoo │ 15.00 │ └───────────┴───────────────────┘
In addition to numeric types, any orderable non-numeric types such as strings and dates work with median.
>>> t.group_by(t.island).agg(median_species=t.species.median()).order_by(
... ibis.desc("median_species")
... )┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓ ┃ island ┃ median_species ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩ │ string │ string │ ├───────────┼────────────────┤ │ Biscoe │ Gentoo │ │ Dream │ Chinstrap │ │ Torgersen │ Adelie │ └───────────┴────────────────┘
min(where=None)
Return the minimum of a column.
| Name | Type | Description | Default |
|---|---|---|---|
where |
ir.BooleanValue | None |
Filter in values when where is True |
None |
| Type | Description |
|---|---|
| Scalar | The minimum value in self |
mode(where=None)
Return the mode of a column.
| Name | Type | Description | Default |
|---|---|---|---|
where |
ir.BooleanValue | None |
Filter in values when where is True |
None |
| Type | Description |
|---|---|
| Scalar | The mode of self |
nth(n)
Return the nth value (0-indexed) over a window.
.nth(0) is equivalent to .first(). Negative will result in NULL. If the value of n is greater than the number of rows in the window, NULL will be returned.
| Name | Type | Description | Default |
|---|---|---|---|
n |
int | ir.IntegerValue |
Desired rank value | required |
| Type | Description |
|---|---|
| Column | The nth value over a window |
ntile(buckets)
Return the integer number of a partitioning of the column values.
| Name | Type | Description | Default |
|---|---|---|---|
buckets |
int | ir.IntegerValue |
Number of buckets to partition into | required |
nunique(where=None)
Compute the number of distinct rows in an expression.
| Name | Type | Description | Default |
|---|---|---|---|
where |
ir.BooleanValue | None |
Filter expression | None |
| Type | Description |
|---|---|
IntegerScalar |
Number of distinct elements in an expression |
percent_rank()
Return the relative rank of the values in the column.
quantile(quantile, where=None)
Return value at the given quantile.
The output of this method is a continuous quantile if the input is numeric, otherwise the output is a discrete quantile.
| Name | Type | Description | Default |
|---|---|---|---|
quantile |
float | ir.NumericValue | Sequence[ir.NumericValue | float] |
0 <= quantile <= 1, or an array of such values indicating the quantile or quantiles to compute |
required |
where |
ir.BooleanValue | None |
Boolean filter for input values | None |
| Type | Description |
|---|---|
| Scalar | Quantile of the input |
Compute the 99th percentile of bill_depth
>>> t.group_by(t.species).agg(p99_bill_depth=t.bill_depth_mm.quantile(0.99)).order_by(
... ibis.desc("p99_bill_depth")
... )┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓ ┃ species ┃ p99_bill_depth ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩ │ string │ float64 │ ├───────────┼────────────────┤ │ Adelie │ 21.200 │ │ Chinstrap │ 20.733 │ │ Gentoo │ 17.256 │ └───────────┴────────────────┘
In addition to numeric types, any orderable non-numeric types such as strings and dates work with quantile.
Let’s compute the 99th percentile of the species column
>>> t.group_by(t.island).agg(p99_species=t.species.quantile(0.99)).order_by(
... ibis.desc("p99_species")
... )┏━━━━━━━━━━━┳━━━━━━━━━━━━━┓ ┃ island ┃ p99_species ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━┩ │ string │ string │ ├───────────┼─────────────┤ │ Biscoe │ Gentoo │ │ Dream │ Chinstrap │ │ Torgersen │ Adelie │ └───────────┴─────────────┘
rank()
Compute position of first element within each equal-value group in sorted order.
Equivalent to SQL’s RANK() window function.
| Type | Description |
|---|---|
Int64Column |
The min rank |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"values": [1, 2, 1, 2, 3, 2]})
>>> t.mutate(rank=t.values.rank())┏━━━━━━━━┳━━━━━━━┓ ┃ values ┃ rank ┃ ┡━━━━━━━━╇━━━━━━━┩ │ int64 │ int64 │ ├────────┼───────┤ │ 1 │ 0 │ │ 1 │ 0 │ │ 2 │ 2 │ │ 2 │ 2 │ │ 2 │ 2 │ │ 3 │ 5 │ └────────┴───────┘
topk(k, by=None)
Return a “top k” expression.
| Name | Type | Description | Default |
|---|---|---|---|
k |
int | Return this number of rows | required |
by |
ir.Value | None |
An expression. Defaults to count. |
None |
| Type | Description |
|---|---|
TableExpr |
A top-k expression |
value_counts()
Compute a frequency table.
| Type | Description |
|---|---|
Table |
Frequency table expression |
Scalar(self, arg)
| Name | Description |
|---|---|
| as_table | Promote the scalar expression to a table. |
as_table()
Promote the scalar expression to a table.
| Type | Description |
|---|---|
Table |
A table expression |
Promote an aggregation to a table
>>> import ibis
>>> import ibis.expr.types as ir
>>> t = ibis.table(dict(a="str"), name="t")
>>> expr = t.a.length().sum().name("len").as_table()
>>> isinstance(expr, ir.Table)True
Promote a literal value to a table
ibis.literal(value, type=None)
Create a scalar expression from a Python value.
Ibis supports literal construction of arrays using the following functions:
Constructing these types using literal will be deprecated in a future release.
| Name | Type | Description | Default |
|---|---|---|---|
value |
Any | A Python value | required |
type |
dt.DataType | str | None |
An instance of DataType or a string indicating the ibis type of value. This parameter can be used in cases where ibis’s type inference isn’t sufficient for discovering the type of value. |
None |
| Type | Description |
|---|---|
| Scalar | An expression representing a literal value |
Construct an integer literal
Construct a float64 literal from an int
Ibis checks for invalid types
ibis.param(type)
Create a deferred parameter of a given type.
| Name | Type | Description | Default |
|---|---|---|---|
type |
dt.DataType |
The type of the unbound parameter, e.g., double, int64, date, etc. | required |
| Type | Description |
|---|---|
Scalar |
A scalar expression backend by a parameter |
>>> from datetime import date
>>> import ibis
>>> start = ibis.param("date")
>>> t = ibis.memtable(
... {
... "date_col": [date(2013, 1, 1), date(2013, 1, 2), date(2013, 1, 3)],
... "value": [1.0, 2.0, 3.0],
... },
... )
>>> expr = t.filter(t.date_col >= start).value.sum()
>>> expr.execute(params={start: date(2013, 1, 1)})6.0
ibis.expr.api.NA
The NULL scalar.
ibis.null()
Create a NULL/NA scalar.
ibis.coalesce(*args)
Return the first non-null value from args.
| Name | Type | Description | Default |
|---|---|---|---|
args |
Any | Arguments from which to choose the first non-null value | () |
| Type | Description |
|---|---|
Value |
Coalesced expression |
ibis.least(*args)
Compute the smallest value among the supplied arguments.
| Name | Type | Description | Default |
|---|---|---|---|
args |
Any | Arguments to choose from | () |
| Type | Description |
|---|---|
Value |
Minimum of the passed arguments |
ibis.greatest(*args)
Compute the largest value among the supplied arguments.
| Name | Type | Description | Default |
|---|---|---|---|
args |
Any | Arguments to choose from | () |
| Type | Description |
|---|---|
Value |
Maximum of the passed arguments |
ibis.asc(expr)
Create a ascending sort key from asc or column name.
| Name | Type | Description | Default |
|---|---|---|---|
expr |
ir.Column | str |
The expression or column name to use for sorting | required |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch()
>>> t[["species", "year"]].order_by(ibis.asc("year")).head()┏━━━━━━━━━┳━━━━━━━┓ ┃ species ┃ year ┃ ┡━━━━━━━━━╇━━━━━━━┩ │ string │ int64 │ ├─────────┼───────┤ │ Adelie │ 2007 │ │ Adelie │ 2007 │ │ Adelie │ 2007 │ │ Adelie │ 2007 │ │ Adelie │ 2007 │ └─────────┴───────┘
| Type | Description |
|---|---|
ir.ValueExpr |
An expression |
ibis.desc(expr)
Create a descending sort key from expr or column name.
| Name | Type | Description | Default |
|---|---|---|---|
expr |
ir.Column | str |
The expression or column name to use for sorting | required |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch()
>>> t[["species", "year"]].order_by(ibis.desc("year")).head()┏━━━━━━━━━┳━━━━━━━┓ ┃ species ┃ year ┃ ┡━━━━━━━━━╇━━━━━━━┩ │ string │ int64 │ ├─────────┼───────┤ │ Adelie │ 2009 │ │ Adelie │ 2009 │ │ Adelie │ 2009 │ │ Adelie │ 2009 │ │ Adelie │ 2009 │ └─────────┴───────┘
| Type | Description |
|---|---|
ir.ValueExpr |
An expression |
ibis.ifelse(condition, true_expr, false_expr)
Construct a ternary conditional expression.
| Name | Type | Description | Default |
|---|---|---|---|
condition |
Any | A boolean expression | required |
true_expr |
Any | Expression to return if condition evaluates to True |
required |
false_expr |
Any | Expression to return if condition evaluates to False or NULL |
required |
| Type | Description |
|---|---|
ir.Value |
The value of true_expr if condition is True else false_expr |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"condition": [True, False, True, None]})
>>> ibis.ifelse(t.condition, "yes", "no")┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ IfElse(condition, 'yes', 'no') ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ string │ ├────────────────────────────────┤ │ yes │ │ no │ │ yes │ │ no │ └────────────────────────────────┘
ibis.case()
Begin constructing a case expression.
Use the .when method on the resulting object followed by .end to create a complete case expression.
| Type | Description |
|---|---|
SearchedCaseBuilder |
A builder object to use for constructing a case expression. |
>>> import ibis
>>> from ibis import _
>>> ibis.options.interactive = True
>>> t = ibis.memtable(
... {
... "left": [1, 2, 3, 4],
... "symbol": ["+", "-", "*", "/"],
... "right": [5, 6, 7, 8],
... }
... )
>>> t.mutate(
... result=(
... ibis.case()
... .when(_.symbol == "+", _.left + _.right)
... .when(_.symbol == "-", _.left - _.right)
... .when(_.symbol == "*", _.left * _.right)
... .when(_.symbol == "/", _.left / _.right)
... .end()
... )
... )┏━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━━┓ ┃ left ┃ symbol ┃ right ┃ result ┃ ┡━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━━┩ │ int64 │ string │ int64 │ float64 │ ├───────┼────────┼───────┼─────────┤ │ 1 │ + │ 5 │ 6.0 │ │ 2 │ - │ 6 │ -4.0 │ │ 3 │ * │ 7 │ 21.0 │ │ 4 │ / │ 8 │ 0.5 │ └───────┴────────┴───────┴─────────┘
ibis.to_sql(expr, dialect=None, **kwargs)
Return the formatted SQL string for an expression.
| Name | Type | Description | Default |
|---|---|---|---|
expr |
ir.Expr |
Ibis expression. | required |
dialect |
str | None | SQL dialect to use for compilation. | None |
kwargs |
Scalar parameters | {} |
| Type | Description |
|---|---|
| str | Formatted SQL string |