>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(dict(a="float32"), name="t")
>>> expr = t.select(s.where(lambda col: col.get_name() == "a"))
>>> expr.columns
['a']
Choose Table columns based on dtype, regex, and other criteria
where(predicate)
Select columns that satisfy predicate.
Use this selector when one of the other selectors does not meet your needs.
| Name | Type | Description | Default |
|---|---|---|---|
| predicate | Callable[[ir.Value], bool] | A callable that accepts an ibis value expression and returns a bool | required |
numeric()
Return numeric columns.
of_type(dtype)
Select columns of type dtype.
| Name | Type | Description | Default |
|---|---|---|---|
| dtype | dt.DataType \| str \| type[dt.DataType] | DataType instance, str or DataType class | required |
Select according to a specific DataType instance
>>> import ibis
>>> import ibis.expr.datatypes as dt
>>> import ibis.selectors as s
>>> t = ibis.table(dict(name="string", siblings="array<string>", parents="array<int64>"))
>>> expr = t.select(s.of_type(dt.Array(dt.string)))
>>> expr.columns
['siblings']
Strings are also accepted
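Abstract/unparametrized types may also be specified by their string name (e.g. “integer” for any integer type), or by passing in a DataType class instead. The following options are equivalent.
>>> expr1 = t.select(s.of_type("array"))
>>> expr2 = t.select(s.of_type(dt.Array))
>>> expr1.equals(expr2)
True
>>> expr2.columns
['siblings', 'parents']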
startswith(prefixes)
Select columns whose name starts with one of prefixes.
| Name | Type | Description | Default |
|---|---|---|---|
| prefixes | str \| tuple[str, ...] | Prefixes to compare column names against | required |
endswith(suffixes)
Select columns whose name ends with one of suffixes.
| Name | Type | Description | Default |
|---|---|---|---|
| suffixes | str \| tuple[str, ...] | Suffixes to compare column names against | required |
contains(needles, how=any)
Return columns whose name contains needles.
| Name | Type | Description | Default |
|---|---|---|---|
| needles | str \| tuple[str, ...] | One or more strings to search for in column names | required |
| how | Callable[[Iterable[bool]], bool] | A boolean reduction to allow the configuration of how needles are summarized. | any |
Select columns that contain either "a" or "b"
>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(
... dict(a="int64", b="string", c="float", d="array<int16>", ab="struct<x: int>")
... )
>>> expr = t.select(s.contains(("a", "b")))
>>> expr.columns
['a', 'b', 'ab']
Select columns that contain all of "a" and "b", that is, both "a" and "b" must be in each column’s name to match.
>>> expr = t.select(s.contains(("a", "b"), how=all))
>>> expr.columns
['ab']
matches(regex)
Return columns whose name matches the regular expression regex.
| Name | Type | Description | Default |
|---|---|---|---|
| regex | str \| re.Pattern | A string or re.Pattern object | required |
any_of(*predicates)
Include columns satisfying any of predicates.
all_of(*predicates)
Include columns satisfying all of predicates.
c(*names)
Select specific column names.
across(selector, func, names=None)
Apply data transformations across multiple columns.
| Name | Type | Description | Default |
|---|---|---|---|
| selector | Selector \| Iterable[str] \| str | An expression that selects columns on which the transformation function will be applied, an iterable of str column names or a single str column name. | required |
| func | Deferred \| Callable[[ir.Value], ir.Value] \| Mapping[str \| None, Deferred \| Callable[[ir.Value], ir.Value]] | A function (or dictionary of functions) to use to transform the data. | required |
| names | str \| Callable[[str, str \| None], str] \| None | A lambda function or a format string to name the columns created by the transformation function. | None |
| Type | Description |
|---|---|
| Across | An Across selector object |
>>> import ibis
>>> ibis.options.interactive = True
>>> from ibis import _, selectors as s
>>> t = ibis.examples.penguins.fetch()
>>> t.select(s.startswith("bill")).mutate(
...     s.across(s.numeric(), dict(centered=_ - _.mean()), names="{fn}_{col}")
... )
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ bill_length_mm ┃ bill_depth_mm ┃ centered_bill_length_mm ┃ centered_bill_depth_mm ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩
│ float64        │ float64       │ float64                 │ float64                │
├────────────────┼───────────────┼─────────────────────────┼────────────────────────┤
│           39.1 │          18.7 │                -4.82193 │                1.54883 │
│           39.5 │          17.4 │                -4.42193 │                0.24883 │
│           40.3 │          18.0 │                -3.62193 │                0.84883 │
│           NULL │          NULL │                    NULL │                   NULL │
│           36.7 │          19.3 │                -7.22193 │                2.14883 │
│           39.3 │          20.6 │                -4.62193 │                3.44883 │
│           38.9 │          17.8 │                -5.02193 │                0.64883 │
│           39.2 │          19.6 │                -4.72193 │                2.44883 │
│           34.1 │          18.1 │                -9.82193 │                0.94883 │
│           42.0 │          20.2 │                -1.92193 │                3.04883 │
│              … │             … │                       … │                      … │
└────────────────┴───────────────┴─────────────────────────┴────────────────────────┘
if_any(selector, predicate)
Return the disjunction of predicate applied on all selector columns.
| Name | Type | Description | Default |
|---|---|---|---|
| selector | Selector | A column selector | required |
| predicate | Deferred \| Callable | A callable or deferred object defining a predicate to apply to each column from selector. | required |
>>> import ibis
>>> from ibis import selectors as s, _
>>> ibis.options.interactive = True
>>> penguins = ibis.examples.penguins.fetch()
>>> cols = s.across(s.endswith("_mm"), (_ - _.mean()) / _.std())
>>> expr = penguins.mutate(cols).filter(s.if_any(s.endswith("_mm"), _.abs() > 2))
>>> expr_by_hand = penguins.mutate(cols).filter(
... (_.bill_length_mm.abs() > 2)
... | (_.bill_depth_mm.abs() > 2)
... | (_.flipper_length_mm.abs() > 2)
... )
>>> expr.equals(expr_by_hand)
True
>>> expr
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex    ┃ year  ┃
┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string  │ string │ float64        │ float64       │ float64           │ int64       │ string │ int64 │
├─────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie  │ Biscoe │      -1.103002 │      0.733662 │         -2.056307 │        3150 │ female │  2007 │
│ Gentoo  │ Biscoe │       1.113285 │     -0.431017 │          2.068368 │        5700 │ male   │  2007 │
│ Gentoo  │ Biscoe │       2.871660 │     -0.076550 │          2.068368 │        6050 │ male   │  2007 │
│ Gentoo  │ Biscoe │       1.900890 │     -0.734846 │          2.139483 │        5650 │ male   │  2008 │
│ Gentoo  │ Biscoe │       1.076652 │     -0.177826 │          2.068368 │        5700 │ male   │  2008 │
│ Gentoo  │ Biscoe │       0.856855 │     -0.582932 │          2.068368 │        5800 │ male   │  2008 │
│ Gentoo  │ Biscoe │       1.497929 │     -0.076550 │          2.068368 │        5550 │ male   │  2009 │
│ Gentoo  │ Biscoe │       1.388031 │     -0.431017 │          2.068368 │        5500 │ male   │  2009 │
│ Gentoo  │ Biscoe │       2.047422 │     -0.582932 │          2.068368 │        5850 │ male   │  2009 │
│ Adelie  │ Dream  │      -2.165354 │     -0.836123 │         -0.918466 │        3050 │ female │  2009 │
│ …       │ …      │              … │             … │                 … │           … │ …      │     … │
└─────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
if_all(selector, predicate)
Return the conjunction of predicate applied on all selector columns.
| Name | Type | Description | Default |
|---|---|---|---|
| selector | Selector | A column selector | required |
| predicate | Deferred \| Callable | A callable or deferred object defining a predicate to apply to each column from selector. | required |
>>> import ibis
>>> from ibis import selectors as s, _
>>> ibis.options.interactive = True
>>> penguins = ibis.examples.penguins.fetch()
>>> cols = s.across(s.endswith("_mm"), (_ - _.mean()) / _.std())
>>> expr = penguins.mutate(cols).filter(s.if_all(s.endswith("_mm"), _.abs() > 1))
>>> expr_by_hand = penguins.mutate(cols).filter(
... (_.bill_length_mm.abs() > 1)
... & (_.bill_depth_mm.abs() > 1)
... & (_.flipper_length_mm.abs() > 1)
... )
>>> expr.equals(expr_by_hand)
True
>>> expr
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island    ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex    ┃ year  ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string  │ string    │ float64        │ float64       │ float64           │ int64       │ string │ int64 │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie  │ Dream     │      -1.157951 │      1.088129 │         -1.416272 │        3300 │ female │  2007 │
│ Adelie  │ Torgersen │      -1.231217 │      1.138768 │         -1.202926 │        3900 │ male   │  2008 │
│ Gentoo  │ Biscoe    │       1.149917 │     -1.443781 │          1.214987 │        5700 │ male   │  2007 │
│ Gentoo  │ Biscoe    │       1.040019 │     -1.089314 │          1.072757 │        4750 │ male   │  2008 │
│ Gentoo  │ Biscoe    │       1.131601 │     -1.089314 │          1.712792 │        5000 │ male   │  2008 │
│ Gentoo  │ Biscoe    │       1.241499 │     -1.089314 │          1.570562 │        5550 │ male   │  2008 │
│ Gentoo  │ Biscoe    │       1.351398 │     -1.494420 │          1.214987 │        5300 │ male   │  2009 │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
r
Ranges of columns.
first()
Return the first column of a table.
last()
Return the last column of a table.
all()
Return every column from a table.