from __future__ import annotations
import ibis.expr.datatypes as dt
import ibis.expr.datashape as ds
import ibis.expr.rules as rlz
from ibis.expr.operations import Reduction, Value
class LastDate(Reduction):
arg: Value[dt.Date, ds.Any]| None = None
where: Value[dt.Boolean, ds.Any]
= rlz.dtype_like("arg")
dtype = ds.scalar shape
Add a reduction operation
This notebook will show you how to add a new reduction operation last_date
to the existing backend SQLite.
A reduction operation is a function that maps \(N\) rows to 1 row, for example the sum
function.
Description
We’re going to add a last_date
function to ibis. last_date
returns the latest date of a list of dates.
Step 1: Define the Operation
Let’s define the last_date
operation as a function that takes any date column as input and returns a date:
from __future__ import annotations
from datetime import date
def last_date(dates: list[date]) -> date:
"""Latest date."""
We just defined a LastDate
class that takes one date column as input, and returns a scalar output of the same type as the input. This matches both the requirements of a reduction and the specifics of the function that we want to implement.
Note: It is very important that you write the correct argument rules and output type here. The expression will not work otherwise.
Step 2: Define the API
Because every reduction in Ibis has the ability to filter out values during aggregation, to make an expression out of LastDate
we need to pass an additional argument where
to our LastDate
constructor.
Additionally, reductions should be defined on Column
classes because reductions are not always well-defined for a scalar value.
from ibis.expr.types import DateColumn
def last_date(date_column, where=None):
return LastDate(date_column, where=where).to_expr()
= last_date DateColumn.last_date
Interlude: Create some expressions using last_date
import ibis
= ibis.table(
people dict(name="string", country="string", date_of_birth="date"),
="people",
name )
people.date_of_birth.last_date()
r0 := UnboundTable: people
name string
country string
date_of_birth date
LastDate(date_of_birth): LastDate(r0.date_of_birth)
== "Indonesia") people.date_of_birth.last_date(people.country
r0 := UnboundTable: people
name string
country string
date_of_birth date
LastDate(date_of_birth, Equals(country, 'Indonesia')): LastDate(r0.date_of_birth, where=r0.country == 'Indonesia')
Step 3: Turn the Expression into SQL
import sqlalchemy as sa
@ibis.sqlite.add_operation(LastDate)
def _last_date(translator, expr):
# pull out the arguments to the expression
= expr.op()
op
= op.arg
arg = op.where
where
# compile the argument
= translator.translate(arg)
compiled_arg
# call the appropriate SQLite function (`max` for the latest date)
= sa.func.max(compiled_arg)
agg
# handle a non-None filter clause
if where is not None:
return agg.filter(translator.translate(where))
return agg
Step 4: Putting it all Together
Download the geography database.
!curl -LsS -o geography.db 'https://storage.googleapis.com/ibis-tutorial-data/geography.db'
= ibis.sqlite.connect("geography.db") con
Create and execute a bitwise_and
expression
= con.table("independence")
ind ind
DatabaseTable: independence country_code string independence_date date independence_from string
Last country to gain independence in our database:
= ind.independence_date.last_date()
expr expr
r0 := DatabaseTable: independence
country_code string
independence_date date
independence_from string
LastDate(independence_date): LastDate(r0.independence_date)
ibis.to_sql(expr)
SELECT
MAX(t0.independence_date) AS "LastDate(independence_date)"
FROM independence AS t0
Show the last country to gain independence from the Spanish Empire, using the where
parameter:
= ind.independence_date.last_date(
expr =ind.independence_from == "Spanish Empire"
where
) expr
r0 := DatabaseTable: independence
country_code string
independence_date date
independence_from string
LastDate(independence_date, Equals(independence_from, 'Spanish Empire')): LastDate(r0.independence_date, where=r0.independence_from == 'Spanish Empire')