from __future__ import annotations
import ibis.expr.datatypes as dt
import ibis.expr.datashape as ds
import ibis.expr.rules as rlz
from ibis.expr.operations import Reduction, Value
class LastDate(Reduction):
arg: Value[dt.Date, ds.Any]
where: Value[dt.Boolean, ds.Any] | None = None
dtype = rlz.dtype_like("arg")
shape = ds.scalarAdd a reduction operation
This notebook will show you how to add a new reduction operation last_date to the existing backend SQLite.
A reduction operation is a function that maps \(N\) rows to 1 row, for example the sum function.
Description
We’re going to add a last_date function to ibis. last_date returns the latest date of a list of dates.
Step 1: Define the Operation
Let’s define the last_date operation as a function that takes any date column as input and returns a date:
from __future__ import annotations
from datetime import date
def last_date(dates: list[date]) -> date:
"""Latest date."""We just defined a LastDate class that takes one date column as input, and returns a scalar output of the same type as the input. This matches both the requirements of a reduction and the specifics of the function that we want to implement.
Note: It is very important that you write the correct argument rules and output type here. The expression will not work otherwise.
Step 2: Define the API
Because every reduction in Ibis has the ability to filter out values during aggregation, to make an expression out of LastDate we need to pass an additional argument where to our LastDate constructor.
Additionally, reductions should be defined on Column classes because reductions are not always well-defined for a scalar value.
from ibis.expr.types import DateColumn
def last_date(date_column, where=None):
return LastDate(date_column, where=where).to_expr()
DateColumn.last_date = last_dateInterlude: Create some expressions using last_date
import ibis
people = ibis.table(
dict(name="string", country="string", date_of_birth="date"),
name="people",
)people.date_of_birth.last_date()r0 := UnboundTable: people
name string
country string
date_of_birth date
LastDate(date_of_birth): LastDate(r0.date_of_birth)
people.date_of_birth.last_date(people.country == "Indonesia")r0 := UnboundTable: people
name string
country string
date_of_birth date
LastDate(date_of_birth, Equals(country, 'Indonesia')): LastDate(r0.date_of_birth, where=r0.country == 'Indonesia')
Step 3: Turn the Expression into SQL
import sqlalchemy as sa
@ibis.sqlite.add_operation(LastDate)
def _last_date(translator, expr):
# pull out the arguments to the expression
op = expr.op()
arg = op.arg
where = op.where
# compile the argument
compiled_arg = translator.translate(arg)
# call the appropriate SQLite function (`max` for the latest date)
agg = sa.func.max(compiled_arg)
# handle a non-None filter clause
if where is not None:
return agg.filter(translator.translate(where))
return aggStep 4: Putting it all Together
Download the geography database.
!curl -LsS -o geography.db 'https://storage.googleapis.com/ibis-tutorial-data/geography.db'
con = ibis.sqlite.connect("geography.db")Create and execute a bitwise_and expression
ind = con.table("independence")
indDatabaseTable: independence country_code string independence_date date independence_from string
Last country to gain independence in our database:
expr = ind.independence_date.last_date()
exprr0 := DatabaseTable: independence
country_code string
independence_date date
independence_from string
LastDate(independence_date): LastDate(r0.independence_date)
ibis.to_sql(expr)SELECT
MAX(t0.independence_date) AS "LastDate(independence_date)"
FROM independence AS t0Show the last country to gain independence from the Spanish Empire, using the where parameter:
expr = ind.independence_date.last_date(
where=ind.independence_from == "Spanish Empire"
)
exprr0 := DatabaseTable: independence
country_code string
independence_date date
independence_from string
LastDate(independence_date, Equals(independence_from, 'Spanish Empire')): LastDate(r0.independence_date, where=r0.independence_from == 'Spanish Empire')