"""
Accounting — turning weights/quantities and returns/prices into per-bar strategy P&L and the capital curves.
"""
import polars as pl
from pomata._expr import float64_expr, validate_positive
# Public API of the accounting module, kept in alphabetical order.
__all__ = (
    "cumulative_pnl",
    "dividend",
    "equity_curve",
    "pnl_gross",
    "pnl_gross_inverse",
    "pnl_net",
    "returns_gross",
    "returns_net",
    "turnover",
)
def cumulative_pnl(
    returns: pl.Expr,
) -> pl.Expr:
    r"""
    Cumulative P&L: the running additive total of a per-bar P&L (or return) series.

    Every row holds the sum of all per-bar values up to and including that row:

    .. math::

        \mathrm{cumPnL}_t = \sum_{i \le t} x_i.

    Currency P&L is **additive** (dollars are summed, never compounded), so on the cash / position flow this running
    sum is the total P&L to date and pairs naturally with :func:`pnl_net`. On the return flow, where capital is
    reinvested, the conventional "cumulative return" is **compounded** instead; use :func:`equity_curve` for that.
    The per-bar inputs are the same in both flows; only the way they are cumulated differs.

    Args:
        returns: Per-bar values to accumulate: the strategy's net P&L (e.g. from :func:`pnl_net`) for a currency
            total, or a net-return series for an additive (fixed-notional) return total.

    Returns:
        The running sum at each row, the same length as ``returns``.

    Raises:
        TypeError: If any input is not a ``pl.Expr``.

    Note:
        **Correctness** -- the result is checked against an independent reference oracle on every input, and every
        edge case (missing data, boundaries, and warm-up where applicable) is given a defined behavior.

        **Edge-case behavior:**

        - **Null** — a ``null`` value adds nothing to the total and yields ``null`` on its own row; the running sum
          resumes unchanged on the next defined row (the gap is skipped, not treated as a break).
        - **NaN** — a ``NaN`` value enters the running sum, so that row and every later row are ``NaN`` (unlike a
          ``null`` gap, it is a real value and contaminates the total).
        - **Partitioning** — on a multi-series panel wrap the call in ``.over(...)`` so the total restarts for each
          series and never leaks across boundaries, e.g. ``cumulative_pnl(pl.col("returns")).over("ticker")``.

    See Also:
        - :func:`equity_curve`: The compounded (reinvested) cumulation, a running product of one-plus-returns.
        - :func:`pnl_net`: The per-bar net P&L usually accumulated here on the cash flow.
        - :func:`returns_net`: The per-bar net return accumulated for an additive, fixed-notional total.

    References:
        - https://en.wikipedia.org/wiki/Rate_of_return

    Examples:
        Accumulating a single per-bar P&L series:

        >>> import polars as pl
        >>> from pomata.pnl import cumulative_pnl
        >>>
        >>> frame = pl.DataFrame({"returns": [0.1, -0.05, 0.2, 0.1, -0.15, 0.05, 0.3, -0.1]})
        >>> frame.select(cumulative_pnl(pl.col("returns")).round(4).alias("cumulative_pnl"))["cumulative_pnl"].to_list()
        [0.1, 0.05, 0.25, 0.35, 0.2, 0.25, 0.55, 0.45]

        With several tickers in one frame, ``.over`` makes each ticker accumulate on its own:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "ticker": ["A"] * 4 + ["B"] * 4,
        ...         "returns": [0.1, 0.2, -0.05, 0.1, 0.0, 0.1, 0.1, -0.2],
        ...     }
        ... )
        >>> frame.with_columns(cumulative_pnl(pl.col("returns")).over("ticker").round(4).alias("c"))["c"].to_list()
        [0.1, 0.3, 0.25, 0.35, 0.0, 0.1, 0.2, 0.0]

        Missing data: a ``null`` is skipped (the total carries over it), whereas a later ``NaN`` contaminates every
        row from that point on:

        >>> frame = pl.DataFrame({"returns": [0.1, None, 0.2, float("nan"), 0.1]})
        >>> frame.select(cumulative_pnl(pl.col("returns")).round(4).alias("cumulative_pnl"))["cumulative_pnl"].to_list()
        [0.1, None, 0.3, nan, nan]
    """
    per_bar = float64_expr(returns)
    # The null / NaN behavior described in the Note is exactly that of Polars' cum_sum: nulls are skipped (and emit
    # null), NaNs flow into the total.
    running_total = per_bar.cum_sum()
    return running_total
def dividend(
    quantity: pl.Expr,
    dividend_per_share: pl.Expr,
) -> pl.Expr:
    r"""
    Dividend cashflow: the per-bar dividend income (or expense) generated by a held quantity.

    The cash a position collects when the instrument distributes a dividend is the quantity held multiplied by the
    dividend paid per share on that bar (a long position receives it, a short position pays it):

    .. math::

        d_t = q_t \cdot \mathrm{dps}_t, \qquad q = \text{quantity},\ \mathrm{dps} = \text{dividend per share}.

    The per-share dividend is itself a per-bar series: zero on ordinary bars, the cash amount on ex-dividend bars.
    This is a holding cashflow on the **income** side rather than a cost, so add it to the gross PnL (e.g.
    ``pnl_gross(...) + dividend(...)``) before costs are subtracted.

    Args:
        quantity: Signed position size in units / shares / contracts held over the bar; a positive (long) quantity
            receives the dividend, a negative (short) quantity pays it.
        dividend_per_share: Dividend paid per share on the bar (e.g. ``pl.col("dividend")``); zero on ordinary bars.

    Returns:
        The dividend cashflow at each row, the same length as the inputs.

    Raises:
        TypeError: If any input is not a ``pl.Expr``.

    Note:
        **Correctness** -- the result is checked against an independent reference oracle on every input, and every
        edge case (missing data, boundaries, and warm-up where applicable) is given a defined behavior.

        **Edge-case behavior:**

        - **Null** — if either input is ``null`` the row is ``null`` (``null`` wins over ``NaN``).
        - **NaN** — if either input is ``NaN`` and neither is ``null``, the row is ``NaN``.
        - **Partitioning** — each row depends only on its own pair of values, so ``.over(...)`` gives the same
          result and is optional here, unlike the lagged / cumulative functions.

    See Also:
        - :func:`pnl_gross`: The gross position PnL to which this dividend income is added.
        - :func:`cost_borrow`: The equity holding cashflow on the cost side (short-borrow).
        - :func:`cost_funding`: The perpetual-swap funding leg, another per-bar holding cashflow.

    References:
        - https://en.wikipedia.org/wiki/Dividend

    Examples:
        A held quantity against a per-share dividend series:

        >>> import polars as pl
        >>> from pomata.pnl import dividend
        >>>
        >>> frame = pl.DataFrame(
        ...     {
        ...         "quantity": [100.0, 100.0, 100.0, 0.0, -50.0, -50.0, 200.0, 200.0],
        ...         "dividend_per_share": [0.0, 0.0, 0.5, 0.0, 0.5, 0.5, 0.0, 0.0],
        ...     }
        ... )
        >>> expr = dividend(pl.col("quantity"), pl.col("dividend_per_share")).round(4)
        >>> frame.select(expr.alias("dividend"))["dividend"].to_list()
        [0.0, 0.0, 50.0, 0.0, -25.0, -25.0, 0.0, 0.0]

        Because the product is elementwise, ``.over`` changes nothing; it appears here only for consistency:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "ticker": ["A"] * 4 + ["B"] * 4,
        ...         "quantity": [100.0, 100.0, 100.0, 0.0, 50.0, 50.0, -50.0, -50.0],
        ...         "dividend_per_share": [0.0, 0.0, 0.5, 0.0, 0.0, 0.3, 0.3, 0.3],
        ...     }
        ... )
        >>> expr = dividend(pl.col("quantity"), pl.col("dividend_per_share")).over("ticker").round(4)
        >>> frame.with_columns(expr.alias("d"))["d"].to_list()
        [0.0, 0.0, 50.0, 0.0, 0.0, 15.0, -15.0, -15.0]

        Missing data: a ``null`` and then a ``NaN`` in ``quantity`` each pass straight through the product:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "quantity": [100.0, None, 100.0, float("nan"), -50.0],
        ...         "dividend_per_share": [0.5, 0.5, 0.5, 0.5, 0.5],
        ...     }
        ... )
        >>> expr = dividend(pl.col("quantity"), pl.col("dividend_per_share")).round(4)
        >>> frame.select(expr.alias("dividend"))["dividend"].to_list()
        [50.0, None, 50.0, nan, -25.0]
    """
    held = float64_expr(quantity)
    per_share = float64_expr(dividend_per_share)
    # Row-by-row product of the position and the per-share payout. Null and NaN both propagate through the
    # multiplication, with null winning when both are present.
    cashflow = held * per_share
    return cashflow
def equity_curve(
    returns: pl.Expr,
) -> pl.Expr:
    r"""
    Equity curve: the compounded growth of one unit of capital along a return series.

    Each row is the running product of the gross returns (one plus the per-bar return), i.e. the value of one unit of
    capital that **reinvests** its P&L every bar so each return compounds on the already-grown capital:

    .. math::

        \mathrm{equity}_t = \prod_{i \le t} (1 + r_i).

    This is the standard equity curve and the multiplicative twin of :func:`cumulative_pnl`. Use it when P&L is
    reinvested (the total-return convention); use the additive :func:`cumulative_pnl` when the notional is held
    fixed. It is also the natural input to a drawdown, so the metrics family consumes it directly.

    Args:
        returns: Per-bar returns to compound, typically the strategy's gross or net returns (e.g. from
            :func:`returns_gross`).

    Returns:
        The compounded equity at each row, the same length as ``returns``, as a growth factor relative to a starting
        capital of ``1`` (multiply by the starting capital to obtain a currency curve).

    Raises:
        TypeError: If any input is not a ``pl.Expr``.

    Note:
        **Correctness** -- the result is checked against an independent reference oracle on every input, and every
        edge case (missing data, boundaries, and warm-up where applicable) is given a defined behavior.

        **Edge-case behavior:**

        - **Null** — a ``null`` return yields ``null`` on its own row while the running product continues unchanged
          past it (the missing bar acts as a neutral factor of one instead of breaking the curve). A leading warm-up
          ``null`` (e.g. the first row of :func:`returns_simple`) therefore remains ``null``, and the curve starts
          at the first defined return.
        - **NaN** — a ``NaN`` return enters the running product, so that row and every later row are ``NaN``.
        - **Partitioning** — on a multi-series panel wrap the call in ``.over(...)`` so the product restarts for
          each series and never leaks across boundaries, e.g. ``equity_curve(pl.col("returns")).over("ticker")``.

    See Also:
        - :func:`cumulative_pnl`: The additive (fixed-notional) twin, a cumulative sum of returns.
        - :func:`returns_gross`: The per-bar strategy returns usually compounded here.
        - :func:`drawdown`: The metric that consumes this equity curve, its decline from the running peak.

    References:
        - https://en.wikipedia.org/wiki/Rate_of_return

    Examples:
        Compounding a single per-bar return series:

        >>> import polars as pl
        >>> from pomata.pnl import equity_curve
        >>>
        >>> frame = pl.DataFrame({"returns": [0.1, -0.05, 0.2, 0.1, -0.15, 0.05, 0.3, -0.1]})
        >>> frame.select(equity_curve(pl.col("returns")).round(4).alias("equity"))["equity"].to_list()
        [1.1, 1.045, 1.254, 1.3794, 1.1725, 1.2311, 1.6004, 1.4404]

        With several tickers in one frame, ``.over`` makes each ticker compound on its own:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "ticker": ["A"] * 4 + ["B"] * 4,
        ...         "returns": [0.1, 0.2, -0.05, 0.1, 0.0, 0.1, 0.1, -0.2],
        ...     }
        ... )
        >>> frame.with_columns(equity_curve(pl.col("returns")).over("ticker").round(4).alias("e"))["e"].to_list()
        [1.1, 1.32, 1.254, 1.3794, 1.0, 1.1, 1.21, 0.968]

        Missing data: a leading ``null`` remains ``null`` (the curve starts at the first defined return), and a
        later ``NaN`` contaminates every row from that point on:

        >>> frame = pl.DataFrame({"returns": [None, 0.1, 0.2, float("nan"), 0.1]})
        >>> frame.select(equity_curve(pl.col("returns")).round(4).alias("equity"))["equity"].to_list()
        [None, 1.1, 1.32, nan, nan]
    """
    per_bar = float64_expr(returns)
    # Turn each return into its gross growth factor, then compound. The null / NaN behavior described in the Note is
    # exactly that of Polars' cum_prod: nulls are skipped (and emit null), NaNs flow into the product.
    growth_factor = 1.0 + per_bar
    return growth_factor.cum_prod()
def pnl_gross(
    quantity: pl.Expr,
    price: pl.Expr,
    *,
    multiplier: float = 1.0,
) -> pl.Expr:
    r"""
    Gross position PnL: the per-bar mark-to-market profit and loss of a held quantity.

    For each bar, the signed quantity held is multiplied by the bar's price change and by the contract multiplier,
    giving the strategy's gross P&L in the price's currency before transaction costs. It is the cash / position
    flow's counterpart to :func:`returns_gross`: use it when you hold a **quantity** of an instrument at a **price**
    (so the instrument's multiplier, and later dividends / funding / FX, can be booked honestly) rather than a
    weight and a return.

    .. math::

        \mathrm{pnl}^{\mathrm{gross}}_t = q_t \cdot (P_t - P_{t-1}) \cdot m, \qquad q = \text{quantity},\ m =
        \text{multiplier}.

    Summed through time this is the **total** mark-to-market PnL (realized and unrealized together); pomata does not
    separate the two, because that requires cost-basis lot accounting, which a vectorized ``pl.Expr`` does not carry.

    Args:
        quantity: Signed position size in units / shares / contracts held over the bar (e.g. ``100``, ``-2``).
        price: Instrument price series (e.g. ``pl.col("close")``); must share a length and alignment with
            ``quantity``.
        multiplier: Contract multiplier / point value (e.g. ``50`` for an E-mini S&P future); ``1.0`` for cash equity
            and spot. Must be a finite number ``> 0``.

    Returns:
        The gross PnL at each row, the same length as the inputs. The first value is ``null`` (warm-up): the previous
        price ``price.shift(1)`` does not exist for the first row, so no price change can be measured there.

    Raises:
        TypeError: If any input is not a ``pl.Expr``.
        ValueError: If ``multiplier`` is not a finite number ``> 0`` (i.e. ``<= 0``, ``NaN``, or ``±inf``).

    Note:
        **Correctness** -- the result is checked against an independent reference oracle on every input, and every
        edge case (missing data, boundaries, and warm-up where applicable) is given a defined behavior.

        **No lookahead (alignment is the caller's):** ``quantity`` at row ``t`` is taken to be the position held
        over the price change into row ``t``. To remain lookahead-free that quantity may depend only on information
        available before that price; if it is decided on the same bar's close, lag it by one bar
        (``pnl_gross(quantity.shift(1), price)``). The function shifts nothing on your behalf, so a quantity that is
        already aligned is never double-shifted.

        **Edge-case behavior:**

        - **Null** — a ``null`` in ``quantity``, ``price``, or the previous ``price`` makes the row ``null``
          (``null`` wins over ``NaN``).
        - **NaN** — a ``NaN`` in either input (with no ``null``) makes the row ``NaN``.
        - **Partitioning** — on a multi-series panel wrap the call in ``.over(...)`` so the one-bar price change
          never reaches across series boundaries, e.g.
          ``pnl_gross(pl.col("quantity"), pl.col("price")).over("ticker")``.

    See Also:
        - :func:`returns_gross`: The return-flow counterpart (weight times asset return).
        - :func:`pnl_net`: Subtracts the composed cost from this gross PnL.
        - :func:`pnl_gross_inverse`: The coin-margined (inverse-contract) version, nonlinear in price.

    References:
        - https://en.wikipedia.org/wiki/Mark-to-market_accounting

    Examples:
        A held quantity marked against a price series:

        >>> import polars as pl
        >>> from pomata.pnl import pnl_gross
        >>>
        >>> frame = pl.DataFrame(
        ...     {
        ...         "quantity": [10.0, 10.0, -5.0, -5.0, 20.0, 20.0, -10.0, -10.0],
        ...         "price": [100.0, 102.0, 101.0, 104.0, 103.0, 105.0, 104.0, 106.0],
        ...     }
        ... )
        >>> frame.select(pnl_gross(pl.col("quantity"), pl.col("price")).round(4).alias("pnl"))["pnl"].to_list()
        [None, 20.0, 5.0, -15.0, -20.0, 40.0, 10.0, -20.0]

        With several tickers in one frame, ``.over`` gives each ticker its own warm-up row:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "ticker": ["A"] * 4 + ["B"] * 4,
        ...         "quantity": [10.0, 10.0, -5.0, -5.0, 2.0, 2.0, 2.0, 2.0],
        ...         "price": [100.0, 102.0, 101.0, 104.0, 50.0, 51.0, 49.0, 52.0],
        ...     }
        ... )
        >>> frame.with_columns(pnl_gross(pl.col("quantity"), pl.col("price")).over("ticker").round(4).alias("p"))[
        ...     "p"
        ... ].to_list()
        [None, 20.0, 5.0, -15.0, None, 2.0, -4.0, 6.0]

        Missing data: the warm-up ``null`` on row 0 (no prior price), followed by a ``null`` and a ``NaN`` in
        ``quantity`` that each void only their own row:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "quantity": [10.0, None, -5.0, float("nan"), 20.0],
        ...         "price": [100.0, 102.0, 101.0, 104.0, 103.0],
        ...     }
        ... )
        >>> frame.select(pnl_gross(pl.col("quantity"), pl.col("price")).round(4).alias("pnl"))["pnl"].to_list()
        [None, None, 5.0, nan, -20.0]
    """
    held = float64_expr(quantity)
    mark = float64_expr(price)
    validate_positive(multiplier, "multiplier")
    # One-bar price move; row 0 has no predecessor, so the shift leaves it null.
    price_change = mark - mark.shift(1)
    # Position times move, then scaled by the contract multiplier. Null and NaN propagate (null wins), and the
    # quantity is deliberately not lagged here (see the Note).
    unscaled_pnl = held * price_change
    return unscaled_pnl * multiplier
def pnl_gross_inverse(
    quantity: pl.Expr,
    price: pl.Expr,
    *,
    multiplier: float = 1.0,
) -> pl.Expr:
    r"""
    Gross inverse-contract PnL (coin-margined): the per-bar mark-to-market P&L settled in the base coin.

    An inverse (coin-margined) perpetual or futures contract has a fixed notional in the **quote** currency (e.g.
    ``1`` USD per contract) yet settles profit and loss in the **base** coin (e.g. BTC). One contract is therefore
    worth the reciprocal of the price, and the PnL is the signed quantity times the contract notional times the
    one-bar change in that reciprocal. The result is nonlinear in the price, which is the one case the linear
    :func:`pnl_gross` cannot express:

    .. math::

        \mathrm{pnl}^{\mathrm{gross}}_t = q_t \cdot m \cdot \left( \frac{1}{P_{t-1}} - \frac{1}{P_t} \right), \qquad
        q = \text{quantity},\ m = \text{multiplier}.

    As with a linear contract, a long gains when the price rises (the reciprocal falls). The coin-denominated payoff
    is, however, concave in the price for a long (convex for a short), because the contract's coin value ``1 / P``
    is convex. Summed through time this is the **total** mark-to-market PnL (realized and unrealized together);
    pomata does not separate the two, because that requires cost-basis lot accounting, which a vectorized
    ``pl.Expr`` does not carry.

    Args:
        quantity: Signed position size in units / shares / contracts held over the bar (e.g. ``100``, ``-2``).
        price: Instrument price series, quoted as quote per base unit (e.g. USD per BTC, ``pl.col("close")``); must
            be strictly positive (see the **Domain** note) and share a length and alignment with ``quantity``.
        multiplier: Contract notional in the quote currency, i.e. the quote value of one contract (e.g. ``1`` USD
            for an inverse BTC/USD perpetual, ``100`` on some venues); ``1.0`` for a one-unit contract. Must be a
            finite number ``> 0``.

    Returns:
        The gross PnL at each row, in the base coin, the same length as the inputs. The first value is ``null``
        (warm-up): the previous price ``price.shift(1)`` does not exist for the first row, so no price change can be
        measured there.

    Raises:
        TypeError: If any input is not a ``pl.Expr``.
        ValueError: If ``multiplier`` is not a finite number ``> 0`` (i.e. ``<= 0``, ``NaN``, or ``±inf``).

    Note:
        **Correctness** -- the result is checked against an independent reference oracle on every input, and every
        edge case (missing data, boundaries, and warm-up where applicable) is given a defined behavior.

        **No lookahead (alignment is the caller's):** ``quantity`` at row ``t`` is taken to be the position held
        over the price change into row ``t``. To remain lookahead-free that quantity may depend only on information
        available before that price; if it is decided on the same bar's close, lag it by one bar
        (``pnl_gross_inverse(quantity.shift(1), price)``). The function shifts nothing on your behalf, so a quantity
        that is already aligned is never double-shifted.

        **Domain** — the payoff is defined for strictly positive prices. Under IEEE-754 division a zero current
        price makes ``1 / P_t`` infinite, so the bar is ``-inf`` for a long or ``+inf`` for a short; a zero previous
        price makes ``1 / P_{t-1}`` infinite, so the bar takes the opposite sign; and a negative price produces a
        finite but economically meaningless value (the reciprocal flips sign). These boundary values are documented
        and intended; they are not raised as errors.

        **Edge-case behavior:**

        - **Null** — a ``null`` in ``quantity``, ``price``, or the previous ``price`` makes the row ``null``
          (``null`` wins over ``NaN``).
        - **NaN** — a ``NaN`` in either input (with no ``null``) makes the row ``NaN``.
        - **Partitioning** — on a multi-series panel wrap the call in ``.over(...)`` so the one-bar price change
          never reaches across series boundaries, e.g.
          ``pnl_gross_inverse(pl.col("quantity"), pl.col("price")).over("ticker")``.

    See Also:
        - :func:`pnl_gross`: The linear (quote-margined) counterpart; use it when the contract settles in the quote
          currency rather than the base coin.
        - :func:`pnl_net`: Subtracts the composed cost from this gross PnL.
        - :func:`cost_funding`: The perpetual-swap funding leg, the companion holding cost.

    References:
        - https://en.wikipedia.org/wiki/Perpetual_futures

    Examples:
        An inverse (coin-margined) contract marked against its price:

        >>> import polars as pl
        >>> from pomata.pnl import pnl_gross_inverse
        >>>
        >>> frame = pl.DataFrame(
        ...     {
        ...         "quantity": [1.0, 1.0, -2.0, -2.0, 3.0, 3.0, -1.0, -1.0],
        ...         "price": [100.0, 110.0, 105.0, 120.0, 115.0, 118.0, 112.0, 120.0],
        ...     }
        ... )
        >>> expr = pnl_gross_inverse(pl.col("quantity"), pl.col("price")).round(6)
        >>> frame.select(expr.alias("pnl"))["pnl"].to_list()
        [None, 0.000909, 0.000866, -0.002381, -0.001087, 0.000663, 0.000454, -0.000595]

        With several tickers in one frame, ``.over`` gives each ticker its own warm-up row:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "ticker": ["A"] * 4 + ["B"] * 4,
        ...         "quantity": [1.0, 1.0, -2.0, -2.0, 2.0, 2.0, 2.0, 2.0],
        ...         "price": [100.0, 110.0, 105.0, 120.0, 50.0, 55.0, 52.0, 58.0],
        ...     }
        ... )
        >>> frame.with_columns(
        ...     pnl_gross_inverse(pl.col("quantity"), pl.col("price")).over("ticker").round(6).alias("p")
        ... )["p"].to_list()
        [None, 0.000909, 0.000866, -0.002381, None, 0.003636, -0.002098, 0.003979]

        Missing data: the warm-up ``null`` on row 0 (no prior price), followed by a ``null`` and a ``NaN`` in
        ``quantity`` that each void only their own row:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "quantity": [1.0, None, -2.0, float("nan"), 3.0],
        ...         "price": [100.0, 110.0, 105.0, 120.0, 115.0],
        ...     }
        ... )
        >>> expr = pnl_gross_inverse(pl.col("quantity"), pl.col("price")).round(6)
        >>> frame.select(expr.alias("pnl"))["pnl"].to_list()
        [None, None, 0.000866, nan, -0.001087]
    """
    held = float64_expr(quantity)
    mark = float64_expr(price)
    validate_positive(multiplier, "multiplier")
    # Quote-currency notional of the position: signed contract count times the per-contract notional.
    notional = held * multiplier
    # One contract is worth 1/price in the base coin, so the bar's move is the drop in that reciprocal. Row 0 has no
    # previous price and is null; null and NaN propagate (null wins); no lag is applied (see the Note).
    reciprocal_drop = 1.0 / mark.shift(1) - 1.0 / mark
    return notional * reciprocal_drop
def pnl_net(
    pnl_gross: pl.Expr,
    cost: pl.Expr,
) -> pl.Expr:
    r"""
    Net position PnL: the gross position PnL after transaction costs.

    The per-bar transaction cost is subtracted from the per-bar gross position PnL, both expressed in the account
    currency. The result is the cash flow's net P&L, the counterpart of :func:`returns_net`:

    .. math::

        \mathrm{pnl}^{\mathrm{net}}_t = \mathrm{pnl}^{\mathrm{gross}}_t - c_t.

    No cost model is built in; this is a plain elementwise subtraction. The caller assembles the cost from the cost
    components (adding several together with ``+``) and passes it in, e.g.
    ``pnl_net(pnl_gross(quantity, price), cost_per_share(quantity, fee) + cost_notional(quantity, price, rate))``.

    Args:
        pnl_gross: Gross per-bar position PnL, typically from :func:`pnl_gross`.
        cost: Per-bar transaction cost in the same currency, typically from :func:`cost_per_share` (sum several with
            ``+``).

    Returns:
        The net PnL at each row, the same length as the inputs.

    Raises:
        TypeError: If any input is not a ``pl.Expr``.

    Note:
        **Correctness** -- the result is checked against an independent reference oracle on every input, and every
        edge case (missing data, boundaries, and warm-up where applicable) is given a defined behavior.

        **Edge-case behavior:**

        - **Null** — if either input is ``null`` the row is ``null`` (``null`` wins over ``NaN``).
        - **NaN** — if either input is ``NaN`` and neither is ``null``, the row is ``NaN``.
        - **Partitioning** — each row depends only on its own pair of values, so ``.over(...)`` gives the same
          result and is optional here, unlike the lagged / cumulative functions.

    See Also:
        - :func:`pnl_gross`: The gross position PnL from which costs are netted.
        - :func:`cost_per_share`: A usual source of ``cost`` (sum several cost components with ``+``).
        - :func:`cumulative_pnl`: Cumulates these net PnL into a running currency total.

    References:
        - https://en.wikipedia.org/wiki/Mark-to-market_accounting

    Examples:
        A gross P&L series netted against a cost series:

        >>> import polars as pl
        >>> from pomata.pnl import pnl_net
        >>>
        >>> frame = pl.DataFrame(
        ...     {
        ...         "pnl_gross": [20.0, 5.0, -15.0, -20.0, 8.0, 12.0, -3.0, 10.0],
        ...         "cost": [2.0, 0.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0],
        ...     }
        ... )
        >>> frame.select(pnl_net(pl.col("pnl_gross"), pl.col("cost")).round(4).alias("pnl_net"))["pnl_net"].to_list()
        [18.0, 5.0, -18.0, -20.0, 7.0, 10.0, -3.0, 9.0]

        Because the subtraction is elementwise, ``.over`` changes nothing; it appears here only for consistency:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "ticker": ["A"] * 4 + ["B"] * 4,
        ...         "pnl_gross": [20.0, 5.0, -15.0, -20.0, 8.0, 12.0, -3.0, 10.0],
        ...         "cost": [2.0, 0.0, 3.0, 0.0, 1.0, 2.0, 0.0, 1.0],
        ...     }
        ... )
        >>> frame.with_columns(pnl_net(pl.col("pnl_gross"), pl.col("cost")).over("ticker").round(4).alias("n"))[
        ...     "n"
        ... ].to_list()
        [18.0, 5.0, -18.0, -20.0, 7.0, 10.0, -3.0, 9.0]

        Missing data: a ``null`` and then a ``NaN`` in ``pnl_gross`` each pass straight through the subtraction:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "pnl_gross": [20.0, None, -15.0, float("nan"), 8.0],
        ...         "cost": [2.0, 3.0, 3.0, 0.0, 1.0],
        ...     }
        ... )
        >>> frame.select(pnl_net(pl.col("pnl_gross"), pl.col("cost")).round(4).alias("pnl_net"))["pnl_net"].to_list()
        [18.0, None, -18.0, nan, 7.0]
    """
    gross = float64_expr(pnl_gross)
    charges = float64_expr(cost)
    # Row-by-row gross minus cost. Null and NaN propagate (null wins). The cost arrives fully composed by the caller;
    # nothing is modelled here.
    net = gross - charges
    return net
def returns_gross(
    weight: pl.Expr,
    asset_returns: pl.Expr,
) -> pl.Expr:
    r"""
    Gross strategy returns: the per-bar return earned by a weight, before costs.

    Each bar's gross strategy return is the signed weight multiplied by the asset's return on that bar, before any
    transaction costs:

    .. math::

        r^{\mathrm{gross}}_t = w_t \cdot r_t, \qquad w = \text{weight}.

    Simple returns aggregate across assets (a portfolio's return is the weighted sum of its constituents' returns),
    so this per-leg product is the building block of a multi-asset gross return: sum it over the legs of a panel.
    The multiply is purely elementwise and has **no built-in lag**: every row pairs ``weight`` with
    ``asset_returns`` at the same index, which makes alignment the caller's responsibility.

    Args:
        weight: Signed weight, the fraction of capital held (e.g. ``1.0`` fully long, ``-0.5`` half short);
            ``|weight| > 1`` is leverage.
        asset_returns: Per-bar asset returns, typically from :func:`returns_simple` (e.g.
            ``returns_simple(pl.col("close"))``).

    Returns:
        The gross strategy return at each row, the same length as the inputs. The function has no window and no
        warm-up of its own: each row is the product of its own ``weight`` and ``asset_returns``, so a warm-up
        ``null`` can only be inherited from the inputs (e.g. the first row of :func:`returns_simple`).

    Raises:
        TypeError: If any input is not a ``pl.Expr``.

    Note:
        **Correctness** -- the result is checked against an independent reference oracle on every input, and every
        edge case (missing data, boundaries, and warm-up where applicable) is given a defined behavior.

        **No lookahead (alignment is the caller's):** ``weight`` at row ``t`` is taken to be the weight held over
        ``asset_returns`` at row ``t``. To remain lookahead-free that weight may depend only on information
        available **before** that return; if your weight is decided on the same bar that closes the return, lag it
        by one bar -- ``returns_gross(weight.shift(1), asset_returns)`` -- so it reflects only the prior close. The
        function shifts nothing on your behalf, so a weight that is already aligned is never double-shifted.

        **Edge-case behavior:**

        - **Null** — if either input is ``null`` the row is ``null`` (the product propagates ``null``, which wins
          over ``NaN``).
        - **NaN** — if either input is ``NaN`` and neither is ``null`` at that row, the row is ``NaN``.
        - **Partitioning** — each row depends only on its own pair of values, so the result is already correct on a
          multi-series panel: ``.over(...)`` gives the same result and is optional here, unlike the lagged /
          cumulative functions where it is required to stop state spanning series boundaries.

    See Also:
        - :func:`returns_simple`: The usual source of ``asset_returns``.
        - :func:`turnover`: The traded fraction of the same ``weight``, the basis for transaction costs.
        - :func:`equity_curve`: Compounds these per-bar returns into a capital curve.

    References:
        - Meucci, A. (2010). "Quant Nugget 2: Linear vs. Compounded Returns."
        - https://en.wikipedia.org/wiki/Rate_of_return

    Examples:
        A weight series applied to an asset-return series:

        >>> import polars as pl
        >>> from pomata.pnl import returns_gross
        >>>
        >>> frame = pl.DataFrame(
        ...     {
        ...         "weight": [1.0, 0.5, -1.0, -1.0, 0.5, 1.0, -0.5, 0.5],
        ...         "asset_returns": [0.02, -0.01, 0.03, -0.02, 0.04, 0.01, -0.03, 0.02],
        ...     }
        ... )
        >>> expr = returns_gross(pl.col("weight"), pl.col("asset_returns")).round(4)
        >>> frame.select(expr.alias("returns_gross"))["returns_gross"].to_list()
        [0.02, -0.005, -0.03, 0.02, 0.02, 0.01, 0.015, 0.01]

        Because the product is elementwise, ``.over`` changes nothing; it appears here only for consistency:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "ticker": ["A"] * 4 + ["B"] * 4,
        ...         "weight": [1.0, -1.0, 0.5, 0.5, 0.5, 0.5, -1.0, 1.0],
        ...         "asset_returns": [0.02, 0.03, -0.01, 0.04, -0.02, 0.01, 0.03, -0.01],
        ...     }
        ... )
        >>> expr = returns_gross(pl.col("weight"), pl.col("asset_returns")).over("ticker").round(4)
        >>> frame.with_columns(expr.alias("g"))["g"].to_list()
        [0.02, -0.03, -0.005, 0.02, -0.01, 0.005, -0.03, -0.01]

        Missing data: a ``null`` and then a ``NaN`` in ``asset_returns`` each pass straight through the product:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "weight": [1.0, 0.5, -1.0, -1.0, 0.5],
        ...         "asset_returns": [0.02, None, 0.03, float("nan"), 0.04],
        ...     }
        ... )
        >>> expr = returns_gross(pl.col("weight"), pl.col("asset_returns")).round(4)
        >>> frame.select(expr.alias("returns_gross"))["returns_gross"].to_list()
        [0.02, None, -0.03, nan, 0.02]
    """
    exposure = float64_expr(weight)
    bar_returns = float64_expr(asset_returns)
    # Row-by-row weight times return. Null and NaN propagate (null wins). The weight is deliberately not lagged, so
    # alignment stays with the caller (see the Note).
    strategy_return = exposure * bar_returns
    return strategy_return
def returns_net(
    returns_gross: pl.Expr,
    cost: pl.Expr,
) -> pl.Expr:
    r"""
    Net Strategy Returns, the per-bar gross return with transaction costs taken out.

    Each row is the gross strategy return less the transaction cost charged on that same bar, which yields the net
    return series that the performance and risk metrics consume:

    .. math::

        r^{\mathrm{net}}_t = r^{\mathrm{gross}}_t - c_t.

    No cost model lives in this function; it only subtracts, row by row. The caller builds the cost from the cost
    components (adding several together with ``+``) and passes the result in, e.g.
    ``returns_net(returns_gross(weight, asset_returns), cost_proportional(weight, rate))``.

    Args:
        returns_gross: Gross per-bar strategy returns, typically from :func:`returns_gross`.
        cost: Per-bar transaction cost expressed as a return drag, typically from :func:`cost_proportional` (sum
            several with ``+``).

    Returns:
        The net strategy return for each row, the same length as the inputs. The function has no window and no
        warm-up of its own: each row is simply ``returns_gross`` minus ``cost`` at that row.

    Raises:
        TypeError: If any input is not a ``pl.Expr``.

    Note:
        **Correctness** -- the result is checked against an independent reference oracle on every input, and every
        edge case (missing data, boundaries, and warm-up where applicable) is given a defined behavior.

        **Edge-case behavior:**

        - **Null** — when either input is ``null`` at a row, the output at that row is ``null``; the subtraction
          propagates ``null``, and ``null`` wins over ``NaN``.
        - **NaN** — when either input is ``NaN`` at a row (and neither is ``null`` there), the output at that row is
          ``NaN``.
        - **Partitioning** — every row depends only on its own pair of values, so the result is already correct on a
          multi-series panel: ``.over(...)`` partitions identically and is therefore optional here, unlike the
          lagged / cumulative functions where it is required.

    See Also:
        - :func:`returns_gross`: The gross return this nets costs from.
        - :func:`cost_proportional`: The usual source of ``cost`` (a proportional, bps-of-notional fee).
        - :func:`equity_curve`: Compounds these net returns into a capital curve.

    References:
        - https://en.wikipedia.org/wiki/Rate_of_return

    Examples:
        Basic usage on a gross return and a cost series:

        >>> import polars as pl
        >>> from pomata.pnl import returns_net
        >>>
        >>> frame = pl.DataFrame(
        ...     {
        ...         "returns_gross": [0.05, -0.02, 0.03, 0.01, 0.0, 0.04, -0.01, 0.02],
        ...         "cost": [0.0005, 0.0015, 0.0005, 0.0, 0.0005, 0.001, 0.0, 0.0005],
        ...     }
        ... )
        >>> expr = returns_net(pl.col("returns_gross"), pl.col("cost")).round(4)
        >>> frame.select(expr.alias("returns_net"))["returns_net"].to_list()
        [0.0495, -0.0215, 0.0295, 0.01, -0.0005, 0.039, -0.01, 0.0195]

        The subtraction is elementwise, so ``.over`` partitions identically and is shown only for consistency:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "ticker": ["A"] * 4 + ["B"] * 4,
        ...         "returns_gross": [0.05, -0.02, 0.03, 0.01, 0.0, 0.04, -0.01, 0.02],
        ...         "cost": [0.0005, 0.0015, 0.0005, 0.0, 0.0005, 0.001, 0.0, 0.0005],
        ...     }
        ... )
        >>> expr = returns_net(pl.col("returns_gross"), pl.col("cost")).over("ticker").round(4)
        >>> frame.with_columns(expr.alias("n"))["n"].to_list()
        [0.0495, -0.0215, 0.0295, 0.01, -0.0005, 0.039, -0.01, 0.0195]

        A ``null`` followed by a ``NaN`` in ``returns_gross`` (each propagates through the subtraction) makes the
        missing-data handling visible:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "returns_gross": [0.05, None, 0.03, float("nan"), 0.0],
        ...         "cost": [0.0005, 0.0015, 0.0005, 0.0, 0.0005],
        ...     }
        ... )
        >>> expr = returns_net(pl.col("returns_gross"), pl.col("cost")).round(4)
        >>> frame.select(expr.alias("returns_net"))["returns_net"].to_list()
        [0.0495, None, 0.0295, nan, -0.0005]
    """
    # Normalize both operands through the shared helper first (gross, then cost) so the subtraction below always
    # operates on the helper's output rather than on the raw caller-supplied expressions.
    gross_f64 = float64_expr(returns_gross)
    cost_f64 = float64_expr(cost)
    # Row-by-row difference only: a null on either side yields null (ahead of NaN), a NaN yields NaN. The cost is
    # whatever the caller composed; nothing is modelled here.
    return gross_f64 - cost_f64
[docs]
def turnover(
    weight: pl.Expr,
) -> pl.Expr:
    r"""
    Turnover, the fraction of capital traded when moving from one bar's weight to the next.

    It is the absolute one-bar change in the weight: the amount bought or sold, as a fraction of capital, to get from
    the previous weight to the current one:

    .. math::

        \mathrm{turnover}_t = \lvert w_t - w_{t-1} \rvert, \qquad w = \text{weight}.

    The weight before the series begins is treated as flat (``0``), which makes the first bar
    :math:`\lvert w_0 \rvert`: stepping into the initial weight from cash counts as a trade. Turnover is what
    proportional transaction costs (a cost per unit traded) are charged on, and it is also a dimensionless measure
    of churn in its own right.

    Args:
        weight: Signed weight, the fraction of capital held (e.g. ``1.0`` fully long, ``-0.5`` half short);
            ``|weight| > 1`` is leverage.

    Returns:
        The traded fraction for each row, the same length as ``weight``. The first row is ``|weight_0|`` (the trade
        from a flat start), not ``null``.

    Raises:
        TypeError: If any input is not a ``pl.Expr``.

    Note:
        **Correctness** -- the result is checked against an independent reference oracle on every input, and every
        edge case (missing data, boundaries, and warm-up where applicable) is given a defined behavior.

        **Edge-case behavior:**

        - **Flat start** — the weight preceding the series is assumed to be ``0``, so the first row comes out as
          ``|weight_0|`` instead of ``null``; opening the initial weight from cash is a real trade and carries its
          cost.
        - **Null** — a ``null`` weight yields ``null`` on its own row and on the following row (whose difference
          reads the ``null`` as its previous weight), after which turnover resumes; ``null`` takes precedence over
          ``NaN``.
        - **NaN** — a ``NaN`` weight yields ``NaN`` on its own row and on the following row.
        - **Partitioning** — on a multi-series panel wrap the call in ``.over(...)`` so the one-bar difference never
          reaches across series boundaries (and each series gets its own flat start), e.g.
          ``turnover(pl.col("weight")).over("ticker")``.

    See Also:
        - :func:`cost_proportional`: The proportional transaction cost this turnover scales.
        - :func:`cost_slippage`: A per-trade slippage cost also driven by the traded fraction.
        - :func:`returns_gross`: The gross return of the same ``weight``.

    References:
        - https://www.investopedia.com/terms/p/portfolioturnover.asp

    Examples:
        Basic usage on a weight series:

        >>> import polars as pl
        >>> from pomata.pnl import turnover
        >>>
        >>> frame = pl.DataFrame({"weight": [0.5, 1.0, -0.5, -0.5, 0.0, 1.0, 1.0, -1.0]})
        >>> frame.select(turnover(pl.col("weight")).round(4).alias("turnover"))["turnover"].to_list()
        [0.5, 0.5, 1.5, 0.0, 0.5, 1.0, 0.0, 2.0]

        On a multi-ticker panel, wrap the call in ``.over`` so each ticker starts flat and never differences across
        the boundary:

        >>> frame = pl.DataFrame(
        ...     {
        ...         "ticker": ["A"] * 4 + ["B"] * 4,
        ...         "weight": [0.5, 1.0, -0.5, -0.5, 1.0, 1.0, 0.0, 0.5],
        ...     }
        ... )
        >>> frame.with_columns(turnover(pl.col("weight")).over("ticker").round(4).alias("t"))["t"].to_list()
        [0.5, 0.5, 1.5, 0.0, 1.0, 0.0, 1.0, 0.5]

        A ``null`` (which voids its own row and the next, since the difference references the previous weight)
        followed by a ``NaN`` (likewise) makes the missing-data handling visible:

        >>> frame = pl.DataFrame({"weight": [0.5, None, -0.5, float("nan"), 0.0]})
        >>> frame.select(turnover(pl.col("weight")).round(4).alias("turnover"))["turnover"].to_list()
        [0.5, None, None, nan, nan]
    """
    current = float64_expr(weight)
    # The previous bar's weight; the slot vacated at the top of the series is filled with 0.0 (a flat book), so the
    # first row measures the entry trade |weight_0| instead of being null.
    previous = current.shift(1, fill_value=0.0)
    # Size of the one-bar move. A null or NaN weight affects two rows: its own and the next one, which reads it as
    # the previous weight.
    return (current - previous).abs()