import numpy as np
import pandas as pd
from floodlight import PlayerProperty, TeamProperty
from floodlight.utils.types import Numeric
# [docs] (Sphinx "viewcode" link artifact from the rendered page; not module code)
def aggregate_property_by_zones(
    property_to_aggregate: PlayerProperty | TeamProperty,
    binning_property: PlayerProperty | TeamProperty,
    zones: list[tuple[Numeric, Numeric]],
    zone_names: list[str] | None = None,
    aggregation: str = "sum",
) -> pd.DataFrame:
    """Aggregates a property over threshold-based zones of another property.

    This function bins frames based on the value of ``binning_property`` and
    aggregates values from ``property_to_aggregate`` within each zone. Common use
    cases include calculating distance covered per velocity zone or time spent in
    different intensity zones [1]_.

    Parameters
    ----------
    property_to_aggregate: PlayerProperty or TeamProperty
        Property values to aggregate. For PlayerProperty, shape is (T, N) where T is
        the number of frames and N is the number of players. For TeamProperty, shape
        is (T,) where T is the number of frames.
    binning_property: PlayerProperty or TeamProperty
        Property values used to determine zone membership. Must have the same
        number of frames as ``property_to_aggregate`` (see Notes for the valid
        shape combinations).
    zones: list[tuple[Numeric, Numeric]]
        List of (min, max) threshold tuples defining each zone. Zones use half-open
        intervals [min, max) where the minimum is inclusive and maximum is exclusive.
        For example, [(0, 2), (2, 4)] creates two zones: [0, 2) and [2, 4).
    zone_names: list[str], optional
        Names for each zone. If None, zones are named as "*min* to *max*". Must
        have the same length as ``zones`` if provided.
    aggregation: str, optional
        Aggregation function to apply within each zone. Options:

        - "sum": Sum of property values in zone
        - "count": Number of frames with valid data in zone
        - "mean": Average property value in zone
        - "min": Minimum property value in zone
        - "max": Maximum property value in zone

        Default is 'sum'.

    Returns
    -------
    zone_aggregates: pd.DataFrame
        DataFrame of floats with aggregated values. For PlayerProperty inputs, rows
        correspond to players and columns to zones. For TeamProperty inputs, a
        single-row DataFrame is returned. Empty zones (no valid frames matching)
        return NaN for mean/min/max and 0 for sum/count.

    Raises
    ------
    ValueError
        If ``zone_names`` does not have the same length as ``zones``, if
        ``aggregation`` is not one of the supported options, if either property is
        not 1D or 2D, if a TeamProperty is to be aggregated by a PlayerProperty, or
        if the frame (T) or player (N) dimensions of the two properties differ.

    Notes
    -----
    Valid property combinations:

    - PlayerProperty by PlayerProperty: Both must have shape (T, N) with matching T
      and N
    - TeamProperty by TeamProperty: Both must have shape (T,) with matching T
    - PlayerProperty by TeamProperty: Aggregation property has shape (T, N),
      binning property has shape (T,). The binning values are broadcast across all
      players.

    Invalid combination:

    - TeamProperty by PlayerProperty: Cannot bin a single team value using
      player-specific thresholds.

    Frames where either property has NaN values are excluded from all aggregations.
    The boundary handling uses half-open intervals [min, max) to avoid ambiguity at
    zone boundaries.

    Examples
    --------
    Calculate distance covered in velocity zones for each player:

    >>> import numpy as np
    >>> from floodlight import PlayerProperty
    >>> from floodlight.metrics.zone_aggregation import aggregate_property_by_zones

    >>> # Create sample data: 4 frames, 2 players
    >>> distances = PlayerProperty(
    ...     property=np.array([[10, 5], [10, 5], [10, 5], [10, 5]], dtype=float),
    ...     name="distance"
    ... )
    >>> velocities = PlayerProperty(
    ...     property=np.array([[1, 6], [3, 8], [1, 6], [3, 8]], dtype=float),
    ...     name="velocity"
    ... )

    >>> # Define velocity zones (m/s)
    >>> zones = [(0, 2), (2, 4), (5, 9)]
    >>> zone_names = ["Low", "Medium", "High"]

    >>> result = aggregate_property_by_zones(
    ...     distances, velocities, zones, zone_names, aggregation='sum'
    ... )
    >>> result
        Low  Medium  High
    0  20.0    20.0   0.0
    1   0.0     0.0  20.0

    Calculate time spent (frame count) in metabolic power zones:

    >>> power = PlayerProperty(
    ...     property=np.array([[5, 15], [8, 25], [12, 30], [6, 18]], dtype=float),
    ...     name="power"
    ... )
    >>> zones = [(0, 10), (10, 20), (20, 35)]
    >>> result = aggregate_property_by_zones(
    ...     power, power, zones, aggregation='count'
    ... )
    >>> result
       0 to 10  10 to 20  20 to 35
    0      3.0       1.0       0.0
    1      0.0       2.0       2.0

    References
    ----------
    .. [1] `Miguel, M., Oliveira, R., Loureiro, N. Garcia-Rubio, J. & Ibáñez, S.
        (2021). Load Measures in Training/Match Monitoring in Soccer: A Systematic
        Review. International Journal of Environmental Research and Public Health,
        18(5), 2721.
        <https://www.mdpi.com/1660-4601/18/5/2721>`_
    """
    n_zones = len(zones)

    # Validate zone_names before deriving defaults from the zones themselves
    if zone_names is not None and len(zone_names) != n_zones:
        raise ValueError(
            f"zone_names length ({len(zone_names)}) must match zones length ({n_zones})"
        )
    if zone_names is None:
        zone_names = [f"{min_val} to {max_val}" for min_val, max_val in zones]

    # Validate aggregation parameter
    supported = ("sum", "count", "mean", "min", "max")
    if aggregation not in supported:
        raise ValueError(
            f"aggregation must be one of {list(supported)}, got '{aggregation}'"
        )

    # Work on float values so that NaN detection and NaN results are well defined
    values = np.asarray(property_to_aggregate.property, dtype=float)
    bins = np.asarray(binning_property.property)

    # Validate dimensionality first so the shape checks below cannot fail on 0-d
    # or >2-d input with an unrelated error
    if values.ndim not in (1, 2):
        raise ValueError(
            "property_to_aggregate must be 1D (TeamProperty) or 2D (PlayerProperty)"
        )
    if bins.ndim not in (1, 2):
        raise ValueError(
            "binning_property must be 1D (TeamProperty) or 2D (PlayerProperty)"
        )

    # Validate property combination: TeamProperty by PlayerProperty is invalid
    if values.ndim == 1 and bins.ndim == 2:
        raise ValueError(
            "Cannot aggregate TeamProperty by PlayerProperty. "
            "Valid combinations: PlayerProperty by PlayerProperty, "
            "TeamProperty by TeamProperty, or PlayerProperty by TeamProperty."
        )

    # Validate matching time dimension
    if values.shape[0] != bins.shape[0]:
        raise ValueError(
            f"Time dimensions must match: property_to_aggregate has "
            f"{values.shape[0]} frames but binning_property has "
            f"{bins.shape[0]} frames"
        )

    # Handle TeamProperty: reshape (T,) to (T, 1) for uniform processing
    if values.ndim == 1:
        values = values.reshape(-1, 1)
    if bins.ndim == 1:
        # A (T, 1) column broadcasts against (T, N) in the mask arithmetic below
        bins = bins.reshape(-1, 1)
    elif values.shape[1] != bins.shape[1]:
        raise ValueError(
            f"Player dimensions must match: property_to_aggregate has "
            f"{values.shape[1]} players but binning_property has "
            f"{bins.shape[1]} players"
        )

    # Frames where the aggregated property itself is missing never contribute
    observed = ~np.isnan(values)

    aggregated_values = np.full((values.shape[1], n_zones), np.nan)
    for col, (lower, upper) in enumerate(zones):
        # Half-open interval [lower, upper). NaN binning values compare False on
        # both sides and are therefore excluded; silence the comparison warning
        # some NumPy versions emit for NaN operands.
        with np.errstate(invalid="ignore"):
            in_zone = (bins >= lower) & (bins < upper)
        valid = in_zone & observed
        n_valid = valid.sum(axis=0)

        if aggregation == "count":
            zone_result = n_valid
        elif aggregation == "sum":
            zone_result = np.where(valid, values, 0.0).sum(axis=0)
        elif aggregation == "mean":
            totals = np.where(valid, values, 0.0).sum(axis=0)
            # 0 / 0 for empty zones intentionally yields NaN
            with np.errstate(divide="ignore", invalid="ignore"):
                zone_result = totals / n_valid
        else:
            # min / max: neutral sentinel for excluded frames; ``initial`` keeps
            # the reduction defined even when there are zero frames
            is_min = aggregation == "min"
            sentinel = np.inf if is_min else -np.inf
            reducer = np.min if is_min else np.max
            extremes = reducer(
                np.where(valid, values, sentinel), axis=0, initial=sentinel
            )
            # Report NaN (not the sentinel) where no valid frame fell in the zone
            zone_result = np.where(n_valid > 0, extremes, np.nan)

        aggregated_values[:, col] = zone_result

    # Create DataFrame with zone names as columns
    zone_aggregates = pd.DataFrame(data=aggregated_values, columns=zone_names)
    return zone_aggregates