Source code for floodlight.metrics.zone_aggregation

import numpy as np
import pandas as pd

from floodlight import PlayerProperty, TeamProperty
from floodlight.utils.types import Numeric



[docs]
def aggregate_property_by_zones(
    property_to_aggregate: PlayerProperty | TeamProperty,
    binning_property: PlayerProperty | TeamProperty,
    zones: list[tuple[Numeric, Numeric]],
    zone_names: list[str] | None = None,
    aggregation: str = "sum",
) -> pd.DataFrame:
    """Aggregates a property over threshold-based zones of another property.

    This function bins frames based on the value of ``binning_property`` and
    aggregates values from ``property_to_aggregate`` within each zone. Common use
    cases include calculating distance covered per velocity zone or time spent in
    different intensity zones [1]_.

    Parameters
    ----------
    property_to_aggregate: PlayerProperty or TeamProperty
        Property values to aggregate. For PlayerProperty, shape is (T, N) where T is
        the number of frames and N is the number of players. For TeamProperty, shape
        is (T,) where T is the number of frames.
    binning_property: PlayerProperty or TeamProperty
        Property values used to determine zone membership. Must have the same number
        of frames T as ``property_to_aggregate``. It must either have the same shape
        as ``property_to_aggregate`` or be a TeamProperty of shape (T,), in which
        case its values are broadcast across all players (see Notes).
    zones: list[tuple[Numeric, Numeric]]
        List of (min, max) threshold tuples defining each zone. Zones use half-open
        intervals [min, max) where the minimum is inclusive and maximum is exclusive.
        For example, [(0, 2), (2, 4)] creates two zones: [0, 2) and [2, 4).
    zone_names: list[str], optional
        Names for each zone. If None, zones are named as "*min* to *max*". Must
        have the same length as ``zones`` if provided.
    aggregation: str, optional
        Aggregation function to apply within each zone. Options:

        - "sum": Sum of property values in zone
        - "count": Number of frames with valid data in zone
        - "mean": Average property value in zone
        - "min": Minimum property value in zone
        - "max": Maximum property value in zone

        Default is 'sum'.

    Returns
    -------
    zone_aggregates: pd.DataFrame
        DataFrame with aggregated values. For PlayerProperty inputs, rows correspond
        to players and columns to zones. For TeamProperty inputs, a single-row
        DataFrame is returned. Empty zones (no frames matching) return NaN for
        mean/min/max and 0 for sum/count.

    Raises
    ------
    ValueError
        If ``zone_names`` and ``zones`` differ in length, if ``aggregation`` is not
        a supported option, if either property is not 1D or 2D, if a TeamProperty
        is to be aggregated by a PlayerProperty, or if the frame or player
        dimensions of the two properties do not match.

    Notes
    -----
    Valid property combinations:

    - PlayerProperty by PlayerProperty: Both must have shape (T, N) with matching T and
      N
    - TeamProperty by TeamProperty: Both must have shape (T,) with matching T
    - PlayerProperty by TeamProperty: Aggregation property has shape (T, N),
      binning property has shape (T,). The binning values are broadcast across all
      players.

    Invalid combination:

    - TeamProperty by PlayerProperty: Cannot bin a single team value using
      player-specific thresholds.

    Frames where either property has NaN values are excluded from all aggregations.
    The boundary handling uses half-open intervals [min, max) to avoid ambiguity at
    zone boundaries.

    Examples
    --------
    Calculate distance covered in velocity zones for each player:

    >>> import numpy as np
    >>> from floodlight import PlayerProperty
    >>> from floodlight.metrics.zone_aggregation import aggregate_property_by_zones

    >>> # Create sample data: 4 frames, 2 players
    >>> distances = PlayerProperty(
    ...     property=np.array([[10, 5], [10, 5], [10, 5], [10, 5]], dtype=float),
    ...     name="distance"
    ... )
    >>> velocities = PlayerProperty(
    ...     property=np.array([[1, 6], [3, 8], [1, 6], [3, 8]], dtype=float),
    ...     name="velocity"
    ... )
    >>> # Define velocity zones (m/s)
    >>> zones = [(0, 2), (2, 4), (5, 9)]
    >>> zone_names = ["Low", "Medium", "High"]
    >>> result = aggregate_property_by_zones(
    ...     distances, velocities, zones, zone_names, aggregation='sum'
    ... )
    >>> result
       Low  Medium  High
    0  20.0    20.0   0.0
    1   0.0     0.0  20.0

    Calculate time spent (frame count) in metabolic power zones:

    >>> power = PlayerProperty(
    ...     property=np.array([[5, 15], [8, 25], [12, 30], [6, 18]], dtype=float),
    ...     name="power"
    ... )
    >>> zones = [(0, 10), (10, 20), (20, 35)]
    >>> result = aggregate_property_by_zones(
    ...     power, power, zones, aggregation='count'
    ... )
    >>> result
       0 to 10  10 to 20  20 to 35
    0      3.0       1.0       0.0
    1      0.0       2.0       2.0

    References
    ----------
    .. [1] `Miguel, M., Oliveira, R., Loureiro, N. Garcia-Rubio, J. & Ibáñez, S.
           (2021). Load Measures in Training/Match Monitoring in Soccer: A Systematic
           Review. International Journal of Environmental Research and Public Health,
           18(5), 2721.
           <https://www.mdpi.com/1660-4601/18/5/2721>`_
    """

    n_zones = len(zones)

    # Resolve zone names: generate defaults or validate the user-supplied list
    if zone_names is None:
        zone_names = [f"{min_val} to {max_val}" for min_val, max_val in zones]
    elif len(zone_names) != n_zones:
        raise ValueError(
            f"zone_names length ({len(zone_names)}) must match zones length ({n_zones})"
        )

    # Validate aggregation parameter
    supported_aggregations = ["sum", "count", "mean", "min", "max"]
    if aggregation not in supported_aggregations:
        raise ValueError(
            f"aggregation must be one of {supported_aggregations}, got '{aggregation}'"
        )

    # Get property arrays; aggregated values are handled as floats so that NaN can
    # mark missing data and empty zones
    prop_to_agg = np.asarray(property_to_aggregate.property, dtype=float)
    binning_prop = np.asarray(binning_property.property)

    # Validate dimensionality first so that later shape lookups cannot fail
    if prop_to_agg.ndim not in (1, 2):
        raise ValueError(
            "property_to_aggregate must be 1D (TeamProperty) or 2D (PlayerProperty)"
        )
    if binning_prop.ndim not in (1, 2):
        raise ValueError(
            "binning_property must be 1D (TeamProperty) or 2D (PlayerProperty)"
        )

    # Validate property combination: TeamProperty by PlayerProperty is invalid
    if prop_to_agg.ndim == 1 and binning_prop.ndim == 2:
        raise ValueError(
            "Cannot aggregate TeamProperty by PlayerProperty. "
            "Valid combinations: PlayerProperty by PlayerProperty, "
            "TeamProperty by TeamProperty, or PlayerProperty by TeamProperty."
        )

    # Validate matching time dimension
    if prop_to_agg.shape[0] != binning_prop.shape[0]:
        raise ValueError(
            f"Time dimensions must match: property_to_aggregate has "
            f"{prop_to_agg.shape[0]} frames but binning_property has "
            f"{binning_prop.shape[0]} frames"
        )

    # Handle TeamProperty: reshape (T,) to (T, 1) for uniform processing
    if prop_to_agg.ndim == 1:
        prop_to_agg = prop_to_agg.reshape(-1, 1)

    if binning_prop.ndim == 1:
        # A (T, 1) column broadcasts against (T, N) in the comparisons below
        binning_prop = binning_prop.reshape(-1, 1)
    elif prop_to_agg.shape[1] != binning_prop.shape[1]:
        # For PlayerProperty by PlayerProperty, validate matching N dimension
        raise ValueError(
            f"Player dimensions must match: property_to_aggregate has "
            f"{prop_to_agg.shape[1]} players but binning_property has "
            f"{binning_prop.shape[1]} players"
        )

    n_entities = prop_to_agg.shape[1]

    # Frames with NaN in the aggregated property never contribute. NaN in the
    # binning property is excluded implicitly, as comparisons with NaN are False.
    has_value = ~np.isnan(prop_to_agg)

    # Initialize output array
    aggregated_values = np.full((n_entities, n_zones), np.nan)

    # Loop over zones and aggregate
    for i, (min_val, max_val) in enumerate(zones):
        # Frames in this zone (half-open interval [min, max)) that carry valid data
        in_zone = (binning_prop >= min_val) & (binning_prop < max_val)
        valid = in_zone & has_value
        counts = valid.sum(axis=0)

        if aggregation == "count":
            zone_result = counts
        elif aggregation == "sum":
            zone_result = np.where(valid, prop_to_agg, 0.0).sum(axis=0)
        elif aggregation == "mean":
            totals = np.where(valid, prop_to_agg, 0.0).sum(axis=0)
            # Only divide where data exists; empty zones keep the NaN default
            zone_result = np.full(totals.shape, np.nan)
            np.divide(totals, counts, out=zone_result, where=counts > 0)
        else:
            # "min" / "max": replace excluded frames with the neutral element of the
            # reduction; `initial` keeps the reduction defined for zero frames
            neutral = np.inf if aggregation == "min" else -np.inf
            candidates = np.where(valid, prop_to_agg, neutral)
            if aggregation == "min":
                zone_result = candidates.min(axis=0, initial=neutral)
            else:
                zone_result = candidates.max(axis=0, initial=neutral)
            zone_result[counts == 0] = np.nan

        aggregated_values[:, i] = zone_result

    # Create DataFrame with zone names as columns
    zone_aggregates = pd.DataFrame(data=aggregated_values, columns=zone_names)

    return zone_aggregates