Sindbad~EG File Manager
"""
Numba 1D min/max kernels that can be shared by
* Dataframe / Series
* groupby
* rolling / expanding
Mirrors pandas/_libs/window/aggregation.pyx
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import numba
import numpy as np
if TYPE_CHECKING:
from pandas._typing import npt
@numba.jit(nopython=True, nogil=True, parallel=False)
def sliding_min_max(
values: np.ndarray,
result_dtype: np.dtype,
start: np.ndarray,
end: np.ndarray,
min_periods: int,
is_max: bool,
) -> tuple[np.ndarray, list[int]]:
N = len(start)
nobs = 0
output = np.empty(N, dtype=result_dtype)
na_pos = []
# Use deque once numba supports it
# https://github.com/numba/numba/issues/7417
Q: list = []
W: list = []
for i in range(N):
curr_win_size = end[i] - start[i]
if i == 0:
st = start[i]
else:
st = end[i - 1]
for k in range(st, end[i]):
ai = values[k]
if not np.isnan(ai):
nobs += 1
elif is_max:
ai = -np.inf
else:
ai = np.inf
# Discard previous entries if we find new min or max
if is_max:
while Q and ((ai >= values[Q[-1]]) or values[Q[-1]] != values[Q[-1]]):
Q.pop()
else:
while Q and ((ai <= values[Q[-1]]) or values[Q[-1]] != values[Q[-1]]):
Q.pop()
Q.append(k)
W.append(k)
# Discard entries outside and left of current window
while Q and Q[0] <= start[i] - 1:
Q.pop(0)
while W and W[0] <= start[i] - 1:
if not np.isnan(values[W[0]]):
nobs -= 1
W.pop(0)
# Save output based on index in input value array
if Q and curr_win_size > 0 and nobs >= min_periods:
output[i] = values[Q[0]]
else:
if values.dtype.kind != "i":
output[i] = np.nan
else:
na_pos.append(i)
return output, na_pos
@numba.jit(nopython=True, nogil=True, parallel=False)
def grouped_min_max(
values: np.ndarray,
result_dtype: np.dtype,
labels: npt.NDArray[np.intp],
ngroups: int,
min_periods: int,
is_max: bool,
) -> tuple[np.ndarray, list[int]]:
N = len(labels)
nobs = np.zeros(ngroups, dtype=np.int64)
na_pos = []
output = np.empty(ngroups, dtype=result_dtype)
for i in range(N):
lab = labels[i]
val = values[i]
if lab < 0:
continue
if values.dtype.kind == "i" or not np.isnan(val):
nobs[lab] += 1
else:
# NaN value cannot be a min/max value
continue
if nobs[lab] == 1:
# First element in group, set output equal to this
output[lab] = val
continue
if is_max:
if val > output[lab]:
output[lab] = val
else:
if val < output[lab]:
output[lab] = val
# Set labels that don't satisfy min_periods as np.nan
for lab, count in enumerate(nobs):
if count < min_periods:
na_pos.append(lab)
return output, na_pos
Sindbad File Manager Version 1.0, Coded By Sindbad EG ~ The Terrorists