from __future__ import annotations
from datetime import (
datetime,
timedelta,
)
from functools import wraps
import operator
from typing import (
TYPE_CHECKING,
Any,
Callable,
Literal,
Union,
cast,
final,
overload,
)
import warnings
import numpy as np
from pandas._libs import (
algos,
lib,
)
from pandas._libs.arrays import NDArrayBacked
from pandas._libs.tslibs import (
BaseOffset,
IncompatibleFrequency,
NaT,
NaTType,
Period,
Resolution,
Tick,
Timedelta,
Timestamp,
add_overflowsafe,
astype_overflowsafe,
get_unit_from_dtype,
iNaT,
ints_to_pydatetime,
ints_to_pytimedelta,
periods_per_day,
to_offset,
)
from pandas._libs.tslibs.fields import (
RoundTo,
round_nsint64,
)
from pandas._libs.tslibs.np_datetime import compare_mismatched_resolutions
from pandas._libs.tslibs.timedeltas import get_unit_for_round
from pandas._libs.tslibs.timestamps import integer_op_not_supported
from pandas._typing import (
ArrayLike,
AxisInt,
DatetimeLikeScalar,
Dtype,
DtypeObj,
F,
InterpolateOptions,
NpDtype,
PositionalIndexer2D,
PositionalIndexerTuple,
ScalarIndexer,
Self,
SequenceIndexer,
TimeAmbiguous,
TimeNonexistent,
npt,
)
from pandas.compat.numpy import function as nv
from pandas.errors import (
AbstractMethodError,
InvalidComparison,
PerformanceWarning,
)
from pandas.util._decorators import (
Appender,
Substitution,
cache_readonly,
)
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import (
is_all_strings,
is_integer_dtype,
is_list_like,
is_object_dtype,
is_string_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
ArrowDtype,
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
PeriodDtype,
)
from pandas.core.dtypes.generic import (
ABCCategorical,
ABCMultiIndex,
)
from pandas.core.dtypes.missing import (
is_valid_na_for_dtype,
isna,
)
from pandas.core import (
algorithms,
missing,
nanops,
ops,
)
from pandas.core.algorithms import (
isin,
map_array,
unique1d,
)
from pandas.core.array_algos import datetimelike_accumulations
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays._mixins import (
NDArrayBackedExtensionArray,
ravel_compat,
)
from pandas.core.arrays.arrow.array import ArrowExtensionArray
from pandas.core.arrays.base import ExtensionArray
from pandas.core.arrays.integer import IntegerArray
import pandas.core.common as com
from pandas.core.construction import (
array as pd_array,
ensure_wrapped_if_datetimelike,
extract_array,
)
from pandas.core.indexers import (
check_array_indexer,
check_setitem_lengths,
)
from pandas.core.ops.common import unpack_zerodim_and_defer
from pandas.core.ops.invalid import (
invalid_comparison,
make_invalid_op,
)
from pandas.tseries import frequencies
if TYPE_CHECKING:
from collections.abc import (
Iterator,
Sequence,
)
from pandas import Index
from pandas.core.arrays import (
DatetimeArray,
PeriodArray,
TimedeltaArray,
)
DTScalarOrNaT = Union[DatetimeLikeScalar, NaTType]
def _make_unpacked_invalid_op(op_name: str):
op = make_invalid_op(op_name)
return unpack_zerodim_and_defer(op_name)(op)
def _period_dispatch(meth: F) -> F:
"""
For PeriodArray methods, dispatch to DatetimeArray and re-wrap the results
in PeriodArray. We cannot use ._ndarray directly for the affected
methods because the i8 data has different semantics on NaT values.
"""
@wraps(meth)
def new_meth(self, *args, **kwargs):
if not isinstance(self.dtype, PeriodDtype):
return meth(self, *args, **kwargs)
arr = self.view("M8[ns]")
result = meth(arr, *args, **kwargs)
if result is NaT:
return NaT
elif isinstance(result, Timestamp):
return self._box_func(result._value)
res_i8 = result.view("i8")
return self._from_backing_data(res_i8)
return cast(F, new_meth)
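# Illustrative sketch (hedged, not part of the original source) of what
# ``_period_dispatch`` enables: PeriodArray reductions reuse the DatetimeArray
# implementation on an M8[ns] view and re-wrap the result as a Period, e.g.
#
#   >>> pi = pd.period_range("2020-01", periods=3, freq="M")
#   >>> pi.array.min()  # dispatched through the datetime64[ns] view
#   Period('2020-01', 'M')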
# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
# incompatible with definition in base class "ExtensionArray"
class DatetimeLikeArrayMixin( # type: ignore[misc]
OpsMixin, NDArrayBackedExtensionArray
):
"""
Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray
Assumes that __new__/__init__ defines:
_ndarray
and that inheriting subclass implements:
freq
"""
# _infer_matches -> which infer_dtype strings are close enough to our own
_infer_matches: tuple[str, ...]
_is_recognized_dtype: Callable[[DtypeObj], bool]
_recognized_scalars: tuple[type, ...]
_ndarray: np.ndarray
freq: BaseOffset | None
@cache_readonly
def _can_hold_na(self) -> bool:
return True
def __init__(
self, data, dtype: Dtype | None = None, freq=None, copy: bool = False
) -> None:
raise AbstractMethodError(self)
@property
def _scalar_type(self) -> type[DatetimeLikeScalar]:
"""
        The scalar type associated with this datetimelike:
* PeriodArray : Period
* DatetimeArray : Timestamp
* TimedeltaArray : Timedelta
"""
raise AbstractMethodError(self)
def _scalar_from_string(self, value: str) -> DTScalarOrNaT:
"""
Construct a scalar type from a string.
Parameters
----------
value : str
Returns
-------
Period, Timestamp, or Timedelta, or NaT
Whatever the type of ``self._scalar_type`` is.
Notes
-----
This should call ``self._check_compatible_with`` before
unboxing the result.
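
        Examples
        --------
        Illustrative (hedged); assumes a tz-naive datetime64 array:

        >>> arr = pd.array(np.array(['1970-01-01'], 'datetime64[ns]'))
        >>> arr._scalar_from_string('2020-01-01')
        Timestamp('2020-01-01 00:00:00')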
"""
raise AbstractMethodError(self)
def _unbox_scalar(
self, value: DTScalarOrNaT
) -> np.int64 | np.datetime64 | np.timedelta64:
"""
Unbox the integer value of a scalar `value`.
Parameters
----------
value : Period, Timestamp, Timedelta, or NaT
Depending on subclass.
Returns
-------
int
Examples
--------
>>> arr = pd.array(np.array(['1970-01-01'], 'datetime64[ns]'))
>>> arr._unbox_scalar(arr[0])
numpy.datetime64('1970-01-01T00:00:00.000000000')
"""
raise AbstractMethodError(self)
def _check_compatible_with(self, other: DTScalarOrNaT) -> None:
"""
Verify that `self` and `other` are compatible.
* DatetimeArray verifies that the timezones (if any) match
* PeriodArray verifies that the freq matches
* Timedelta has no verification
In each case, NaT is considered compatible.
Parameters
----------
other
Raises
------
Exception
"""
raise AbstractMethodError(self)
# ------------------------------------------------------------------
def _box_func(self, x):
"""
box function to get object from internal representation
"""
raise AbstractMethodError(self)
def _box_values(self, values) -> np.ndarray:
"""
apply box func to passed values
"""
return lib.map_infer(values, self._box_func, convert=False)
def __iter__(self) -> Iterator:
if self.ndim > 1:
return (self[n] for n in range(len(self)))
else:
return (self._box_func(v) for v in self.asi8)
@property
def asi8(self) -> npt.NDArray[np.int64]:
"""
Integer representation of the values.
Returns
-------
ndarray
An ndarray with int64 dtype.
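
        Examples
        --------
        Illustrative (hedged); the integers are epoch offsets in the array's
        unit (nanoseconds here):

        >>> arr = pd.array(np.array(['1970-01-01'], 'datetime64[ns]'))
        >>> arr.asi8
        array([0])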
"""
# do not cache or you'll create a memory leak
return self._ndarray.view("i8")
# ----------------------------------------------------------------
# Rendering Methods
def _format_native_types(
self, *, na_rep: str | float = "NaT", date_format=None
) -> npt.NDArray[np.object_]:
"""
Helper method for astype when converting to strings.
Returns
-------
ndarray[str]
"""
raise AbstractMethodError(self)
def _formatter(self, boxed: bool = False):
# TODO: Remove Datetime & DatetimeTZ formatters.
return "'{}'".format
# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
        # used for TimedeltaArray/DatetimeArray, overwritten by PeriodArray
if is_object_dtype(dtype):
return np.array(list(self), dtype=object)
return self._ndarray
@overload
def __getitem__(self, item: ScalarIndexer) -> DTScalarOrNaT:
...
@overload
def __getitem__(
self,
item: SequenceIndexer | PositionalIndexerTuple,
) -> Self:
...
def __getitem__(self, key: PositionalIndexer2D) -> Self | DTScalarOrNaT:
"""
        This getitem defers to the underlying array, which by definition can
        only handle list-likes, slices, and integer scalars.
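
        Examples
        --------
        Illustrative (hedged): a stepped slice scales a Tick-like ``freq``.

        >>> dti = pd.date_range("2020-01-01", periods=4, freq="D")
        >>> dti.array[::2].freq
        <2 * Days>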
"""
# Use cast as we know we will get back a DatetimeLikeArray or DTScalar,
# but skip evaluating the Union at runtime for performance
# (see https://github.com/pandas-dev/pandas/pull/44624)
result = cast("Union[Self, DTScalarOrNaT]", super().__getitem__(key))
if lib.is_scalar(result):
return result
else:
# At this point we know the result is an array.
result = cast(Self, result)
result._freq = self._get_getitem_freq(key)
return result
def _get_getitem_freq(self, key) -> BaseOffset | None:
"""
Find the `freq` attribute to assign to the result of a __getitem__ lookup.
"""
is_period = isinstance(self.dtype, PeriodDtype)
if is_period:
freq = self.freq
elif self.ndim != 1:
freq = None
else:
key = check_array_indexer(self, key) # maybe ndarray[bool] -> slice
freq = None
if isinstance(key, slice):
if self.freq is not None and key.step is not None:
freq = key.step * self.freq
else:
freq = self.freq
elif key is Ellipsis:
# GH#21282 indexing with Ellipsis is similar to a full slice,
# should preserve `freq` attribute
freq = self.freq
elif com.is_bool_indexer(key):
new_key = lib.maybe_booleans_to_slice(key.view(np.uint8))
if isinstance(new_key, slice):
return self._get_getitem_freq(new_key)
return freq
# error: Argument 1 of "__setitem__" is incompatible with supertype
# "ExtensionArray"; supertype defines the argument type as "Union[int,
# ndarray]"
def __setitem__(
self,
key: int | Sequence[int] | Sequence[bool] | slice,
value: NaTType | Any | Sequence[Any],
) -> None:
# I'm fudging the types a bit here. "Any" above really depends
# on type(self). For PeriodArray, it's Period (or stuff coercible
# to a period in from_sequence). For DatetimeArray, it's Timestamp...
# I don't know if mypy can do that, possibly with Generics.
# https://mypy.readthedocs.io/en/latest/generics.html
no_op = check_setitem_lengths(key, value, self)
# Calling super() before the no_op short-circuit means that we raise
# on invalid 'value' even if this is a no-op, e.g. wrong-dtype empty array.
super().__setitem__(key, value)
if no_op:
return
self._maybe_clear_freq()
def _maybe_clear_freq(self) -> None:
# inplace operations like __setitem__ may invalidate the freq of
# DatetimeArray and TimedeltaArray
pass
def astype(self, dtype, copy: bool = True):
# Some notes on cases we don't have to handle here in the base class:
# 1. PeriodArray.astype handles period -> period
# 2. DatetimeArray.astype handles conversion between tz.
# 3. DatetimeArray.astype handles datetime -> period
dtype = pandas_dtype(dtype)
if dtype == object:
if self.dtype.kind == "M":
self = cast("DatetimeArray", self)
# *much* faster than self._box_values
# for e.g. test_get_loc_tuple_monotonic_above_size_cutoff
i8data = self.asi8
converted = ints_to_pydatetime(
i8data,
tz=self.tz,
box="timestamp",
reso=self._creso,
)
return converted
elif self.dtype.kind == "m":
return ints_to_pytimedelta(self._ndarray, box=True)
return self._box_values(self.asi8.ravel()).reshape(self.shape)
elif isinstance(dtype, ExtensionDtype):
return super().astype(dtype, copy=copy)
elif is_string_dtype(dtype):
return self._format_native_types()
elif dtype.kind in "iu":
# we deliberately ignore int32 vs. int64 here.
# See https://github.com/pandas-dev/pandas/issues/24381 for more.
values = self.asi8
if dtype != np.int64:
raise TypeError(
f"Converting from {self.dtype} to {dtype} is not supported. "
"Do obj.astype('int64').astype(dtype) instead"
)
if copy:
values = values.copy()
return values
elif (dtype.kind in "mM" and self.dtype != dtype) or dtype.kind == "f":
# disallow conversion between datetime/timedelta,
# and conversions for any datetimelike to float
msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
raise TypeError(msg)
else:
return np.asarray(self, dtype=dtype)
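    # Illustrative astype behavior (hedged sketch, not part of the original
    # source); assumes default pandas semantics:
    #
    #   >>> arr = pd.array(np.array(["2020-01-01"], dtype="datetime64[ns]"))
    #   >>> arr.astype(object)[0]  # object -> ndarray of Timestamps
    #   Timestamp('2020-01-01 00:00:00')
    #   >>> arr.astype("int64")  # the underlying i8 values
    #   array([1577836800000000000])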
@overload
def view(self) -> Self:
...
@overload
def view(self, dtype: Literal["M8[ns]"]) -> DatetimeArray:
...
@overload
def view(self, dtype: Literal["m8[ns]"]) -> TimedeltaArray:
...
@overload
def view(self, dtype: Dtype | None = ...) -> ArrayLike:
...
# pylint: disable-next=useless-parent-delegation
def view(self, dtype: Dtype | None = None) -> ArrayLike:
# we need to explicitly call super() method as long as the `@overload`s
# are present in this file.
return super().view(dtype)
# ------------------------------------------------------------------
# Validation Methods
# TODO: try to de-duplicate these, ensure identical behavior
def _validate_comparison_value(self, other):
if isinstance(other, str):
try:
# GH#18435 strings get a pass from tzawareness compat
other = self._scalar_from_string(other)
except (ValueError, IncompatibleFrequency):
# failed to parse as Timestamp/Timedelta/Period
raise InvalidComparison(other)
if isinstance(other, self._recognized_scalars) or other is NaT:
other = self._scalar_type(other)
try:
self._check_compatible_with(other)
except (TypeError, IncompatibleFrequency) as err:
# e.g. tzawareness mismatch
raise InvalidComparison(other) from err
elif not is_list_like(other):
raise InvalidComparison(other)
elif len(other) != len(self):
raise ValueError("Lengths must match")
else:
try:
other = self._validate_listlike(other, allow_object=True)
self._check_compatible_with(other)
except (TypeError, IncompatibleFrequency) as err:
if is_object_dtype(getattr(other, "dtype", None)):
# We will have to operate element-wise
pass
else:
raise InvalidComparison(other) from err
return other
def _validate_scalar(
self,
value,
*,
allow_listlike: bool = False,
unbox: bool = True,
):
"""
Validate that the input value can be cast to our scalar_type.
Parameters
----------
value : object
allow_listlike: bool, default False
When raising an exception, whether the message should say
listlike inputs are allowed.
unbox : bool, default True
Whether to unbox the result before returning. Note: unbox=False
skips the setitem compatibility check.
Returns
-------
self._scalar_type or NaT
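
        Examples
        --------
        Illustrative (hedged); with the default ``unbox=True`` the result is
        the raw numpy scalar:

        >>> arr = pd.array(np.array(['1970-01-01'], 'datetime64[ns]'))
        >>> arr._validate_scalar('2020-01-01')
        numpy.datetime64('2020-01-01T00:00:00.000000000')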
"""
if isinstance(value, self._scalar_type):
pass
elif isinstance(value, str):
# NB: Careful about tzawareness
try:
value = self._scalar_from_string(value)
except ValueError as err:
msg = self._validation_error_message(value, allow_listlike)
raise TypeError(msg) from err
elif is_valid_na_for_dtype(value, self.dtype):
# GH#18295
value = NaT
elif isna(value):
            # if we are dt64tz and value is dt64("NaT"), don't cast to NaT,
# or else we'll fail to raise in _unbox_scalar
msg = self._validation_error_message(value, allow_listlike)
raise TypeError(msg)
elif isinstance(value, self._recognized_scalars):
# error: Argument 1 to "Timestamp" has incompatible type "object"; expected
# "integer[Any] | float | str | date | datetime | datetime64"
value = self._scalar_type(value) # type: ignore[arg-type]
else:
msg = self._validation_error_message(value, allow_listlike)
raise TypeError(msg)
if not unbox:
# NB: In general NDArrayBackedExtensionArray will unbox here;
# this option exists to prevent a performance hit in
# TimedeltaIndex.get_loc
return value
return self._unbox_scalar(value)
def _validation_error_message(self, value, allow_listlike: bool = False) -> str:
"""
Construct an exception message on validation error.
Some methods allow only scalar inputs, while others allow either scalar
or listlike.
Parameters
----------
allow_listlike: bool, default False
Returns
-------
str
"""
if hasattr(value, "dtype") and getattr(value, "ndim", 0) > 0:
msg_got = f"{value.dtype} array"
else:
msg_got = f"'{type(value).__name__}'"
if allow_listlike:
msg = (
f"value should be a '{self._scalar_type.__name__}', 'NaT', "
f"or array of those. Got {msg_got} instead."
)
else:
msg = (
f"value should be a '{self._scalar_type.__name__}' or 'NaT'. "
f"Got {msg_got} instead."
)
return msg
def _validate_listlike(self, value, allow_object: bool = False):
if isinstance(value, type(self)):
if self.dtype.kind in "mM" and not allow_object:
# error: "DatetimeLikeArrayMixin" has no attribute "as_unit"
value = value.as_unit(self.unit, round_ok=False) # type: ignore[attr-defined]
return value
if isinstance(value, list) and len(value) == 0:
# We treat empty list as our own dtype.
return type(self)._from_sequence([], dtype=self.dtype)
if hasattr(value, "dtype") and value.dtype == object:
# `array` below won't do inference if value is an Index or Series.
# so do so here. in the Index case, inferred_type may be cached.
if lib.infer_dtype(value) in self._infer_matches:
try:
value = type(self)._from_sequence(value)
except (ValueError, TypeError):
if allow_object:
return value
msg = self._validation_error_message(value, True)
raise TypeError(msg)
# Do type inference if necessary up front (after unpacking
# NumpyExtensionArray)
# e.g. we passed PeriodIndex.values and got an ndarray of Periods
value = extract_array(value, extract_numpy=True)
value = pd_array(value)
value = extract_array(value, extract_numpy=True)
if is_all_strings(value):
# We got a StringArray
try:
# TODO: Could use from_sequence_of_strings if implemented
# Note: passing dtype is necessary for PeriodArray tests
value = type(self)._from_sequence(value, dtype=self.dtype)
except ValueError:
pass
if isinstance(value.dtype, CategoricalDtype):
# e.g. we have a Categorical holding self.dtype
if value.categories.dtype == self.dtype:
# TODO: do we need equal dtype or just comparable?
value = value._internal_get_values()
value = extract_array(value, extract_numpy=True)
if allow_object and is_object_dtype(value.dtype):
pass
elif not type(self)._is_recognized_dtype(value.dtype):
msg = self._validation_error_message(value, True)
raise TypeError(msg)
if self.dtype.kind in "mM" and not allow_object:
# error: "DatetimeLikeArrayMixin" has no attribute "as_unit"
value = value.as_unit(self.unit, round_ok=False) # type: ignore[attr-defined]
return value
def _validate_setitem_value(self, value):
if is_list_like(value):
value = self._validate_listlike(value)
else:
return self._validate_scalar(value, allow_listlike=True)
return self._unbox(value)
@final
def _unbox(self, other) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarray:
"""
Unbox either a scalar with _unbox_scalar or an instance of our own type.
"""
if lib.is_scalar(other):
other = self._unbox_scalar(other)
else:
# same type as self
self._check_compatible_with(other)
other = other._ndarray
return other
# ------------------------------------------------------------------
# Additional array methods
# These are not part of the EA API, but we implement them because
# pandas assumes they're there.
@ravel_compat
def map(self, mapper, na_action=None):
from pandas import Index
result = map_array(self, mapper, na_action=na_action)
result = Index(result)
if isinstance(result, ABCMultiIndex):
return result.to_numpy()
else:
return result.array
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
"""
Compute boolean array of whether each value is found in the
passed set of values.
Parameters
----------
values : np.ndarray or ExtensionArray
Returns
-------
ndarray[bool]
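
        Examples
        --------
        Illustrative (hedged):

        >>> dti = pd.date_range("2020-01-01", periods=3)
        >>> dti.array.isin(dti.array[:1])
        array([ True, False, False])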
"""
if values.dtype.kind in "fiuc":
# TODO: de-duplicate with equals, validate_comparison_value
return np.zeros(self.shape, dtype=bool)
values = ensure_wrapped_if_datetimelike(values)
if not isinstance(values, type(self)):
inferable = [
"timedelta",
"timedelta64",
"datetime",
"datetime64",
"date",
"period",
]
if values.dtype == object:
values = lib.maybe_convert_objects(
values, # type: ignore[arg-type]
convert_non_numeric=True,
dtype_if_all_nat=self.dtype,
)
if values.dtype != object:
return self.isin(values)
inferred = lib.infer_dtype(values, skipna=False)
if inferred not in inferable:
if inferred == "string":
pass
elif "mixed" in inferred:
return isin(self.astype(object), values)
else:
return np.zeros(self.shape, dtype=bool)
try:
values = type(self)._from_sequence(values)
except ValueError:
return isin(self.astype(object), values)
else:
warnings.warn(
# GH#53111
f"The behavior of 'isin' with dtype={self.dtype} and "
"castable values (e.g. strings) is deprecated. In a "
"future version, these will not be considered matching "
"by isin. Explicitly cast to the appropriate dtype before "
"calling isin instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
if self.dtype.kind in "mM":
self = cast("DatetimeArray | TimedeltaArray", self)
# error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
# has no attribute "as_unit"
values = values.as_unit(self.unit) # type: ignore[union-attr]
try:
# error: Argument 1 to "_check_compatible_with" of "DatetimeLikeArrayMixin"
# has incompatible type "ExtensionArray | ndarray[Any, Any]"; expected
# "Period | Timestamp | Timedelta | NaTType"
self._check_compatible_with(values) # type: ignore[arg-type]
except (TypeError, ValueError):
# Includes tzawareness mismatch and IncompatibleFrequencyError
return np.zeros(self.shape, dtype=bool)
# error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
# has no attribute "asi8"
return isin(self.asi8, values.asi8) # type: ignore[union-attr]
# ------------------------------------------------------------------
# Null Handling
def isna(self) -> npt.NDArray[np.bool_]:
return self._isnan
@property # NB: override with cache_readonly in immutable subclasses
def _isnan(self) -> npt.NDArray[np.bool_]:
"""
return if each value is nan
"""
return self.asi8 == iNaT
@property # NB: override with cache_readonly in immutable subclasses
def _hasna(self) -> bool:
"""
return if I have any nans; enables various perf speedups
"""
return bool(self._isnan.any())
def _maybe_mask_results(
self, result: np.ndarray, fill_value=iNaT, convert=None
) -> np.ndarray:
"""
Parameters
----------
result : np.ndarray
fill_value : object, default iNaT
convert : str, dtype or None
Returns
-------
        result : ndarray with values replaced by the fill_value

        Notes
        -----
        Mask the result if needed and convert to the provided dtype if it is
        not None. This is an internal routine.
"""
if self._hasna:
if convert:
result = result.astype(convert)
if fill_value is None:
fill_value = np.nan
np.putmask(result, self._isnan, fill_value)
return result
# ------------------------------------------------------------------
# Frequency Properties/Methods
@property
def freqstr(self) -> str | None:
"""
Return the frequency object as a string if it's set, otherwise None.
Examples
--------
For DatetimeIndex:
>>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00"], freq="D")
>>> idx.freqstr
'D'
The frequency can be inferred if there are more than 2 points:
>>> idx = pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"],
... freq="infer")
>>> idx.freqstr
'2D'
For PeriodIndex:
>>> idx = pd.PeriodIndex(["2023-1", "2023-2", "2023-3"], freq="M")
>>> idx.freqstr
'M'
"""
if self.freq is None:
return None
return self.freq.freqstr
@property # NB: override with cache_readonly in immutable subclasses
def inferred_freq(self) -> str | None:
"""
Tries to return a string representing a frequency generated by infer_freq.
Returns None if it can't autodetect the frequency.
Examples
--------
For DatetimeIndex:
>>> idx = pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"])
>>> idx.inferred_freq
'2D'
For TimedeltaIndex:
>>> tdelta_idx = pd.to_timedelta(["0 days", "10 days", "20 days"])
>>> tdelta_idx
TimedeltaIndex(['0 days', '10 days', '20 days'],
dtype='timedelta64[ns]', freq=None)
>>> tdelta_idx.inferred_freq
'10D'
"""
if self.ndim != 1:
return None
try:
return frequencies.infer_freq(self)
except ValueError:
return None
@property # NB: override with cache_readonly in immutable subclasses
def _resolution_obj(self) -> Resolution | None:
freqstr = self.freqstr
if freqstr is None:
return None
try:
return Resolution.get_reso_from_freqstr(freqstr)
except KeyError:
return None
@property # NB: override with cache_readonly in immutable subclasses
def resolution(self) -> str:
"""
Returns day, hour, minute, second, millisecond or microsecond
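
        Examples
        --------
        Illustrative (hedged):

        >>> pd.date_range("2020-01-01", periods=2, freq="D").resolution
        'day'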
"""
# error: Item "None" of "Optional[Any]" has no attribute "attrname"
return self._resolution_obj.attrname # type: ignore[union-attr]
# monotonicity/uniqueness properties are called via frequencies.infer_freq,
# see GH#23789
@property
def _is_monotonic_increasing(self) -> bool:
return algos.is_monotonic(self.asi8, timelike=True)[0]
@property
def _is_monotonic_decreasing(self) -> bool:
return algos.is_monotonic(self.asi8, timelike=True)[1]
@property
def _is_unique(self) -> bool:
return len(unique1d(self.asi8.ravel("K"))) == self.size
# ------------------------------------------------------------------
# Arithmetic Methods
def _cmp_method(self, other, op):
if self.ndim > 1 and getattr(other, "shape", None) == self.shape:
# TODO: handle 2D-like listlikes
return op(self.ravel(), other.ravel()).reshape(self.shape)
try:
other = self._validate_comparison_value(other)
except InvalidComparison:
return invalid_comparison(self, other, op)
dtype = getattr(other, "dtype", None)
if is_object_dtype(dtype):
# We have to use comp_method_OBJECT_ARRAY instead of numpy
# comparison otherwise it would raise when comparing to None
result = ops.comp_method_OBJECT_ARRAY(
op, np.asarray(self.astype(object)), other
)
return result
if other is NaT:
if op is operator.ne:
result = np.ones(self.shape, dtype=bool)
else:
result = np.zeros(self.shape, dtype=bool)
return result
if not isinstance(self.dtype, PeriodDtype):
self = cast(TimelikeOps, self)
if self._creso != other._creso:
if not isinstance(other, type(self)):
# i.e. Timedelta/Timestamp, cast to ndarray and let
# compare_mismatched_resolutions handle broadcasting
try:
# GH#52080 see if we can losslessly cast to shared unit
other = other.as_unit(self.unit, round_ok=False)
except ValueError:
other_arr = np.array(other.asm8)
return compare_mismatched_resolutions(
self._ndarray, other_arr, op
)
else:
other_arr = other._ndarray
return compare_mismatched_resolutions(self._ndarray, other_arr, op)
other_vals = self._unbox(other)
# GH#37462 comparison on i8 values is almost 2x faster than M8/m8
result = op(self._ndarray.view("i8"), other_vals.view("i8"))
o_mask = isna(other)
mask = self._isnan | o_mask
if mask.any():
nat_result = op is operator.ne
np.putmask(result, mask, nat_result)
return result
# pow is invalid for all three subclasses; TimedeltaArray will override
# the multiplication and division ops
__pow__ = _make_unpacked_invalid_op("__pow__")
__rpow__ = _make_unpacked_invalid_op("__rpow__")
__mul__ = _make_unpacked_invalid_op("__mul__")
__rmul__ = _make_unpacked_invalid_op("__rmul__")
__truediv__ = _make_unpacked_invalid_op("__truediv__")
__rtruediv__ = _make_unpacked_invalid_op("__rtruediv__")
__floordiv__ = _make_unpacked_invalid_op("__floordiv__")
__rfloordiv__ = _make_unpacked_invalid_op("__rfloordiv__")
__mod__ = _make_unpacked_invalid_op("__mod__")
__rmod__ = _make_unpacked_invalid_op("__rmod__")
__divmod__ = _make_unpacked_invalid_op("__divmod__")
__rdivmod__ = _make_unpacked_invalid_op("__rdivmod__")
@final
def _get_i8_values_and_mask(
self, other
) -> tuple[int | npt.NDArray[np.int64], None | npt.NDArray[np.bool_]]:
"""
Get the int64 values and b_mask to pass to add_overflowsafe.
"""
if isinstance(other, Period):
i8values = other.ordinal
mask = None
elif isinstance(other, (Timestamp, Timedelta)):
i8values = other._value
mask = None
else:
# PeriodArray, DatetimeArray, TimedeltaArray
mask = other._isnan
i8values = other.asi8
return i8values, mask
@final
def _get_arithmetic_result_freq(self, other) -> BaseOffset | None:
"""
Check if we can preserve self.freq in addition or subtraction.
"""
# Adding or subtracting a Timedelta/Timestamp scalar is freq-preserving
# whenever self.freq is a Tick
if isinstance(self.dtype, PeriodDtype):
return self.freq
elif not lib.is_scalar(other):
return None
elif isinstance(self.freq, Tick):
            # In these cases adding a Timedelta/Timestamp scalar preserves freq
return self.freq
return None
@final
def _add_datetimelike_scalar(self, other) -> DatetimeArray:
if not lib.is_np_dtype(self.dtype, "m"):
raise TypeError(
f"cannot add {type(self).__name__} and {type(other).__name__}"
)
self = cast("TimedeltaArray", self)
from pandas.core.arrays import DatetimeArray
from pandas.core.arrays.datetimes import tz_to_dtype
assert other is not NaT
if isna(other):
# i.e. np.datetime64("NaT")
# In this case we specifically interpret NaT as a datetime, not
# the timedelta interpretation we would get by returning self + NaT
result = self._ndarray + NaT.to_datetime64().astype(f"M8[{self.unit}]")
# Preserve our resolution
return DatetimeArray._simple_new(result, dtype=result.dtype)
other = Timestamp(other)
self, other = self._ensure_matching_resos(other)
self = cast("TimedeltaArray", self)
other_i8, o_mask = self._get_i8_values_and_mask(other)
result = add_overflowsafe(self.asi8, np.asarray(other_i8, dtype="i8"))
res_values = result.view(f"M8[{self.unit}]")
dtype = tz_to_dtype(tz=other.tz, unit=self.unit)
new_freq = self._get_arithmetic_result_freq(other)
return DatetimeArray._simple_new(res_values, dtype=dtype, freq=new_freq)
@final
def _add_datetime_arraylike(self, other: DatetimeArray) -> DatetimeArray:
if not lib.is_np_dtype(self.dtype, "m"):
raise TypeError(
f"cannot add {type(self).__name__} and {type(other).__name__}"
)
# defer to DatetimeArray.__add__
return other + self
@final
def _sub_datetimelike_scalar(
self, other: datetime | np.datetime64
) -> TimedeltaArray:
if self.dtype.kind != "M":
raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")
self = cast("DatetimeArray", self)
# subtract a datetime from myself, yielding a ndarray[timedelta64[ns]]
if isna(other):
# i.e. np.datetime64("NaT")
return self - NaT
ts = Timestamp(other)
self, ts = self._ensure_matching_resos(ts)
return self._sub_datetimelike(ts)
@final
def _sub_datetime_arraylike(self, other: DatetimeArray) -> TimedeltaArray:
if self.dtype.kind != "M":
raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")
if len(self) != len(other):
raise ValueError("cannot add indices of unequal length")
self = cast("DatetimeArray", self)
self, other = self._ensure_matching_resos(other)
return self._sub_datetimelike(other)
@final
def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray:
self = cast("DatetimeArray", self)
from pandas.core.arrays import TimedeltaArray
try:
self._assert_tzawareness_compat(other)
except TypeError as err:
new_message = str(err).replace("compare", "subtract")
raise type(err)(new_message) from err
other_i8, o_mask = self._get_i8_values_and_mask(other)
res_values = add_overflowsafe(self.asi8, np.asarray(-other_i8, dtype="i8"))
res_m8 = res_values.view(f"timedelta64[{self.unit}]")
new_freq = self._get_arithmetic_result_freq(other)
new_freq = cast("Tick | None", new_freq)
return TimedeltaArray._simple_new(res_m8, dtype=res_m8.dtype, freq=new_freq)
@final
def _add_period(self, other: Period) -> PeriodArray:
if not lib.is_np_dtype(self.dtype, "m"):
raise TypeError(f"cannot add Period to a {type(self).__name__}")
# We will wrap in a PeriodArray and defer to the reversed operation
from pandas.core.arrays.period import PeriodArray
i8vals = np.broadcast_to(other.ordinal, self.shape)
dtype = PeriodDtype(other.freq)
parr = PeriodArray(i8vals, dtype=dtype)
return parr + self
def _add_offset(self, offset):
raise AbstractMethodError(self)
def _add_timedeltalike_scalar(self, other):
"""
Add a delta of a timedeltalike
Returns
-------
Same type as self
"""
if isna(other):
            # i.e. np.timedelta64("NaT")
new_values = np.empty(self.shape, dtype="i8").view(self._ndarray.dtype)
new_values.fill(iNaT)
return type(self)._simple_new(new_values, dtype=self.dtype)
# PeriodArray overrides, so we only get here with DTA/TDA
self = cast("DatetimeArray | TimedeltaArray", self)
other = Timedelta(other)
self, other = self._ensure_matching_resos(other)
return self._add_timedeltalike(other)
def _add_timedelta_arraylike(self, other: TimedeltaArray):
"""
Add a delta of a TimedeltaIndex
Returns
-------
Same type as self
"""
# overridden by PeriodArray
if len(self) != len(other):
raise ValueError("cannot add indices of unequal length")
self = cast("DatetimeArray | TimedeltaArray", self)
self, other = self._ensure_matching_resos(other)
return self._add_timedeltalike(other)
@final
def _add_timedeltalike(self, other: Timedelta | TimedeltaArray):
self = cast("DatetimeArray | TimedeltaArray", self)
other_i8, o_mask = self._get_i8_values_and_mask(other)
new_values = add_overflowsafe(self.asi8, np.asarray(other_i8, dtype="i8"))
res_values = new_values.view(self._ndarray.dtype)
new_freq = self._get_arithmetic_result_freq(other)
# error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
# incompatible type "Union[dtype[datetime64], DatetimeTZDtype,
# dtype[timedelta64]]"; expected "Union[dtype[datetime64], DatetimeTZDtype]"
return type(self)._simple_new(
res_values, dtype=self.dtype, freq=new_freq # type: ignore[arg-type]
)
@final
def _add_nat(self):
"""
Add pd.NaT to self
"""
if isinstance(self.dtype, PeriodDtype):
raise TypeError(
f"Cannot add {type(self).__name__} and {type(NaT).__name__}"
)
self = cast("TimedeltaArray | DatetimeArray", self)
# GH#19124 pd.NaT is treated like a timedelta for both timedelta
# and datetime dtypes
result = np.empty(self.shape, dtype=np.int64)
result.fill(iNaT)
result = result.view(self._ndarray.dtype) # preserve reso
# error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
# incompatible type "Union[dtype[timedelta64], dtype[datetime64],
# DatetimeTZDtype]"; expected "Union[dtype[datetime64], DatetimeTZDtype]"
return type(self)._simple_new(
result, dtype=self.dtype, freq=None # type: ignore[arg-type]
)
@final
def _sub_nat(self):
"""
Subtract pd.NaT from self
"""
# GH#19124 Timedelta - datetime is not in general well-defined.
# We make an exception for pd.NaT, which in this case quacks
# like a timedelta.
# For datetime64 dtypes by convention we treat NaT as a datetime, so
# this subtraction returns a timedelta64 dtype.
# For period dtype, timedelta64 is a close-enough return dtype.
result = np.empty(self.shape, dtype=np.int64)
result.fill(iNaT)
if self.dtype.kind in "mM":
# We can retain unit in dtype
self = cast("DatetimeArray| TimedeltaArray", self)
return result.view(f"timedelta64[{self.unit}]")
else:
return result.view("timedelta64[ns]")
@final
def _sub_periodlike(self, other: Period | PeriodArray) -> npt.NDArray[np.object_]:
# If the operation is well-defined, we return an object-dtype ndarray
# of DateOffsets. Null entries are filled with pd.NaT
if not isinstance(self.dtype, PeriodDtype):
raise TypeError(
f"cannot subtract {type(other).__name__} from {type(self).__name__}"
)
self = cast("PeriodArray", self)
self._check_compatible_with(other)
other_i8, o_mask = self._get_i8_values_and_mask(other)
new_i8_data = add_overflowsafe(self.asi8, np.asarray(-other_i8, dtype="i8"))
new_data = np.array([self.freq.base * x for x in new_i8_data])
if o_mask is None:
# i.e. Period scalar
mask = self._isnan
else:
# i.e. PeriodArray
mask = self._isnan | o_mask
new_data[mask] = NaT
return new_data
@final
def _addsub_object_array(self, other: npt.NDArray[np.object_], op):
"""
Add or subtract array-like of DateOffset objects
Parameters
----------
other : np.ndarray[object]
op : {operator.add, operator.sub}
Returns
-------
np.ndarray[object]
            Except in the fastpath case with length 1, where we operate on the
            contained scalar.
"""
assert op in [operator.add, operator.sub]
if len(other) == 1 and self.ndim == 1:
# Note: without this special case, we could annotate return type
# as ndarray[object]
# If both 1D then broadcasting is unambiguous
return op(self, other[0])
warnings.warn(
"Adding/subtracting object-dtype array to "
f"{type(self).__name__} not vectorized.",
PerformanceWarning,
stacklevel=find_stack_level(),
)
# Caller is responsible for broadcasting if necessary
assert self.shape == other.shape, (self.shape, other.shape)
res_values = op(self.astype("O"), np.asarray(other))
return res_values
def _accumulate(self, name: str, *, skipna: bool = True, **kwargs) -> Self:
if name not in {"cummin", "cummax"}:
raise TypeError(f"Accumulation {name} not supported for {type(self)}")
op = getattr(datetimelike_accumulations, name)
result = op(self.copy(), skipna=skipna, **kwargs)
return type(self)._simple_new(result, dtype=self.dtype)
@unpack_zerodim_and_defer("__add__")
def __add__(self, other):
other_dtype = getattr(other, "dtype", None)
other = ensure_wrapped_if_datetimelike(other)
# scalar others
if other is NaT:
result = self._add_nat()
elif isinstance(other, (Tick, timedelta, np.timedelta64)):
result = self._add_timedeltalike_scalar(other)
elif isinstance(other, BaseOffset):
# specifically _not_ a Tick
result = self._add_offset(other)
elif isinstance(other, (datetime, np.datetime64)):
result = self._add_datetimelike_scalar(other)
elif isinstance(other, Period) and lib.is_np_dtype(self.dtype, "m"):
result = self._add_period(other)
elif lib.is_integer(other):
# This check must come after the check for np.timedelta64
# as is_integer returns True for these
if not isinstance(self.dtype, PeriodDtype):
raise integer_op_not_supported(self)
obj = cast("PeriodArray", self)
result = obj._addsub_int_array_or_scalar(other * obj.dtype._n, operator.add)
# array-like others
elif lib.is_np_dtype(other_dtype, "m"):
# TimedeltaIndex, ndarray[timedelta64]
result = self._add_timedelta_arraylike(other)
elif is_object_dtype(other_dtype):
# e.g. Array/Index of DateOffset objects
result = self._addsub_object_array(other, operator.add)
elif lib.is_np_dtype(other_dtype, "M") or isinstance(
other_dtype, DatetimeTZDtype
):
# DatetimeIndex, ndarray[datetime64]
return self._add_datetime_arraylike(other)
elif is_integer_dtype(other_dtype):
if not isinstance(self.dtype, PeriodDtype):
raise integer_op_not_supported(self)
obj = cast("PeriodArray", self)
result = obj._addsub_int_array_or_scalar(other * obj.dtype._n, operator.add)
else:
# Includes Categorical, other ExtensionArrays
# For PeriodDtype, if self is a TimedeltaArray and other is a
# PeriodArray with a timedelta-like (i.e. Tick) freq, this
# operation is valid. Defer to the PeriodArray implementation.
# In remaining cases, this will end up raising TypeError.
return NotImplemented
if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"):
from pandas.core.arrays import TimedeltaArray
return TimedeltaArray._from_sequence(result)
return result
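    # Illustrative addition semantics (hedged sketch, not part of the original
    # source):
    #
    #   >>> dti = pd.date_range("2020-01-01", periods=2)
    #   >>> (dti.array + pd.Timedelta("1D"))[0]
    #   Timestamp('2020-01-02 00:00:00')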
def __radd__(self, other):
# alias for __add__
return self.__add__(other)
@unpack_zerodim_and_defer("__sub__")
def __sub__(self, other):
other_dtype = getattr(other, "dtype", None)
other = ensure_wrapped_if_datetimelike(other)
# scalar others
if other is NaT:
result = self._sub_nat()
elif isinstance(other, (Tick, timedelta, np.timedelta64)):
result = self._add_timedeltalike_scalar(-other)
elif isinstance(other, BaseOffset):
# specifically _not_ a Tick
result = self._add_offset(-other)
elif isinstance(other, (datetime, np.datetime64)):
result = self._sub_datetimelike_scalar(other)
elif lib.is_integer(other):
# This check must come after the check for np.timedelta64
# as is_integer returns True for these
if not isinstance(self.dtype, PeriodDtype):
raise integer_op_not_supported(self)
obj = cast("PeriodArray", self)
result = obj._addsub_int_array_or_scalar(other * obj.dtype._n, operator.sub)
elif isinstance(other, Period):
result = self._sub_periodlike(other)
# array-like others
elif lib.is_np_dtype(other_dtype, "m"):
# TimedeltaIndex, ndarray[timedelta64]
result = self._add_timedelta_arraylike(-other)
elif is_object_dtype(other_dtype):
# e.g. Array/Index of DateOffset objects
result = self._addsub_object_array(other, operator.sub)
elif lib.is_np_dtype(other_dtype, "M") or isinstance(
other_dtype, DatetimeTZDtype
):
# DatetimeIndex, ndarray[datetime64]
result = self._sub_datetime_arraylike(other)
elif isinstance(other_dtype, PeriodDtype):
# PeriodIndex
result = self._sub_periodlike(other)
elif is_integer_dtype(other_dtype):
if not isinstance(self.dtype, PeriodDtype):
raise integer_op_not_supported(self)
obj = cast("PeriodArray", self)
result = obj._addsub_int_array_or_scalar(other * obj.dtype._n, operator.sub)
else:
# Includes ExtensionArrays, float_dtype
return NotImplemented
if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"):
from pandas.core.arrays import TimedeltaArray
return TimedeltaArray._from_sequence(result)
return result
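    # Illustrative subtraction semantics (hedged sketch): datetime minus
    # datetime yields timedelta.
    #
    #   >>> dti = pd.date_range("2020-01-01", periods=2)
    #   >>> (dti.array - dti.array[0])[1]
    #   Timedelta('1 days 00:00:00')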
def __rsub__(self, other):
other_dtype = getattr(other, "dtype", None)
other_is_dt64 = lib.is_np_dtype(other_dtype, "M") or isinstance(
other_dtype, DatetimeTZDtype
)
if other_is_dt64 and lib.is_np_dtype(self.dtype, "m"):
# ndarray[datetime64] cannot be subtracted from self, so
# we need to wrap in DatetimeArray/Index and flip the operation
if lib.is_scalar(other):
# i.e. np.datetime64 object
return Timestamp(other) - self
if not isinstance(other, DatetimeLikeArrayMixin):
# Avoid down-casting DatetimeIndex
from pandas.core.arrays import DatetimeArray
other = DatetimeArray._from_sequence(other)
return other - self
elif self.dtype.kind == "M" and hasattr(other, "dtype") and not other_is_dt64:
# GH#19959 datetime - datetime is well-defined as timedelta,
# but any other type - datetime is not well-defined.
raise TypeError(
f"cannot subtract {type(self).__name__} from {type(other).__name__}"
)
elif isinstance(self.dtype, PeriodDtype) and lib.is_np_dtype(other_dtype, "m"):
# TODO: Can we simplify/generalize these cases at all?
raise TypeError(f"cannot subtract {type(self).__name__} from {other.dtype}")
elif lib.is_np_dtype(self.dtype, "m"):
self = cast("TimedeltaArray", self)
return (-self) + other
# We get here with e.g. datetime objects
return -(self - other)
def __iadd__(self, other) -> Self:
result = self + other
self[:] = result[:]
if not isinstance(self.dtype, PeriodDtype):
# restore freq, which is invalidated by setitem
self._freq = result.freq
return self
def __isub__(self, other) -> Self:
result = self - other
self[:] = result[:]
if not isinstance(self.dtype, PeriodDtype):
# restore freq, which is invalidated by setitem
self._freq = result.freq
return self
# --------------------------------------------------------------
# Reductions
@_period_dispatch
def _quantile(
self,
qs: npt.NDArray[np.float64],
interpolation: str,
) -> Self:
return super()._quantile(qs=qs, interpolation=interpolation)
@_period_dispatch
def min(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
"""
Return the minimum value of the Array or minimum along
an axis.
See Also
--------
numpy.ndarray.min
Index.min : Return the minimum value in an Index.
Series.min : Return the minimum value in a Series.
"""
nv.validate_min((), kwargs)
nv.validate_minmax_axis(axis, self.ndim)
result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)
@_period_dispatch
def max(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
"""
Return the maximum value of the Array or maximum along
an axis.
See Also
--------
numpy.ndarray.max
Index.max : Return the maximum value in an Index.
Series.max : Return the maximum value in a Series.
"""
nv.validate_max((), kwargs)
nv.validate_minmax_axis(axis, self.ndim)
result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)
def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0):
"""
Return the mean value of the Array.
Parameters
----------
skipna : bool, default True
Whether to ignore any NaT elements.
axis : int, optional, default 0
Returns
-------
scalar
Timestamp or Timedelta.
See Also
--------
numpy.ndarray.mean : Returns the average of array elements along a given axis.
Series.mean : Return the mean value in a Series.
Notes
-----
mean is only defined for Datetime and Timedelta dtypes, not for Period.
Examples
--------
For :class:`pandas.DatetimeIndex`:
>>> idx = pd.date_range('2001-01-01 00:00', periods=3)
>>> idx
DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'],
dtype='datetime64[ns]', freq='D')
>>> idx.mean()
Timestamp('2001-01-02 00:00:00')
For :class:`pandas.TimedeltaIndex`:
>>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='D')
>>> tdelta_idx
TimedeltaIndex(['1 days', '2 days', '3 days'],
dtype='timedelta64[ns]', freq=None)
>>> tdelta_idx.mean()
Timedelta('2 days 00:00:00')
"""
if isinstance(self.dtype, PeriodDtype):
# See discussion in GH#24757
raise TypeError(
f"mean is not implemented for {type(self).__name__} since the "
"meaning is ambiguous. An alternative is "
"obj.to_timestamp(how='start').mean()"
)
result = nanops.nanmean(
self._ndarray, axis=axis, skipna=skipna, mask=self.isna()
)
return self._wrap_reduction_result(axis, result)
@_period_dispatch
def median(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
nv.validate_median((), kwargs)
if axis is not None and abs(axis) >= self.ndim:
raise ValueError("abs(axis) must be less than ndim")
result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)
def _mode(self, dropna: bool = True):
mask = None
if dropna:
mask = self.isna()
i8modes = algorithms.mode(self.view("i8"), mask=mask)
npmodes = i8modes.view(self._ndarray.dtype)
npmodes = cast(np.ndarray, npmodes)
return self._from_backing_data(npmodes)
# ------------------------------------------------------------------
# GroupBy Methods
def _groupby_op(
self,
*,
how: str,
has_dropped_na: bool,
min_count: int,
ngroups: int,
ids: npt.NDArray[np.intp],
**kwargs,
):
dtype = self.dtype
if dtype.kind == "M":
# Adding/multiplying datetimes is not valid
if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew"]:
raise TypeError(f"datetime64 type does not support {how} operations")
if how in ["any", "all"]:
# GH#34479
warnings.warn(
f"'{how}' with datetime64 dtypes is deprecated and will raise in a "
f"future version. Use (obj != pd.Timestamp(0)).{how}() instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
elif isinstance(dtype, PeriodDtype):
# Adding/multiplying Periods is not valid
if how in ["sum", "prod", "cumsum", "cumprod", "var", "skew"]:
raise TypeError(f"Period type does not support {how} operations")
if how in ["any", "all"]:
# GH#34479
warnings.warn(
f"'{how}' with PeriodDtype is deprecated and will raise in a "
f"future version. Use (obj != pd.Period(0, freq)).{how}() instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
else:
# timedeltas we can add but not multiply
if how in ["prod", "cumprod", "skew", "var"]:
raise TypeError(f"timedelta64 type does not support {how} operations")
# All of the functions implemented here are ordinal, so we can
# operate on the tz-naive equivalents
npvalues = self._ndarray.view("M8[ns]")
from pandas.core.groupby.ops import WrappedCythonOp
kind = WrappedCythonOp.get_kind_from_how(how)
op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)
res_values = op._cython_op_ndim_compat(
npvalues,
min_count=min_count,
ngroups=ngroups,
comp_ids=ids,
mask=None,
**kwargs,
)
if op.how in op.cast_blocklist:
# i.e. how in ["rank"], since other cast_blocklist methods don't go
# through cython_operation
return res_values
# We did a view to M8[ns] above, now we go the other direction
assert res_values.dtype == "M8[ns]"
if how in ["std", "sem"]:
from pandas.core.arrays import TimedeltaArray
if isinstance(self.dtype, PeriodDtype):
raise TypeError("'std' and 'sem' are not valid for PeriodDtype")
self = cast("DatetimeArray | TimedeltaArray", self)
new_dtype = f"m8[{self.unit}]"
res_values = res_values.view(new_dtype)
return TimedeltaArray._simple_new(res_values, dtype=res_values.dtype)
res_values = res_values.view(self._ndarray.dtype)
return self._from_backing_data(res_values)
class DatelikeOps(DatetimeLikeArrayMixin):
"""
Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex.
"""
@Substitution(
URL="https://docs.python.org/3/library/datetime.html"
"#strftime-and-strptime-behavior"
)
def strftime(self, date_format: str) -> npt.NDArray[np.object_]:
"""
Convert to Index using specified date_format.
Return an Index of formatted strings specified by date_format, which
supports the same string format as the python standard library. Details
of the string format can be found in `python string format
doc <%(URL)s>`__.
Formats supported by the C `strftime` API but not by the python string format
doc (such as `"%%R"`, `"%%r"`) are not officially supported and should be
preferably replaced with their supported equivalents (such as `"%%H:%%M"`,
`"%%I:%%M:%%S %%p"`).
        Note that `PeriodIndex` supports additional directives, detailed in
`Period.strftime`.
Parameters
----------
date_format : str
Date format string (e.g. "%%Y-%%m-%%d").
Returns
-------
ndarray[object]
NumPy ndarray of formatted strings.
See Also
--------
to_datetime : Convert the given argument to datetime.
DatetimeIndex.normalize : Return DatetimeIndex with times to midnight.
DatetimeIndex.round : Round the DatetimeIndex to the specified freq.
DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq.
Timestamp.strftime : Format a single Timestamp.
Period.strftime : Format a single Period.
Examples
--------
>>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"),
... periods=3, freq='s')
>>> rng.strftime('%%B %%d, %%Y, %%r')
Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM',
'March 10, 2018, 09:00:02 AM'],
dtype='object')
"""
result = self._format_native_types(date_format=date_format, na_rep=np.nan)
return result.astype(object, copy=False)
_round_doc = """
Perform {op} operation on the data to the specified `freq`.
Parameters
----------
freq : str or Offset
The frequency level to {op} the index to. Must be a fixed
frequency like 'S' (second) not 'ME' (month end). See
:ref:`frequency aliases <timeseries.offset_aliases>` for
a list of possible `freq` values.
ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
Only relevant for DatetimeIndex:
- 'infer' will attempt to infer fall dst-transition hours based on
order
- bool-ndarray where True signifies a DST time, False designates
a non-DST time (note that this flag is only applicable for
ambiguous times)
- 'NaT' will return NaT where there are ambiguous times
- 'raise' will raise an AmbiguousTimeError if there are ambiguous
times.
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, default 'raise'
A nonexistent time does not exist in a particular timezone
where clocks moved forward due to DST.
- 'shift_forward' will shift the nonexistent time forward to the
closest existing time
- 'shift_backward' will shift the nonexistent time backward to the
closest existing time
- 'NaT' will return NaT where there are nonexistent times
- timedelta objects will shift nonexistent times by the timedelta
        - 'raise' will raise a NonExistentTimeError if there are
nonexistent times.
Returns
-------
DatetimeIndex, TimedeltaIndex, or Series
Index of the same type for a DatetimeIndex or TimedeltaIndex,
or a Series with the same index for a Series.
Raises
------
ValueError if the `freq` cannot be converted.
Notes
-----
If the timestamps have a timezone, {op}ing will take place relative to the
local ("wall") time and re-localized to the same timezone. When {op}ing
near daylight savings time, use ``nonexistent`` and ``ambiguous`` to
control the re-localization behavior.
Examples
--------
**DatetimeIndex**
>>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min')
>>> rng
DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00',
'2018-01-01 12:01:00'],
dtype='datetime64[ns]', freq='min')
"""
_round_example = """>>> rng.round('h')
DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
'2018-01-01 12:00:00'],
dtype='datetime64[ns]', freq=None)
**Series**
>>> pd.Series(rng).dt.round("h")
0 2018-01-01 12:00:00
1 2018-01-01 12:00:00
2 2018-01-01 12:00:00
dtype: datetime64[ns]
When rounding near a daylight savings time transition, use ``ambiguous`` or
``nonexistent`` to control how the timestamp should be re-localized.
>>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam")
>>> rng_tz.floor("2h", ambiguous=False)
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
>>> rng_tz.floor("2h", ambiguous=True)
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
"""
_floor_example = """>>> rng.floor('h')
DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00',
'2018-01-01 12:00:00'],
dtype='datetime64[ns]', freq=None)
**Series**
>>> pd.Series(rng).dt.floor("h")
0 2018-01-01 11:00:00
1 2018-01-01 12:00:00
2 2018-01-01 12:00:00
dtype: datetime64[ns]
When rounding near a daylight savings time transition, use ``ambiguous`` or
``nonexistent`` to control how the timestamp should be re-localized.
>>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam")
>>> rng_tz.floor("2h", ambiguous=False)
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
>>> rng_tz.floor("2h", ambiguous=True)
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
"""
_ceil_example = """>>> rng.ceil('h')
DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
'2018-01-01 13:00:00'],
dtype='datetime64[ns]', freq=None)
**Series**
>>> pd.Series(rng).dt.ceil("h")
0 2018-01-01 12:00:00
1 2018-01-01 12:00:00
2 2018-01-01 13:00:00
dtype: datetime64[ns]
When rounding near a daylight savings time transition, use ``ambiguous`` or
``nonexistent`` to control how the timestamp should be re-localized.
>>> rng_tz = pd.DatetimeIndex(["2021-10-31 01:30:00"], tz="Europe/Amsterdam")
>>> rng_tz.ceil("h", ambiguous=False)
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
>>> rng_tz.ceil("h", ambiguous=True)
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
"""
class TimelikeOps(DatetimeLikeArrayMixin):
"""
Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex.
"""
_default_dtype: np.dtype
def __init__(
self, values, dtype=None, freq=lib.no_default, copy: bool = False
) -> None:
warnings.warn(
# GH#55623
f"{type(self).__name__}.__init__ is deprecated and will be "
"removed in a future version. Use pd.array instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
if dtype is not None:
dtype = pandas_dtype(dtype)
values = extract_array(values, extract_numpy=True)
if isinstance(values, IntegerArray):
values = values.to_numpy("int64", na_value=iNaT)
inferred_freq = getattr(values, "_freq", None)
explicit_none = freq is None
freq = freq if freq is not lib.no_default else None
if isinstance(values, type(self)):
if explicit_none:
# don't inherit from values
pass
elif freq is None:
freq = values.freq
elif freq and values.freq:
freq = to_offset(freq)
freq = _validate_inferred_freq(freq, values.freq)
if dtype is not None and dtype != values.dtype:
# TODO: we only have tests for this for DTA, not TDA (2022-07-01)
raise TypeError(
f"dtype={dtype} does not match data dtype {values.dtype}"
)
dtype = values.dtype
values = values._ndarray
elif dtype is None:
if isinstance(values, np.ndarray) and values.dtype.kind in "Mm":
dtype = values.dtype
else:
dtype = self._default_dtype
if isinstance(values, np.ndarray) and values.dtype == "i8":
values = values.view(dtype)
if not isinstance(values, np.ndarray):
raise ValueError(
f"Unexpected type '{type(values).__name__}'. 'values' must be a "
f"{type(self).__name__}, ndarray, or Series or Index "
"containing one of those."
)
if values.ndim not in [1, 2]:
raise ValueError("Only 1-dimensional input arrays are supported.")
if values.dtype == "i8":
# for compat with datetime/timedelta/period shared methods,
# we can sometimes get here with int64 values. These represent
# nanosecond UTC (or tz-naive) unix timestamps
if dtype is None:
dtype = self._default_dtype
values = values.view(self._default_dtype)
elif lib.is_np_dtype(dtype, "mM"):
values = values.view(dtype)
elif isinstance(dtype, DatetimeTZDtype):
kind = self._default_dtype.kind
new_dtype = f"{kind}8[{dtype.unit}]"
values = values.view(new_dtype)
dtype = self._validate_dtype(values, dtype)
if freq == "infer":
raise ValueError(
f"Frequency inference not allowed in {type(self).__name__}.__init__. "
"Use 'pd.array()' instead."
)
if copy:
values = values.copy()
if freq:
freq = to_offset(freq)
if values.dtype.kind == "m" and not isinstance(freq, Tick):
raise TypeError("TimedeltaArray/Index freq must be a Tick")
NDArrayBacked.__init__(self, values=values, dtype=dtype)
self._freq = freq
if inferred_freq is None and freq is not None:
type(self)._validate_frequency(self, freq)
@classmethod
def _validate_dtype(cls, values, dtype):
raise AbstractMethodError(cls)
@property
def freq(self):
"""
Return the frequency object if it is set, otherwise None.
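
        Examples
        --------
        Illustrative (hedged):

        >>> pd.date_range("2020-01-01", periods=3, freq="D").array.freq
        <Day>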
"""
return self._freq
@freq.setter
def freq(self, value) -> None:
if value is not None:
value = to_offset(value)
self._validate_frequency(self, value)
if self.dtype.kind == "m" and not isinstance(value, Tick):
raise TypeError("TimedeltaArray/Index freq must be a Tick")
if self.ndim > 1:
raise ValueError("Cannot set freq with ndim > 1")
self._freq = value
@final
def _maybe_pin_freq(self, freq, validate_kwds: dict):
"""
Constructor helper to pin the appropriate `freq` attribute. Assumes
that self._freq is currently set to any freq inferred in
_from_sequence_not_strict.
"""
if freq is None:
# user explicitly passed None -> override any inferred_freq
self._freq = None
elif freq == "infer":
# if self._freq is *not* None then we already inferred a freq
# and there is nothing left to do
if self._freq is None:
# Set _freq directly to bypass duplicative _validate_frequency
# check.
self._freq = to_offset(self.inferred_freq)
elif freq is lib.no_default:
# user did not specify anything, keep inferred freq if the original
# data had one, otherwise do nothing
pass
elif self._freq is None:
# We cannot inherit a freq from the data, so we need to validate
# the user-passed freq
freq = to_offset(freq)
type(self)._validate_frequency(self, freq, **validate_kwds)
self._freq = freq
else:
# Otherwise we just need to check that the user-passed freq
# doesn't conflict with the one we already have.
freq = to_offset(freq)
_validate_inferred_freq(freq, self._freq)
@final
@classmethod
def _validate_frequency(cls, index, freq: BaseOffset, **kwargs):
"""
Validate that a frequency is compatible with the values of a given
        Datetime Array/Index or Timedelta Array/Index

        Parameters
----------
index : DatetimeIndex or TimedeltaIndex
The index on which to determine if the given frequency is valid
freq : DateOffset
The frequency to validate
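
        Raises
        ------
        ValueError
            If the values do not conform to `freq`. Illustrative (the exact
            message may vary by version):

            >>> pd.DatetimeIndex(["2020-01-01", "2020-01-03"], freq="D")
            Traceback (most recent call last):
            ...
            ValueError: Inferred frequency 2D from passed values does not conform to passed frequency D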
"""
inferred = index.inferred_freq
if index.size == 0 or inferred == freq.freqstr:
return None
try:
on_freq = cls._generate_range(
start=index[0],
end=None,
periods=len(index),
freq=freq,
unit=index.unit,
**kwargs,
)
if not np.array_equal(index.asi8, on_freq.asi8):
raise ValueError
except ValueError as err:
if "non-fixed" in str(err):
# non-fixed frequencies are not meaningful for timedelta64;
# we retain that error message
raise err
# GH#11587 the main way this is reached is if the `np.array_equal`
# check above is False. This can also be reached if index[0]
# is `NaT`, in which case the call to `cls._generate_range` will
# raise a ValueError, which we re-raise with a more targeted
# message.
raise ValueError(
f"Inferred frequency {inferred} from passed values "
f"does not conform to passed frequency {freq.freqstr}"
) from err
@classmethod
def _generate_range(
cls, start, end, periods: int | None, freq, *args, **kwargs
) -> Self:
raise AbstractMethodError(cls)
# --------------------------------------------------------------
@cache_readonly
def _creso(self) -> int:
return get_unit_from_dtype(self._ndarray.dtype)
@cache_readonly
def unit(self) -> str:
# e.g. "ns", "us", "ms"
# error: Argument 1 to "dtype_to_unit" has incompatible type
# "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]"
return dtype_to_unit(self.dtype) # type: ignore[arg-type]
def as_unit(self, unit: str, round_ok: bool = True) -> Self:
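        """
        Convert to a dtype with the given unit resolution.

        Examples
        --------
        Illustrative only (assumes pandas imported as ``pd``):

        >>> dti = pd.date_range("2024-01-01", periods=2, freq="D")
        >>> dti.as_unit("s").dtype
        dtype('<M8[s]')
        """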
if unit not in ["s", "ms", "us", "ns"]:
raise ValueError("Supported units are 's', 'ms', 'us', 'ns'")
dtype = np.dtype(f"{self.dtype.kind}8[{unit}]")
new_values = astype_overflowsafe(self._ndarray, dtype, round_ok=round_ok)
if isinstance(self.dtype, np.dtype):
new_dtype = new_values.dtype
else:
tz = cast("DatetimeArray", self).tz
new_dtype = DatetimeTZDtype(tz=tz, unit=unit)
# error: Unexpected keyword argument "freq" for "_simple_new" of
# "NDArrayBacked" [call-arg]
return type(self)._simple_new(
new_values, dtype=new_dtype, freq=self.freq # type: ignore[call-arg]
)
# TODO: annotate other as DatetimeArray | TimedeltaArray | Timestamp | Timedelta
# with the return type matching input type. TypeVar?
def _ensure_matching_resos(self, other):
if self._creso != other._creso:
# Just as with Timestamp/Timedelta, we cast to the higher resolution
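            # e.g. an "M8[s]" array paired with an "M8[ns]" array: the "s"
            # side is cast to "ns" (illustrative)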
if self._creso < other._creso:
self = self.as_unit(other.unit)
else:
other = other.as_unit(self.unit)
return self, other
# --------------------------------------------------------------
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
if (
ufunc in [np.isnan, np.isinf, np.isfinite]
and len(inputs) == 1
and inputs[0] is self
):
# numpy 1.18 changed isinf and isnan to not raise on dt64/td64
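            # e.g. np.isnan on a DatetimeArray containing NaT returns a
            # boolean mask (True at the NaT position) rather than raising
            # (illustrative)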
return getattr(ufunc, method)(self._ndarray, **kwargs)
return super().__array_ufunc__(ufunc, method, *inputs, **kwargs)
def _round(self, freq, mode, ambiguous, nonexistent):
# round the local times
if isinstance(self.dtype, DatetimeTZDtype):
# operate on naive timestamps, then convert back to aware
self = cast("DatetimeArray", self)
naive = self.tz_localize(None)
result = naive._round(freq, mode, ambiguous, nonexistent)
return result.tz_localize(
self.tz, ambiguous=ambiguous, nonexistent=nonexistent
)
values = self.view("i8")
values = cast(np.ndarray, values)
nanos = get_unit_for_round(freq, self._creso)
if nanos == 0:
# GH 52761
return self.copy()
result_i8 = round_nsint64(values, mode, nanos)
result = self._maybe_mask_results(result_i8, fill_value=iNaT)
result = result.view(self._ndarray.dtype)
return self._simple_new(result, dtype=self.dtype)
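    # Illustrative behaviour of round/floor/ceil (sketch; assumes pd imported):
    #   idx = pd.DatetimeIndex(["2024-01-01 12:34:56"])
    #   idx.round("h")  -> ["2024-01-01 13:00:00"]
    #   idx.floor("h")  -> ["2024-01-01 12:00:00"]
    #   idx.ceil("h")   -> ["2024-01-01 13:00:00"]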
@Appender((_round_doc + _round_example).format(op="round"))
def round(
self,
freq,
ambiguous: TimeAmbiguous = "raise",
nonexistent: TimeNonexistent = "raise",
) -> Self:
return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent)
@Appender((_round_doc + _floor_example).format(op="floor"))
def floor(
self,
freq,
ambiguous: TimeAmbiguous = "raise",
nonexistent: TimeNonexistent = "raise",
) -> Self:
return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent)
@Appender((_round_doc + _ceil_example).format(op="ceil"))
def ceil(
self,
freq,
ambiguous: TimeAmbiguous = "raise",
nonexistent: TimeNonexistent = "raise",
) -> Self:
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)
# --------------------------------------------------------------
# Reductions
def any(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
# GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())
def all(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
# GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
return nanops.nanall(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())
# --------------------------------------------------------------
# Frequency Methods
def _maybe_clear_freq(self) -> None:
self._freq = None
def _with_freq(self, freq) -> Self:
"""
        Helper to get a view on the same data, with a new freq.

        Parameters
        ----------
        freq : DateOffset, None, or "infer"

        Returns
-------
Same type as self
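
        Examples
        --------
        Illustrative only (``._data`` accesses the underlying array; this is
        an internal API):

        >>> arr = pd.date_range("2024-01-01", periods=3)._data
        >>> arr._with_freq(None).freq is None
        True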
"""
# GH#29843
if freq is None:
# Always valid
pass
elif len(self) == 0 and isinstance(freq, BaseOffset):
# Always valid. In the TimedeltaArray case, we require a Tick offset
if self.dtype.kind == "m" and not isinstance(freq, Tick):
raise TypeError("TimedeltaArray/Index freq must be a Tick")
else:
# As an internal method, we can ensure this assertion always holds
assert freq == "infer"
freq = to_offset(self.inferred_freq)
arr = self.view()
arr._freq = freq
return arr
# --------------------------------------------------------------
# ExtensionArray Interface
def _values_for_json(self) -> np.ndarray:
# Small performance bump vs the base class which calls np.asarray(self)
if isinstance(self.dtype, np.dtype):
return self._ndarray
return super()._values_for_json()
def factorize(
self,
use_na_sentinel: bool = True,
sort: bool = False,
):
if self.freq is not None:
# We must be unique, so can short-circuit (and retain freq)
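            # e.g. for pd.date_range("2024-01-01", periods=3) this yields
            # codes [0, 1, 2] and uniques equal to self, with freq preserved
            # (illustrative)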
codes = np.arange(len(self), dtype=np.intp)
uniques = self.copy() # TODO: copy or view?
if sort and self.freq.n < 0:
codes = codes[::-1]
uniques = uniques[::-1]
return codes, uniques
if sort:
# algorithms.factorize only passes sort=True here when freq is
# not None, so this should not be reached.
raise NotImplementedError(
f"The 'sort' keyword in {type(self).__name__}.factorize is "
"ignored unless arr.freq is not None. To factorize with sort, "
"call pd.factorize(obj, sort=True) instead."
)
return super().factorize(use_na_sentinel=use_na_sentinel)
@classmethod
def _concat_same_type(
cls,
to_concat: Sequence[Self],
axis: AxisInt = 0,
) -> Self:
new_obj = super()._concat_same_type(to_concat, axis)
obj = to_concat[0]
if axis == 0:
# GH 3232: If the concat result is evenly spaced, we can retain the
# original frequency
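            # e.g. concatenating daily ranges Jan 1-2 and Jan 3-4: each chunk's
            # last value + freq equals the next chunk's first value, so the
            # result keeps freq="D" (illustrative)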
to_concat = [x for x in to_concat if len(x)]
if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
pairs = zip(to_concat[:-1], to_concat[1:])
if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs):
new_freq = obj.freq
new_obj._freq = new_freq
return new_obj
def copy(self, order: str = "C") -> Self:
new_obj = super().copy(order=order)
new_obj._freq = self.freq
return new_obj
def interpolate(
self,
*,
method: InterpolateOptions,
axis: int,
index: Index,
limit,
limit_direction,
limit_area,
copy: bool,
**kwargs,
) -> Self:
"""
See NDFrame.interpolate.__doc__.
"""
# NB: we return type(self) even if copy=False
if method != "linear":
raise NotImplementedError
if not copy:
out_data = self._ndarray
else:
out_data = self._ndarray.copy()
missing.interpolate_2d_inplace(
out_data,
method=method,
axis=axis,
index=index,
limit=limit,
limit_direction=limit_direction,
limit_area=limit_area,
**kwargs,
)
if not copy:
return self
return type(self)._simple_new(out_data, dtype=self.dtype)
# --------------------------------------------------------------
# Unsorted
@property
def _is_dates_only(self) -> bool:
"""
        Check whether all values are round times at midnight (with no
        timezone), in which case they get a more compact __repr__ than
        other cases. For TimedeltaArray we check for multiples of 24H.
"""
if not lib.is_np_dtype(self.dtype):
# i.e. we have a timezone
return False
values_int = self.asi8
consider_values = values_int != iNaT
reso = get_unit_from_dtype(self.dtype)
ppd = periods_per_day(reso)
# TODO: can we reuse is_date_array_normalized? would need a skipna kwd
# (first attempt at this was less performant than this implementation)
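        # a value is "dates only" iff it is an exact multiple of periods-per-day,
        # e.g. ppd == 86_400_000_000_000 for nanosecond resolution (illustrative)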
even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0
return even_days
# -------------------------------------------------------------------
# Shared Constructor Helpers
def ensure_arraylike_for_datetimelike(
data, copy: bool, cls_name: str
) -> tuple[ArrayLike, bool]:
if not hasattr(data, "dtype"):
# e.g. list, tuple
if not isinstance(data, (list, tuple)) and np.ndim(data) == 0:
# i.e. generator
data = list(data)
data = construct_1d_object_array_from_listlike(data)
copy = False
elif isinstance(data, ABCMultiIndex):
raise TypeError(f"Cannot create a {cls_name} from a MultiIndex.")
else:
data = extract_array(data, extract_numpy=True)
if isinstance(data, IntegerArray) or (
isinstance(data, ArrowExtensionArray) and data.dtype.kind in "iu"
):
data = data.to_numpy("int64", na_value=iNaT)
copy = False
elif isinstance(data, ArrowExtensionArray):
data = data._maybe_convert_datelike_array()
data = data.to_numpy()
copy = False
elif not isinstance(data, (np.ndarray, ExtensionArray)):
# GH#24539 e.g. xarray, dask object
data = np.asarray(data)
elif isinstance(data, ABCCategorical):
# GH#18664 preserve tz in going DTI->Categorical->DTI
# TODO: cases where we need to do another pass through maybe_convert_dtype,
# e.g. the categories are timedelta64s
data = data.categories.take(data.codes, fill_value=NaT)._values
copy = False
return data, copy
@overload
def validate_periods(periods: None) -> None:
...
@overload
def validate_periods(periods: int | float) -> int:
...
def validate_periods(periods: int | float | None) -> int | None:
"""
    If a `periods` argument is passed to the Datetime/Timedelta Array/Index
    constructor, cast it to an integer.

    Parameters
    ----------
    periods : None, float, int

    Returns
    -------
    periods : None or int

    Raises
    ------
    TypeError
        If `periods` is neither None nor a number (float or int).
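
    Examples
    --------
    Illustrative only:

    >>> validate_periods(5)
    5
    >>> validate_periods(None) is None
    True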
"""
if periods is not None:
if lib.is_float(periods):
warnings.warn(
# GH#56036
"Non-integer 'periods' in pd.date_range, pd.timedelta_range, "
"pd.period_range, and pd.interval_range are deprecated and "
"will raise in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)
periods = int(periods)
elif not lib.is_integer(periods):
raise TypeError(f"periods must be a number, got {periods}")
return periods
def _validate_inferred_freq(
freq: BaseOffset | None, inferred_freq: BaseOffset | None
) -> BaseOffset | None:
"""
If the user passes a freq and another freq is inferred from passed data,
    require that they match.

    Parameters
    ----------
    freq : DateOffset or None
    inferred_freq : DateOffset or None

    Returns
-------
freq : DateOffset or None
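
    Examples
    --------
    Illustrative only:

    >>> _validate_inferred_freq(None, to_offset("D"))
    <Day>
    >>> _validate_inferred_freq(to_offset("2D"), to_offset("D"))
    Traceback (most recent call last):
    ...
    ValueError: Inferred frequency ... does not conform to passed frequency 2D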
"""
if inferred_freq is not None:
if freq is not None and freq != inferred_freq:
raise ValueError(
f"Inferred frequency {inferred_freq} from passed "
"values does not conform to passed frequency "
f"{freq.freqstr}"
)
if freq is None:
freq = inferred_freq
return freq
def dtype_to_unit(dtype: DatetimeTZDtype | np.dtype | ArrowDtype) -> str:
"""
    Return the unit str corresponding to the dtype's resolution.

    Parameters
    ----------
    dtype : DatetimeTZDtype, np.dtype, or ArrowDtype
        If np.dtype, we assume it is a datetime64 or timedelta64 dtype.

    Returns
    -------
    str
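
    Examples
    --------
    Illustrative only:

    >>> dtype_to_unit(np.dtype("M8[ms]"))
    'ms'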
"""
if isinstance(dtype, DatetimeTZDtype):
return dtype.unit
elif isinstance(dtype, ArrowDtype):
if dtype.kind not in "mM":
raise ValueError(f"{dtype=} does not have a resolution.")
return dtype.pyarrow_dtype.unit
return np.datetime_data(dtype)[0]