Sindbad~EG File Manager

Current Path : /proc/4039398/root/usr/local/lib/python3.12/site-packages/pandas/tests/io/pytables/
Upload File :
Current File : //proc/4039398/root/usr/local/lib/python3.12/site-packages/pandas/tests/io/pytables/test_put.py

import re

import numpy as np
import pytest

from pandas._libs.tslibs import Timestamp

import pandas as pd
from pandas import (
    DataFrame,
    HDFStore,
    Index,
    MultiIndex,
    Series,
    _testing as tm,
    concat,
    date_range,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)
from pandas.util import _test_decorators as td

# Module-level pytest mark: tags every test collected from this file with
# ``single_cpu``.
# NOTE(review): presumably used to keep these HDF5 tests off parallel
# workers -- confirm against the project's pytest configuration.
pytestmark = pytest.mark.single_cpu


def test_format_type(tmp_path, setup_path):
    """Each storer reports the format that ``put`` was asked to write."""
    frame = DataFrame({"A": [1, 2]})
    key_formats = (("a", "fixed"), ("b", "table"))

    with HDFStore(tmp_path / setup_path) as store:
        # Write the same frame once per format, each under its own key.
        for key, fmt in key_formats:
            store.put(key, frame, format=fmt)

        # Only after both writes, check what each storer reports.
        for key, fmt in key_formats:
            assert store.get_storer(key).format_type == fmt


def test_format_kwarg_in_constructor(tmp_path, setup_path):
    """Passing ``format`` to the ``HDFStore`` constructor raises ValueError."""
    # GH 13291
    target = tmp_path / setup_path
    expected_error = "format is not a defined argument for HDFStore"

    with pytest.raises(ValueError, match=expected_error):
        HDFStore(target, format="table")


def test_api_default_format(tmp_path, setup_path):
    """Check that ``io.hdf.default_format`` drives ``put``/``append``/``to_hdf``.

    With the option set to ``"fixed"``, data written without an explicit
    format is not a table and appending raises ``ValueError``.  With the
    option set to ``"table"``, both plain writes and appends produce tables.
    The same expectations are checked through the ``HDFStore`` API and
    through ``DataFrame.to_hdf``.
    """
    # Error raised when an append is attempted with a non-table format.
    msg = "Can only append to Tables"

    # One frame serves both halves of the test; it is only ever read.
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )

    # default_format option, exercised through the HDFStore API
    with ensure_clean_store(setup_path) as store:
        with pd.option_context("io.hdf.default_format", "fixed"):
            _maybe_remove(store, "df")
            store.put("df", df)
            assert not store.get_storer("df").is_table

            with pytest.raises(ValueError, match=msg):
                store.append("df2", df)

        with pd.option_context("io.hdf.default_format", "table"):
            _maybe_remove(store, "df")
            store.put("df", df)
            assert store.get_storer("df").is_table

            _maybe_remove(store, "df2")
            store.append("df2", df)
            # Verify the key that was just appended ("df2"); the previous
            # version re-checked "df" here, leaving the append untested.
            assert store.get_storer("df2").is_table

    # default_format option, exercised through DataFrame.to_hdf
    path = tmp_path / setup_path

    with pd.option_context("io.hdf.default_format", "fixed"):
        df.to_hdf(path, key="df")
        with HDFStore(path) as store:
            assert not store.get_storer("df").is_table
        with pytest.raises(ValueError, match=msg):
            df.to_hdf(path, key="df2", append=True)

    with pd.option_context("io.hdf.default_format", "table"):
        df.to_hdf(path, key="df3")
        with HDFStore(path) as store:
            assert store.get_storer("df3").is_table
        df.to_hdf(path, key="df4", append=True)
        with HDFStore(path) as store:
            assert store.get_storer("df4").is_table


def test_put(setup_path):
    """Exercise item assignment and ``put``, including rejected appends."""
    with ensure_clean_store(setup_path) as store:
        series = Series(
            np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
        )
        frame = DataFrame(
            np.random.default_rng(2).standard_normal((20, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=20, freq="B"),
        )
        head = frame[:10]
        tail = frame[10:]

        store["a"] = series
        # Same frame slice stored under flat, nested and slash-prefixed keys.
        for key in ("b", "foo/bar/bah", "foo", "/foo"):
            store[key] = head
        store.put("c", head, format="table")

        append_error = "Can only append to Tables"

        # not OK, not a table
        with pytest.raises(ValueError, match=append_error):
            store.put("b", tail, append=True)

        # node does not currently exist, test _is_table_type returns False
        # in this case
        _maybe_remove(store, "f")
        with pytest.raises(ValueError, match=append_error):
            store.put("f", tail, append=True)

        # can't put to a table (use append instead)
        with pytest.raises(ValueError, match=append_error):
            store.put("c", tail, append=True)

        # overwrite table
        store.put("c", head, format="table", append=False)
        tm.assert_frame_equal(head, store["c"])


def test_put_string_index(setup_path):
    """Series and frames with long string indexes round-trip via the store."""
    long_labels = [f"I am a very long string index: {i}" for i in range(20)]
    # Second case prepends one label of a different length ("mixed length").
    mixed_labels = ["abcdefghijklmnopqrstuvwxyz1234567890"] + long_labels

    with ensure_clean_store(setup_path) as store:
        for labels in (long_labels, mixed_labels):
            ser = Series(np.arange(len(labels)), index=Index(labels))
            frame = DataFrame({"A": ser, "B": ser})

            store["a"] = ser
            tm.assert_series_equal(store["a"], ser)

            store["b"] = frame
            tm.assert_frame_equal(store["b"], frame)


def test_put_compression(setup_path):
    """zlib-compressed table writes round-trip; fixed format rejects complib."""
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    fixed_error = "Compression not supported on Fixed format stores"

    with ensure_clean_store(setup_path) as store:
        store.put("c", frame, format="table", complib="zlib")
        tm.assert_frame_equal(store["c"], frame)

        # can't compress if format='fixed'
        with pytest.raises(ValueError, match=fixed_error):
            store.put("b", frame, format="fixed", complib="zlib")


@td.skip_if_windows
def test_put_compression_blosc(setup_path):
    """blosc-compressed table writes round-trip; fixed format rejects complib."""
    fixed_error = "Compression not supported on Fixed format stores"
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )

    with ensure_clean_store(setup_path) as store:
        # can't compress if format='fixed'
        with pytest.raises(ValueError, match=fixed_error):
            store.put("b", frame, format="fixed", complib="blosc")

        store.put("c", frame, format="table", complib="blosc")
        stored = store["c"]
        tm.assert_frame_equal(stored, frame)


def test_put_mixed_type(setup_path):
    """A frame mixing float, object, bool, int and datetime columns round-trips.

    The ``put`` call is expected to emit a ``PerformanceWarning``.
    """
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    # Columns are added one at a time so the resulting column order is fixed.
    frame["obj1"] = "foo"
    frame["obj2"] = "bar"
    frame["bool1"] = frame["A"] > 0
    frame["bool2"] = frame["B"] > 0
    frame["bool3"] = True
    frame["int1"] = 1
    frame["int2"] = 2
    stamp_columns = (
        ("timestamp1", "20010102"),
        ("timestamp2", "20010103"),
        ("datetime1", "20010102"),
        ("datetime2", "20010103"),
    )
    for column, day in stamp_columns:
        frame[column] = Timestamp(day).as_unit("ns")
    # Blank out three rows of one object column.
    frame.loc[frame.index[3:6], ["obj1"]] = np.nan
    frame = frame._consolidate()

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df")

        with tm.assert_produces_warning(pd.errors.PerformanceWarning):
            store.put("df", frame)

        stored = store.get("df")
        tm.assert_frame_equal(stored, frame)


@pytest.mark.parametrize("format", ["table", "fixed"])
@pytest.mark.parametrize(
    "index",
    [
        Index([str(i) for i in range(10)]),
        Index(np.arange(10, dtype=float)),
        Index(np.arange(10)),
        date_range("2020-01-01", periods=10),
        pd.period_range("2020-01-01", periods=10),
    ],
)
def test_store_index_types(setup_path, format, index):
    """A frame round-trips for each parametrized index type and format."""
    # GH5386
    values = np.random.default_rng(2).standard_normal((10, 2))
    frame = DataFrame(values, columns=list("AB"), index=index)

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df")
        store.put("df", frame, format=format)
        tm.assert_frame_equal(frame, store["df"])


def test_column_multiindex(setup_path):
    """Frames with MultiIndex or named columns round-trip through the store.

    Also checks that ``data_columns`` is rejected for MultiIndex columns and
    that appending twice to a table with MultiIndex columns concatenates.
    """
    # GH 4710
    # recreate multi-indexes properly
    strict = {"check_index_type": True, "check_column_type": True}

    columns = MultiIndex.from_tuples(
        [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")], names=["first", "second"]
    )
    frame = DataFrame(np.arange(12).reshape(3, 4), columns=columns)
    expected = frame.set_axis(frame.index.to_numpy())

    with ensure_clean_store(setup_path) as store:
        store.put("df", frame)
        tm.assert_frame_equal(store["df"], expected, **strict)

        store.put("df1", frame, format="table")
        tm.assert_frame_equal(store["df1"], expected, **strict)

        # data_columns given as a list
        list_error = re.escape(
            "cannot use a multi-index on axis [1] with data_columns ['A']"
        )
        with pytest.raises(ValueError, match=list_error):
            store.put("df2", frame, format="table", data_columns=["A"])

        # data_columns given as True
        true_error = re.escape(
            "cannot use a multi-index on axis [1] with data_columns True"
        )
        with pytest.raises(ValueError, match=true_error):
            store.put("df3", frame, format="table", data_columns=True)

    # appending multi-column on existing table (see GH 6167)
    with ensure_clean_store(setup_path) as store:
        for _ in range(2):
            store.append("df2", frame)

        tm.assert_frame_equal(store["df2"], concat((frame, frame)))

    # non_index_axes name
    frame = DataFrame(
        np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo")
    )
    expected = frame.set_axis(frame.index.to_numpy())

    with ensure_clean_store(setup_path) as store:
        store.put("df1", frame, format="table")
        tm.assert_frame_equal(store["df1"], expected, **strict)


def test_store_multiindex(setup_path):
    """Validate how MultiIndex level names are handled when appending."""
    # GH 5527

    def make_index(names=None):
        # 2 dates x 2 x 3 = 12 entries, optionally with level names.
        days = date_range("2013-12-01", "2013-12-02")
        return MultiIndex.from_product([days, range(2), range(3)], names=names)

    def make_frame(names=None):
        # All-zero frame with columns "a"/"b" on the index built above.
        return DataFrame(
            np.zeros((12, 2)), columns=["a", "b"], index=make_index(names)
        )

    dup_error = "duplicate names/columns in the multi-index when storing as a table"

    with ensure_clean_store(setup_path) as store:
        # no names
        _maybe_remove(store, "df")
        frame = make_frame()
        store.append("df", frame)
        tm.assert_frame_equal(store.select("df"), frame)

        # partial names
        _maybe_remove(store, "df")
        frame = make_frame(["date", None, None])
        store.append("df", frame)
        tm.assert_frame_equal(store.select("df"), frame)

        # series: the expected result carries "level_1"/"level_2" in place
        # of the unnamed levels
        _maybe_remove(store, "ser")
        unnamed = Series(np.zeros(12), index=make_index(["date", None, None]))
        store.append("ser", unnamed)
        named = Series(np.zeros(12), index=make_index(["date", "level_1", "level_2"]))
        tm.assert_series_equal(store.select("ser"), named)

        # dup with column
        _maybe_remove(store, "df")
        frame = make_frame(["date", "a", "t"])
        with pytest.raises(ValueError, match=dup_error):
            store.append("df", frame)

        # dup within level
        _maybe_remove(store, "df")
        frame = make_frame(["date", "date", "date"])
        with pytest.raises(ValueError, match=dup_error):
            store.append("df", frame)

        # fully named
        _maybe_remove(store, "df")
        frame = make_frame(["date", "s", "t"])
        store.append("df", frame)
        tm.assert_frame_equal(store.select("df"), frame)


@pytest.mark.parametrize("format", ["fixed", "table"])
def test_store_periodindex(tmp_path, setup_path, format):
    """A frame with a monthly PeriodIndex round-trips via to_hdf/read_hdf."""
    # GH 7796
    periods = pd.period_range("20220101", freq="M", periods=5)
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((5, 1)),
        index=periods,
    )

    target = tmp_path / setup_path
    frame.to_hdf(target, key="df", mode="w", format=format)
    stored = pd.read_hdf(target, "df")
    tm.assert_frame_equal(frame, stored)

Sindbad File Manager Version 1.0, Coded By Sindbad EG ~ The Terrorists