Sindbad~EG File Manager
"""
Tests for the file pandas.io.formats.format, *not* tests for general formatting
of pandas objects.
"""
from datetime import datetime
from io import StringIO
from pathlib import Path
import re
from shutil import get_terminal_size
import numpy as np
import pytest
from pandas._config import using_pyarrow_string_dtype
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
NaT,
Series,
Timestamp,
date_range,
get_option,
option_context,
read_csv,
reset_option,
)
from pandas.io.formats import printing
import pandas.io.formats.format as fmt
@pytest.fixture(params=["string", "pathlike", "buffer"])
def filepath_or_buffer_id(request):
"""
A fixture yielding test ids for filepath_or_buffer testing.
"""
return request.param
@pytest.fixture
def filepath_or_buffer(filepath_or_buffer_id, tmp_path):
"""
A fixture yielding a string representing a filepath, a path-like object
and a StringIO buffer. Also checks that buffer is not closed.
"""
if filepath_or_buffer_id == "buffer":
buf = StringIO()
yield buf
assert not buf.closed
else:
assert isinstance(tmp_path, Path)
if filepath_or_buffer_id == "pathlike":
yield tmp_path / "foo"
else:
yield str(tmp_path / "foo")
@pytest.fixture
def assert_filepath_or_buffer_equals(
filepath_or_buffer, filepath_or_buffer_id, encoding
):
"""
Assertion helper for checking filepath_or_buffer.
"""
if encoding is None:
encoding = "utf-8"
def _assert_filepath_or_buffer_equals(expected):
if filepath_or_buffer_id == "string":
with open(filepath_or_buffer, encoding=encoding) as f:
result = f.read()
elif filepath_or_buffer_id == "pathlike":
result = filepath_or_buffer.read_text(encoding=encoding)
elif filepath_or_buffer_id == "buffer":
result = filepath_or_buffer.getvalue()
assert result == expected
return _assert_filepath_or_buffer_equals
def has_info_repr(df):
r = repr(df)
c1 = r.split("\n")[0].startswith("<class")
c2 = r.split("\n")[0].startswith(r"<class") # _repr_html_
return c1 or c2
def has_non_verbose_info_repr(df):
has_info = has_info_repr(df)
r = repr(df)
# 1. <class>
# 2. Index
# 3. Columns
# 4. dtype
# 5. memory usage
# 6. trailing newline
nv = len(r.split("\n")) == 6
return has_info and nv
def has_horizontally_truncated_repr(df):
try: # Check header row
fst_line = np.array(repr(df).splitlines()[0].split())
cand_col = np.where(fst_line == "...")[0][0]
except IndexError:
return False
# Make sure each row has this ... in the same place
r = repr(df)
for ix, _ in enumerate(r.splitlines()):
if not r.split()[cand_col] == "...":
return False
return True
def has_vertically_truncated_repr(df):
r = repr(df)
only_dot_row = False
for row in r.splitlines():
if re.match(r"^[\.\ ]+$", row):
only_dot_row = True
return only_dot_row
def has_truncated_repr(df):
return has_horizontally_truncated_repr(df) or has_vertically_truncated_repr(df)
def has_doubly_truncated_repr(df):
return has_horizontally_truncated_repr(df) and has_vertically_truncated_repr(df)
def has_expanded_repr(df):
r = repr(df)
for line in r.split("\n"):
if line.endswith("\\"):
return True
return False
class TestDataFrameFormatting:
def test_repr_truncation(self):
max_len = 20
with option_context("display.max_colwidth", max_len):
df = DataFrame(
{
"A": np.random.default_rng(2).standard_normal(10),
"B": [
"a"
* np.random.default_rng(2).integers(max_len - 1, max_len + 1)
for _ in range(10)
],
}
)
r = repr(df)
r = r[r.find("\n") + 1 :]
adj = printing.get_adjustment()
for line, value in zip(r.split("\n"), df["B"]):
if adj.len(value) + 1 > max_len:
assert "..." in line
else:
assert "..." not in line
with option_context("display.max_colwidth", 999999):
assert "..." not in repr(df)
with option_context("display.max_colwidth", max_len + 2):
assert "..." not in repr(df)
def test_repr_truncation_preserves_na(self):
# https://github.com/pandas-dev/pandas/issues/55630
df = DataFrame({"a": [pd.NA for _ in range(10)]})
with option_context("display.max_rows", 2, "display.show_dimensions", False):
assert repr(df) == " a\n0 <NA>\n.. ...\n9 <NA>"
def test_max_colwidth_negative_int_raises(self):
# Deprecation enforced from:
# https://github.com/pandas-dev/pandas/issues/31532
with pytest.raises(
ValueError, match="Value must be a nonnegative integer or None"
):
with option_context("display.max_colwidth", -1):
pass
def test_repr_chop_threshold(self):
df = DataFrame([[0.1, 0.5], [0.5, -0.1]])
reset_option("display.chop_threshold") # default None
assert repr(df) == " 0 1\n0 0.1 0.5\n1 0.5 -0.1"
with option_context("display.chop_threshold", 0.2):
assert repr(df) == " 0 1\n0 0.0 0.5\n1 0.5 0.0"
with option_context("display.chop_threshold", 0.6):
assert repr(df) == " 0 1\n0 0.0 0.0\n1 0.0 0.0"
with option_context("display.chop_threshold", None):
assert repr(df) == " 0 1\n0 0.1 0.5\n1 0.5 -0.1"
def test_repr_chop_threshold_column_below(self):
# GH 6839: validation case
df = DataFrame([[10, 20, 30, 40], [8e-10, -1e-11, 2e-9, -2e-11]]).T
with option_context("display.chop_threshold", 0):
assert repr(df) == (
" 0 1\n"
"0 10.0 8.000000e-10\n"
"1 20.0 -1.000000e-11\n"
"2 30.0 2.000000e-09\n"
"3 40.0 -2.000000e-11"
)
with option_context("display.chop_threshold", 1e-8):
assert repr(df) == (
" 0 1\n"
"0 10.0 0.000000e+00\n"
"1 20.0 0.000000e+00\n"
"2 30.0 0.000000e+00\n"
"3 40.0 0.000000e+00"
)
with option_context("display.chop_threshold", 5e-11):
assert repr(df) == (
" 0 1\n"
"0 10.0 8.000000e-10\n"
"1 20.0 0.000000e+00\n"
"2 30.0 2.000000e-09\n"
"3 40.0 0.000000e+00"
)
def test_repr_no_backslash(self):
with option_context("mode.sim_interactive", True):
df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
assert "\\" not in repr(df)
def test_expand_frame_repr(self):
df_small = DataFrame("hello", index=[0], columns=[0])
df_wide = DataFrame("hello", index=[0], columns=range(10))
df_tall = DataFrame("hello", index=range(30), columns=range(5))
with option_context("mode.sim_interactive", True):
with option_context(
"display.max_columns",
10,
"display.width",
20,
"display.max_rows",
20,
"display.show_dimensions",
True,
):
with option_context("display.expand_frame_repr", True):
assert not has_truncated_repr(df_small)
assert not has_expanded_repr(df_small)
assert not has_truncated_repr(df_wide)
assert has_expanded_repr(df_wide)
assert has_vertically_truncated_repr(df_tall)
assert has_expanded_repr(df_tall)
with option_context("display.expand_frame_repr", False):
assert not has_truncated_repr(df_small)
assert not has_expanded_repr(df_small)
assert not has_horizontally_truncated_repr(df_wide)
assert not has_expanded_repr(df_wide)
assert has_vertically_truncated_repr(df_tall)
assert not has_expanded_repr(df_tall)
def test_repr_non_interactive(self):
# in non interactive mode, there can be no dependency on the
# result of terminal auto size detection
df = DataFrame("hello", index=range(1000), columns=range(5))
with option_context(
"mode.sim_interactive", False, "display.width", 0, "display.max_rows", 5000
):
assert not has_truncated_repr(df)
assert not has_expanded_repr(df)
def test_repr_truncates_terminal_size(self, monkeypatch):
# see gh-21180
terminal_size = (118, 96)
monkeypatch.setattr(
"pandas.io.formats.format.get_terminal_size", lambda: terminal_size
)
index = range(5)
columns = MultiIndex.from_tuples(
[
("This is a long title with > 37 chars.", "cat"),
("This is a loooooonger title with > 43 chars.", "dog"),
]
)
df = DataFrame(1, index=index, columns=columns)
result = repr(df)
h1, h2 = result.split("\n")[:2]
assert "long" in h1
assert "loooooonger" in h1
assert "cat" in h2
assert "dog" in h2
# regular columns
df2 = DataFrame({"A" * 41: [1, 2], "B" * 41: [1, 2]})
result = repr(df2)
assert df2.columns[0] in result.split("\n")[0]
def test_repr_truncates_terminal_size_full(self, monkeypatch):
# GH 22984 ensure entire window is filled
terminal_size = (80, 24)
df = DataFrame(np.random.default_rng(2).random((1, 7)))
monkeypatch.setattr(
"pandas.io.formats.format.get_terminal_size", lambda: terminal_size
)
assert "..." not in str(df)
def test_repr_truncation_column_size(self):
# dataframe with last column very wide -> check it is not used to
# determine size of truncation (...) column
df = DataFrame(
{
"a": [108480, 30830],
"b": [12345, 12345],
"c": [12345, 12345],
"d": [12345, 12345],
"e": ["a" * 50] * 2,
}
)
assert "..." in str(df)
assert " ... " not in str(df)
def test_repr_max_columns_max_rows(self):
term_width, term_height = get_terminal_size()
if term_width < 10 or term_height < 10:
pytest.skip(f"terminal size too small, {term_width} x {term_height}")
def mkframe(n):
index = [f"{i:05d}" for i in range(n)]
return DataFrame(0, index, index)
df6 = mkframe(6)
df10 = mkframe(10)
with option_context("mode.sim_interactive", True):
with option_context("display.width", term_width * 2):
with option_context("display.max_rows", 5, "display.max_columns", 5):
assert not has_expanded_repr(mkframe(4))
assert not has_expanded_repr(mkframe(5))
assert not has_expanded_repr(df6)
assert has_doubly_truncated_repr(df6)
with option_context("display.max_rows", 20, "display.max_columns", 10):
# Out off max_columns boundary, but no extending
# since not exceeding width
assert not has_expanded_repr(df6)
assert not has_truncated_repr(df6)
with option_context("display.max_rows", 9, "display.max_columns", 10):
# out vertical bounds can not result in expanded repr
assert not has_expanded_repr(df10)
assert has_vertically_truncated_repr(df10)
# width=None in terminal, auto detection
with option_context(
"display.max_columns",
100,
"display.max_rows",
term_width * 20,
"display.width",
None,
):
df = mkframe((term_width // 7) - 2)
assert not has_expanded_repr(df)
df = mkframe((term_width // 7) + 2)
printing.pprint_thing(df._repr_fits_horizontal_())
assert has_expanded_repr(df)
def test_repr_min_rows(self):
df = DataFrame({"a": range(20)})
# default setting no truncation even if above min_rows
assert ".." not in repr(df)
assert ".." not in df._repr_html_()
df = DataFrame({"a": range(61)})
# default of max_rows 60 triggers truncation if above
assert ".." in repr(df)
assert ".." in df._repr_html_()
with option_context("display.max_rows", 10, "display.min_rows", 4):
# truncated after first two rows
assert ".." in repr(df)
assert "2 " not in repr(df)
assert "..." in df._repr_html_()
assert "<td>2</td>" not in df._repr_html_()
with option_context("display.max_rows", 12, "display.min_rows", None):
# when set to None, follow value of max_rows
assert "5 5" in repr(df)
assert "<td>5</td>" in df._repr_html_()
with option_context("display.max_rows", 10, "display.min_rows", 12):
# when set value higher as max_rows, use the minimum
assert "5 5" not in repr(df)
assert "<td>5</td>" not in df._repr_html_()
with option_context("display.max_rows", None, "display.min_rows", 12):
# max_rows of None -> never truncate
assert ".." not in repr(df)
assert ".." not in df._repr_html_()
def test_str_max_colwidth(self):
# GH 7856
df = DataFrame(
[
{
"a": "foo",
"b": "bar",
"c": "uncomfortably long line with lots of stuff",
"d": 1,
},
{"a": "foo", "b": "bar", "c": "stuff", "d": 1},
]
)
df.set_index(["a", "b", "c"])
assert str(df) == (
" a b c d\n"
"0 foo bar uncomfortably long line with lots of stuff 1\n"
"1 foo bar stuff 1"
)
with option_context("max_colwidth", 20):
assert str(df) == (
" a b c d\n"
"0 foo bar uncomfortably lo... 1\n"
"1 foo bar stuff 1"
)
def test_auto_detect(self):
term_width, term_height = get_terminal_size()
fac = 1.05 # Arbitrary large factor to exceed term width
cols = range(int(term_width * fac))
index = range(10)
df = DataFrame(index=index, columns=cols)
with option_context("mode.sim_interactive", True):
with option_context("display.max_rows", None):
with option_context("display.max_columns", None):
# Wrap around with None
assert has_expanded_repr(df)
with option_context("display.max_rows", 0):
with option_context("display.max_columns", 0):
# Truncate with auto detection.
assert has_horizontally_truncated_repr(df)
index = range(int(term_height * fac))
df = DataFrame(index=index, columns=cols)
with option_context("display.max_rows", 0):
with option_context("display.max_columns", None):
# Wrap around with None
assert has_expanded_repr(df)
# Truncate vertically
assert has_vertically_truncated_repr(df)
with option_context("display.max_rows", None):
with option_context("display.max_columns", 0):
assert has_horizontally_truncated_repr(df)
def test_to_string_repr_unicode2(self):
idx = Index(["abc", "\u03c3a", "aegdvg"])
ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx)
rs = repr(ser).split("\n")
line_len = len(rs[0])
for line in rs[1:]:
try:
line = line.decode(get_option("display.encoding"))
except AttributeError:
pass
if not line.startswith("dtype:"):
assert len(line) == line_len
def test_east_asian_unicode_false(self):
# not aligned properly because of east asian width
# mid col
df = DataFrame(
{"a": ["あ", "いいい", "う", "ええええええ"], "b": [1, 222, 33333, 4]},
index=["a", "bb", "c", "ddd"],
)
expected = (
" a b\na あ 1\n"
"bb いいい 222\nc う 33333\n"
"ddd ええええええ 4"
)
assert repr(df) == expected
# last col
df = DataFrame(
{"a": [1, 222, 33333, 4], "b": ["あ", "いいい", "う", "ええええええ"]},
index=["a", "bb", "c", "ddd"],
)
expected = (
" a b\na 1 あ\n"
"bb 222 いいい\nc 33333 う\n"
"ddd 4 ええええええ"
)
assert repr(df) == expected
# all col
df = DataFrame(
{
"a": ["あああああ", "い", "う", "えええ"],
"b": ["あ", "いいい", "う", "ええええええ"],
},
index=["a", "bb", "c", "ddd"],
)
expected = (
" a b\na あああああ あ\n"
"bb い いいい\nc う う\n"
"ddd えええ ええええええ"
)
assert repr(df) == expected
# column name
df = DataFrame(
{
"b": ["あ", "いいい", "う", "ええええええ"],
"あああああ": [1, 222, 33333, 4],
},
index=["a", "bb", "c", "ddd"],
)
expected = (
" b あああああ\na あ 1\n"
"bb いいい 222\nc う 33333\n"
"ddd ええええええ 4"
)
assert repr(df) == expected
# index
df = DataFrame(
{
"a": ["あああああ", "い", "う", "えええ"],
"b": ["あ", "いいい", "う", "ええええええ"],
},
index=["あああ", "いいいいいい", "うう", "え"],
)
expected = (
" a b\nあああ あああああ あ\n"
"いいいいいい い いいい\nうう う う\n"
"え えええ ええええええ"
)
assert repr(df) == expected
# index name
df = DataFrame(
{
"a": ["あああああ", "い", "う", "えええ"],
"b": ["あ", "いいい", "う", "ええええええ"],
},
index=Index(["あ", "い", "うう", "え"], name="おおおお"),
)
expected = (
" a b\n"
"おおおお \n"
"あ あああああ あ\n"
"い い いいい\n"
"うう う う\n"
"え えええ ええええええ"
)
assert repr(df) == expected
# all
df = DataFrame(
{
"あああ": ["あああ", "い", "う", "えええええ"],
"いいいいい": ["あ", "いいい", "う", "ええ"],
},
index=Index(["あ", "いいい", "うう", "え"], name="お"),
)
expected = (
" あああ いいいいい\n"
"お \n"
"あ あああ あ\n"
"いいい い いいい\n"
"うう う う\n"
"え えええええ ええ"
)
assert repr(df) == expected
# MultiIndex
idx = MultiIndex.from_tuples(
[("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")]
)
df = DataFrame(
{
"a": ["あああああ", "い", "う", "えええ"],
"b": ["あ", "いいい", "う", "ええええええ"],
},
index=idx,
)
expected = (
" a b\n"
"あ いい あああああ あ\n"
"う え い いいい\n"
"おおお かかかか う う\n"
"き くく えええ ええええええ"
)
assert repr(df) == expected
# truncate
with option_context("display.max_rows", 3, "display.max_columns", 3):
df = DataFrame(
{
"a": ["あああああ", "い", "う", "えええ"],
"b": ["あ", "いいい", "う", "ええええええ"],
"c": ["お", "か", "ききき", "くくくくくく"],
"ああああ": ["さ", "し", "す", "せ"],
},
columns=["a", "b", "c", "ああああ"],
)
expected = (
" a ... ああああ\n0 あああああ ... さ\n"
".. ... ... ...\n3 えええ ... せ\n"
"\n[4 rows x 4 columns]"
)
assert repr(df) == expected
df.index = ["あああ", "いいいい", "う", "aaa"]
expected = (
" a ... ああああ\nあああ あああああ ... さ\n"
".. ... ... ...\naaa えええ ... せ\n"
"\n[4 rows x 4 columns]"
)
assert repr(df) == expected
def test_east_asian_unicode_true(self):
# Enable Unicode option -----------------------------------------
with option_context("display.unicode.east_asian_width", True):
# mid col
df = DataFrame(
{"a": ["あ", "いいい", "う", "ええええええ"], "b": [1, 222, 33333, 4]},
index=["a", "bb", "c", "ddd"],
)
expected = (
" a b\na あ 1\n"
"bb いいい 222\nc う 33333\n"
"ddd ええええええ 4"
)
assert repr(df) == expected
# last col
df = DataFrame(
{"a": [1, 222, 33333, 4], "b": ["あ", "いいい", "う", "ええええええ"]},
index=["a", "bb", "c", "ddd"],
)
expected = (
" a b\na 1 あ\n"
"bb 222 いいい\nc 33333 う\n"
"ddd 4 ええええええ"
)
assert repr(df) == expected
# all col
df = DataFrame(
{
"a": ["あああああ", "い", "う", "えええ"],
"b": ["あ", "いいい", "う", "ええええええ"],
},
index=["a", "bb", "c", "ddd"],
)
expected = (
" a b\n"
"a あああああ あ\n"
"bb い いいい\n"
"c う う\n"
"ddd えええ ええええええ"
)
assert repr(df) == expected
# column name
df = DataFrame(
{
"b": ["あ", "いいい", "う", "ええええええ"],
"あああああ": [1, 222, 33333, 4],
},
index=["a", "bb", "c", "ddd"],
)
expected = (
" b あああああ\n"
"a あ 1\n"
"bb いいい 222\n"
"c う 33333\n"
"ddd ええええええ 4"
)
assert repr(df) == expected
# index
df = DataFrame(
{
"a": ["あああああ", "い", "う", "えええ"],
"b": ["あ", "いいい", "う", "ええええええ"],
},
index=["あああ", "いいいいいい", "うう", "え"],
)
expected = (
" a b\n"
"あああ あああああ あ\n"
"いいいいいい い いいい\n"
"うう う う\n"
"え えええ ええええええ"
)
assert repr(df) == expected
# index name
df = DataFrame(
{
"a": ["あああああ", "い", "う", "えええ"],
"b": ["あ", "いいい", "う", "ええええええ"],
},
index=Index(["あ", "い", "うう", "え"], name="おおおお"),
)
expected = (
" a b\n"
"おおおお \n"
"あ あああああ あ\n"
"い い いいい\n"
"うう う う\n"
"え えええ ええええええ"
)
assert repr(df) == expected
# all
df = DataFrame(
{
"あああ": ["あああ", "い", "う", "えええええ"],
"いいいいい": ["あ", "いいい", "う", "ええ"],
},
index=Index(["あ", "いいい", "うう", "え"], name="お"),
)
expected = (
" あああ いいいいい\n"
"お \n"
"あ あああ あ\n"
"いいい い いいい\n"
"うう う う\n"
"え えええええ ええ"
)
assert repr(df) == expected
# MultiIndex
idx = MultiIndex.from_tuples(
[("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")]
)
df = DataFrame(
{
"a": ["あああああ", "い", "う", "えええ"],
"b": ["あ", "いいい", "う", "ええええええ"],
},
index=idx,
)
expected = (
" a b\n"
"あ いい あああああ あ\n"
"う え い いいい\n"
"おおお かかかか う う\n"
"き くく えええ ええええええ"
)
assert repr(df) == expected
# truncate
with option_context("display.max_rows", 3, "display.max_columns", 3):
df = DataFrame(
{
"a": ["あああああ", "い", "う", "えええ"],
"b": ["あ", "いいい", "う", "ええええええ"],
"c": ["お", "か", "ききき", "くくくくくく"],
"ああああ": ["さ", "し", "す", "せ"],
},
columns=["a", "b", "c", "ああああ"],
)
expected = (
" a ... ああああ\n"
"0 あああああ ... さ\n"
".. ... ... ...\n"
"3 えええ ... せ\n"
"\n[4 rows x 4 columns]"
)
assert repr(df) == expected
df.index = ["あああ", "いいいい", "う", "aaa"]
expected = (
" a ... ああああ\n"
"あああ あああああ ... さ\n"
"... ... ... ...\n"
"aaa えええ ... せ\n"
"\n[4 rows x 4 columns]"
)
assert repr(df) == expected
# ambiguous unicode
df = DataFrame(
{
"b": ["あ", "いいい", "¡¡", "ええええええ"],
"あああああ": [1, 222, 33333, 4],
},
index=["a", "bb", "c", "¡¡¡"],
)
expected = (
" b あああああ\n"
"a あ 1\n"
"bb いいい 222\n"
"c ¡¡ 33333\n"
"¡¡¡ ええええええ 4"
)
assert repr(df) == expected
def test_to_string_buffer_all_unicode(self):
buf = StringIO()
empty = DataFrame({"c/\u03c3": Series(dtype=object)})
nonempty = DataFrame({"c/\u03c3": Series([1, 2, 3])})
print(empty, file=buf)
print(nonempty, file=buf)
# this should work
buf.getvalue()
@pytest.mark.parametrize(
"index_scalar",
[
"a" * 10,
1,
Timestamp(2020, 1, 1),
pd.Period("2020-01-01"),
],
)
@pytest.mark.parametrize("h", [10, 20])
@pytest.mark.parametrize("w", [10, 20])
def test_to_string_truncate_indices(self, index_scalar, h, w):
with option_context("display.expand_frame_repr", False):
df = DataFrame(
index=[index_scalar] * h, columns=[str(i) * 10 for i in range(w)]
)
with option_context("display.max_rows", 15):
if h == 20:
assert has_vertically_truncated_repr(df)
else:
assert not has_vertically_truncated_repr(df)
with option_context("display.max_columns", 15):
if w == 20:
assert has_horizontally_truncated_repr(df)
else:
assert not has_horizontally_truncated_repr(df)
with option_context("display.max_rows", 15, "display.max_columns", 15):
if h == 20 and w == 20:
assert has_doubly_truncated_repr(df)
else:
assert not has_doubly_truncated_repr(df)
def test_to_string_truncate_multilevel(self):
arrays = [
["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
["one", "two", "one", "two", "one", "two", "one", "two"],
]
df = DataFrame(index=arrays, columns=arrays)
with option_context("display.max_rows", 7, "display.max_columns", 7):
assert has_doubly_truncated_repr(df)
@pytest.mark.parametrize("dtype", ["object", "datetime64[us]"])
def test_truncate_with_different_dtypes(self, dtype):
# 11594, 12045
# when truncated the dtypes of the splits can differ
# 11594
ser = Series(
[datetime(2012, 1, 1)] * 10
+ [datetime(1012, 1, 2)]
+ [datetime(2012, 1, 3)] * 10,
dtype=dtype,
)
with option_context("display.max_rows", 8):
result = str(ser)
assert dtype in result
def test_truncate_with_different_dtypes2(self):
# 12045
df = DataFrame({"text": ["some words"] + [None] * 9}, dtype=object)
with option_context("display.max_rows", 8, "display.max_columns", 3):
result = str(df)
assert "None" in result
assert "NaN" not in result
def test_truncate_with_different_dtypes_multiindex(self):
# GH#13000
df = DataFrame({"Vals": range(100)})
frame = pd.concat([df], keys=["Sweep"], names=["Sweep", "Index"])
result = repr(frame)
result2 = repr(frame.iloc[:5])
assert result.startswith(result2)
def test_datetimelike_frame(self):
# GH 12211
df = DataFrame({"date": [Timestamp("20130101").tz_localize("UTC")] + [NaT] * 5})
with option_context("display.max_rows", 5):
result = str(df)
assert "2013-01-01 00:00:00+00:00" in result
assert "NaT" in result
assert "..." in result
assert "[6 rows x 1 columns]" in result
dts = [Timestamp("2011-01-01", tz="US/Eastern")] * 5 + [NaT] * 5
df = DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]})
with option_context("display.max_rows", 5):
expected = (
" dt x\n"
"0 2011-01-01 00:00:00-05:00 1\n"
"1 2011-01-01 00:00:00-05:00 2\n"
".. ... ..\n"
"8 NaT 9\n"
"9 NaT 10\n\n"
"[10 rows x 2 columns]"
)
assert repr(df) == expected
dts = [NaT] * 5 + [Timestamp("2011-01-01", tz="US/Eastern")] * 5
df = DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]})
with option_context("display.max_rows", 5):
expected = (
" dt x\n"
"0 NaT 1\n"
"1 NaT 2\n"
".. ... ..\n"
"8 2011-01-01 00:00:00-05:00 9\n"
"9 2011-01-01 00:00:00-05:00 10\n\n"
"[10 rows x 2 columns]"
)
assert repr(df) == expected
dts = [Timestamp("2011-01-01", tz="Asia/Tokyo")] * 5 + [
Timestamp("2011-01-01", tz="US/Eastern")
] * 5
df = DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]})
with option_context("display.max_rows", 5):
expected = (
" dt x\n"
"0 2011-01-01 00:00:00+09:00 1\n"
"1 2011-01-01 00:00:00+09:00 2\n"
".. ... ..\n"
"8 2011-01-01 00:00:00-05:00 9\n"
"9 2011-01-01 00:00:00-05:00 10\n\n"
"[10 rows x 2 columns]"
)
assert repr(df) == expected
@pytest.mark.parametrize(
"start_date",
[
"2017-01-01 23:59:59.999999999",
"2017-01-01 23:59:59.99999999",
"2017-01-01 23:59:59.9999999",
"2017-01-01 23:59:59.999999",
"2017-01-01 23:59:59.99999",
"2017-01-01 23:59:59.9999",
],
)
def test_datetimeindex_highprecision(self, start_date):
# GH19030
# Check that high-precision time values for the end of day are
# included in repr for DatetimeIndex
df = DataFrame({"A": date_range(start=start_date, freq="D", periods=5)})
result = str(df)
assert start_date in result
dti = date_range(start=start_date, freq="D", periods=5)
df = DataFrame({"A": range(5)}, index=dti)
result = str(df.index)
assert start_date in result
def test_string_repr_encoding(self, datapath):
filepath = datapath("io", "parser", "data", "unicode_series.csv")
df = read_csv(filepath, header=None, encoding="latin1")
repr(df)
repr(df[1])
def test_repr_corner(self):
# representing infs poses no problems
df = DataFrame({"foo": [-np.inf, np.inf]})
repr(df)
def test_frame_info_encoding(self):
index = ["'Til There Was You (1997)", "ldum klaka (Cold Fever) (1994)"]
with option_context("display.max_rows", 1):
df = DataFrame(columns=["a", "b", "c"], index=index)
repr(df)
repr(df.T)
def test_wide_repr(self):
with option_context(
"mode.sim_interactive",
True,
"display.show_dimensions",
True,
"display.max_columns",
20,
):
max_cols = get_option("display.max_columns")
df = DataFrame([["a" * 25] * (max_cols - 1)] * 10)
with option_context("display.expand_frame_repr", False):
rep_str = repr(df)
assert f"10 rows x {max_cols - 1} columns" in rep_str
with option_context("display.expand_frame_repr", True):
wide_repr = repr(df)
assert rep_str != wide_repr
with option_context("display.width", 120):
wider_repr = repr(df)
assert len(wider_repr) < len(wide_repr)
def test_wide_repr_wide_columns(self):
with option_context("mode.sim_interactive", True, "display.max_columns", 20):
df = DataFrame(
np.random.default_rng(2).standard_normal((5, 3)),
columns=["a" * 90, "b" * 90, "c" * 90],
)
rep_str = repr(df)
assert len(rep_str.splitlines()) == 20
def test_wide_repr_named(self):
with option_context("mode.sim_interactive", True, "display.max_columns", 20):
max_cols = get_option("display.max_columns")
df = DataFrame([["a" * 25] * (max_cols - 1)] * 10)
df.index.name = "DataFrame Index"
with option_context("display.expand_frame_repr", False):
rep_str = repr(df)
with option_context("display.expand_frame_repr", True):
wide_repr = repr(df)
assert rep_str != wide_repr
with option_context("display.width", 150):
wider_repr = repr(df)
assert len(wider_repr) < len(wide_repr)
for line in wide_repr.splitlines()[1::13]:
assert "DataFrame Index" in line
def test_wide_repr_multiindex(self):
with option_context("mode.sim_interactive", True, "display.max_columns", 20):
midx = MultiIndex.from_arrays([["a" * 5] * 10] * 2)
max_cols = get_option("display.max_columns")
df = DataFrame([["a" * 25] * (max_cols - 1)] * 10, index=midx)
df.index.names = ["Level 0", "Level 1"]
with option_context("display.expand_frame_repr", False):
rep_str = repr(df)
with option_context("display.expand_frame_repr", True):
wide_repr = repr(df)
assert rep_str != wide_repr
with option_context("display.width", 150):
wider_repr = repr(df)
assert len(wider_repr) < len(wide_repr)
for line in wide_repr.splitlines()[1::13]:
assert "Level 0 Level 1" in line
def test_wide_repr_multiindex_cols(self):
with option_context("mode.sim_interactive", True, "display.max_columns", 20):
max_cols = get_option("display.max_columns")
midx = MultiIndex.from_arrays([["a" * 5] * 10] * 2)
mcols = MultiIndex.from_arrays([["b" * 3] * (max_cols - 1)] * 2)
df = DataFrame(
[["c" * 25] * (max_cols - 1)] * 10, index=midx, columns=mcols
)
df.index.names = ["Level 0", "Level 1"]
with option_context("display.expand_frame_repr", False):
rep_str = repr(df)
with option_context("display.expand_frame_repr", True):
wide_repr = repr(df)
assert rep_str != wide_repr
with option_context("display.width", 150, "display.max_columns", 20):
wider_repr = repr(df)
assert len(wider_repr) < len(wide_repr)
def test_wide_repr_unicode(self):
with option_context("mode.sim_interactive", True, "display.max_columns", 20):
max_cols = 20
df = DataFrame([["a" * 25] * 10] * (max_cols - 1))
with option_context("display.expand_frame_repr", False):
rep_str = repr(df)
with option_context("display.expand_frame_repr", True):
wide_repr = repr(df)
assert rep_str != wide_repr
with option_context("display.width", 150):
wider_repr = repr(df)
assert len(wider_repr) < len(wide_repr)
def test_wide_repr_wide_long_columns(self):
with option_context("mode.sim_interactive", True):
df = DataFrame({"a": ["a" * 30, "b" * 30], "b": ["c" * 70, "d" * 80]})
result = repr(df)
assert "ccccc" in result
assert "ddddd" in result
def test_long_series(self):
n = 1000
s = Series(
np.random.default_rng(2).integers(-50, 50, n),
index=[f"s{x:04d}" for x in range(n)],
dtype="int64",
)
str_rep = str(s)
nmatches = len(re.findall("dtype", str_rep))
assert nmatches == 1
def test_to_string_ascii_error(self):
data = [
(
"0 ",
" .gitignore ",
" 5 ",
" \xe2\x80\xa2\xe2\x80\xa2\xe2\x80\xa2\xe2\x80\xa2\xe2\x80\xa2",
)
]
df = DataFrame(data)
# it works!
repr(df)
def test_show_dimensions(self):
df = DataFrame(123, index=range(10, 15), columns=range(30))
with option_context(
"display.max_rows",
10,
"display.max_columns",
40,
"display.width",
500,
"display.expand_frame_repr",
"info",
"display.show_dimensions",
True,
):
assert "5 rows" in str(df)
assert "5 rows" in df._repr_html_()
with option_context(
"display.max_rows",
10,
"display.max_columns",
40,
"display.width",
500,
"display.expand_frame_repr",
"info",
"display.show_dimensions",
False,
):
assert "5 rows" not in str(df)
assert "5 rows" not in df._repr_html_()
with option_context(
"display.max_rows",
2,
"display.max_columns",
2,
"display.width",
500,
"display.expand_frame_repr",
"info",
"display.show_dimensions",
"truncate",
):
assert "5 rows" in str(df)
assert "5 rows" in df._repr_html_()
with option_context(
"display.max_rows",
10,
"display.max_columns",
40,
"display.width",
500,
"display.expand_frame_repr",
"info",
"display.show_dimensions",
"truncate",
):
assert "5 rows" not in str(df)
assert "5 rows" not in df._repr_html_()
def test_info_repr(self):
# GH#21746 For tests inside a terminal (i.e. not CI) we need to detect
# the terminal size to ensure that we try to print something "too big"
term_width, term_height = get_terminal_size()
max_rows = 60
max_cols = 20 + (max(term_width, 80) - 80) // 4
# Long
h, w = max_rows + 1, max_cols - 1
df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)})
assert has_vertically_truncated_repr(df)
with option_context("display.large_repr", "info"):
assert has_info_repr(df)
# Wide
h, w = max_rows - 1, max_cols + 1
df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)})
assert has_horizontally_truncated_repr(df)
with option_context(
"display.large_repr", "info", "display.max_columns", max_cols
):
assert has_info_repr(df)
def test_info_repr_max_cols(self):
# GH #6939
df = DataFrame(np.random.default_rng(2).standard_normal((10, 5)))
with option_context(
"display.large_repr",
"info",
"display.max_columns",
1,
"display.max_info_columns",
4,
):
assert has_non_verbose_info_repr(df)
with option_context(
"display.large_repr",
"info",
"display.max_columns",
1,
"display.max_info_columns",
5,
):
assert not has_non_verbose_info_repr(df)
# FIXME: don't leave commented-out
# test verbose overrides
# set_option('display.max_info_columns', 4) # exceeded
def test_pprint_pathological_object(self):
"""
If the test fails, it at least won't hang.
"""
class A:
def __getitem__(self, key):
return 3 # obviously simplified
df = DataFrame([A()])
repr(df) # just don't die
def test_float_trim_zeros(self):
vals = [
2.08430917305e10,
3.52205017305e10,
2.30674817305e10,
2.03954217305e10,
5.59897817305e10,
]
skip = True
for line in repr(DataFrame({"A": vals})).split("\n")[:-2]:
if line.startswith("dtype:"):
continue
if _three_digit_exp():
assert ("+010" in line) or skip
else:
assert ("+10" in line) or skip
skip = False
@pytest.mark.parametrize(
"data, expected",
[
(["3.50"], "0 3.50\ndtype: object"),
([1.20, "1.00"], "0 1.2\n1 1.00\ndtype: object"),
([np.nan], "0 NaN\ndtype: float64"),
([None], "0 None\ndtype: object"),
(["3.50", np.nan], "0 3.50\n1 NaN\ndtype: object"),
([3.50, np.nan], "0 3.5\n1 NaN\ndtype: float64"),
([3.50, np.nan, "3.50"], "0 3.5\n1 NaN\n2 3.50\ndtype: object"),
([3.50, None, "3.50"], "0 3.5\n1 None\n2 3.50\ndtype: object"),
],
)
def test_repr_str_float_truncation(self, data, expected, using_infer_string):
# GH#38708
series = Series(data, dtype=object if "3.50" in data else None)
result = repr(series)
assert result == expected
@pytest.mark.parametrize(
"float_format,expected",
[
("{:,.0f}".format, "0 1,000\n1 test\ndtype: object"),
("{:.4f}".format, "0 1000.0000\n1 test\ndtype: object"),
],
)
def test_repr_float_format_in_object_col(self, float_format, expected):
# GH#40024
df = Series([1000.0, "test"])
with option_context("display.float_format", float_format):
result = repr(df)
assert result == expected
def test_period(self):
# GH 12615
df = DataFrame(
{
"A": pd.period_range("2013-01", periods=4, freq="M"),
"B": [
pd.Period("2011-01", freq="M"),
pd.Period("2011-02-01", freq="D"),
pd.Period("2011-03-01 09:00", freq="h"),
pd.Period("2011-04", freq="M"),
],
"C": list("abcd"),
}
)
exp = (
" A B C\n"
"0 2013-01 2011-01 a\n"
"1 2013-02 2011-02-01 b\n"
"2 2013-03 2011-03-01 09:00 c\n"
"3 2013-04 2011-04 d"
)
assert str(df) == exp
@pytest.mark.parametrize(
"length, max_rows, min_rows, expected",
[
(10, 10, 10, 10),
(10, 10, None, 10),
(10, 8, None, 8),
(20, 30, 10, 30), # max_rows > len(frame), hence max_rows
(50, 30, 10, 10), # max_rows < len(frame), hence min_rows
(100, 60, 10, 10), # same
(60, 60, 10, 60), # edge case
(61, 60, 10, 10), # edge case
],
)
def test_max_rows_fitted(self, length, min_rows, max_rows, expected):
"""Check that display logic is correct.
GH #37359
See description here:
https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options
"""
formatter = fmt.DataFrameFormatter(
DataFrame(np.random.default_rng(2).random((length, 3))),
max_rows=max_rows,
min_rows=min_rows,
)
result = formatter.max_rows_fitted
assert result == expected
def gen_series_formatting():
s1 = Series(["a"] * 100)
s2 = Series(["ab"] * 100)
s3 = Series(["a", "ab", "abc", "abcd", "abcde", "abcdef"])
s4 = s3[::-1]
test_sers = {"onel": s1, "twol": s2, "asc": s3, "desc": s4}
return test_sers
class TestSeriesFormatting:
def test_freq_name_separation(self):
s = Series(
np.random.default_rng(2).standard_normal(10),
index=date_range("1/1/2000", periods=10),
name=0,
)
result = repr(s)
assert "Freq: D, Name: 0" in result
def test_unicode_name_in_footer(self):
s = Series([1, 2], name="\u05e2\u05d1\u05e8\u05d9\u05ea")
sf = fmt.SeriesFormatter(s, name="\u05e2\u05d1\u05e8\u05d9\u05ea")
sf._get_footer() # should not raise exception
@pytest.mark.xfail(
using_pyarrow_string_dtype(), reason="Fixup when arrow is default"
)
def test_east_asian_unicode_series(self):
# not aligned properly because of east asian width
# unicode index
s = Series(["a", "bb", "CCC", "D"], index=["あ", "いい", "ううう", "ええええ"])
expected = "".join(
[
"あ a\n",
"いい bb\n",
"ううう CCC\n",
"ええええ D\ndtype: object",
]
)
assert repr(s) == expected
# unicode values
s = Series(["あ", "いい", "ううう", "ええええ"], index=["a", "bb", "c", "ddd"])
expected = "".join(
[
"a あ\n",
"bb いい\n",
"c ううう\n",
"ddd ええええ\n",
"dtype: object",
]
)
assert repr(s) == expected
# both
s = Series(
["あ", "いい", "ううう", "ええええ"],
index=["ああ", "いいいい", "う", "えええ"],
)
expected = "".join(
[
"ああ あ\n",
"いいいい いい\n",
"う ううう\n",
"えええ ええええ\n",
"dtype: object",
]
)
assert repr(s) == expected
# unicode footer
s = Series(
["あ", "いい", "ううう", "ええええ"],
index=["ああ", "いいいい", "う", "えええ"],
name="おおおおおおお",
)
expected = (
"ああ あ\nいいいい いい\nう ううう\n"
"えええ ええええ\nName: おおおおおおお, dtype: object"
)
assert repr(s) == expected
# MultiIndex
idx = MultiIndex.from_tuples(
[("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")]
)
s = Series([1, 22, 3333, 44444], index=idx)
expected = (
"あ いい 1\n"
"う え 22\n"
"おおお かかかか 3333\n"
"き くく 44444\ndtype: int64"
)
assert repr(s) == expected
# object dtype, shorter than unicode repr
s = Series([1, 22, 3333, 44444], index=[1, "AB", np.nan, "あああ"])
expected = (
"1 1\nAB 22\nNaN 3333\nあああ 44444\ndtype: int64"
)
assert repr(s) == expected
# object dtype, longer than unicode repr
s = Series(
[1, 22, 3333, 44444], index=[1, "AB", Timestamp("2011-01-01"), "あああ"]
)
expected = (
"1 1\n"
"AB 22\n"
"2011-01-01 00:00:00 3333\n"
"あああ 44444\ndtype: int64"
)
assert repr(s) == expected
# truncate
with option_context("display.max_rows", 3):
s = Series(["あ", "いい", "ううう", "ええええ"], name="おおおおおおお")
expected = (
"0 あ\n ... \n"
"3 ええええ\n"
"Name: おおおおおおお, Length: 4, dtype: object"
)
assert repr(s) == expected
s.index = ["ああ", "いいいい", "う", "えええ"]
expected = (
"ああ あ\n ... \n"
"えええ ええええ\n"
"Name: おおおおおおお, Length: 4, dtype: object"
)
assert repr(s) == expected
# Enable Unicode option -----------------------------------------
with option_context("display.unicode.east_asian_width", True):
# unicode index
s = Series(
["a", "bb", "CCC", "D"],
index=["あ", "いい", "ううう", "ええええ"],
)
expected = (
"あ a\nいい bb\nううう CCC\n"
"ええええ D\ndtype: object"
)
assert repr(s) == expected
# unicode values
s = Series(
["あ", "いい", "ううう", "ええええ"],
index=["a", "bb", "c", "ddd"],
)
expected = (
"a あ\nbb いい\nc ううう\n"
"ddd ええええ\ndtype: object"
)
assert repr(s) == expected
# both
s = Series(
["あ", "いい", "ううう", "ええええ"],
index=["ああ", "いいいい", "う", "えええ"],
)
expected = (
"ああ あ\n"
"いいいい いい\n"
"う ううう\n"
"えええ ええええ\ndtype: object"
)
assert repr(s) == expected
# unicode footer
s = Series(
["あ", "いい", "ううう", "ええええ"],
index=["ああ", "いいいい", "う", "えええ"],
name="おおおおおおお",
)
expected = (
"ああ あ\n"
"いいいい いい\n"
"う ううう\n"
"えええ ええええ\n"
"Name: おおおおおおお, dtype: object"
)
assert repr(s) == expected
# MultiIndex
idx = MultiIndex.from_tuples(
[("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")]
)
s = Series([1, 22, 3333, 44444], index=idx)
expected = (
"あ いい 1\n"
"う え 22\n"
"おおお かかかか 3333\n"
"き くく 44444\n"
"dtype: int64"
)
assert repr(s) == expected
# object dtype, shorter than unicode repr
s = Series([1, 22, 3333, 44444], index=[1, "AB", np.nan, "あああ"])
expected = (
"1 1\nAB 22\nNaN 3333\n"
"あああ 44444\ndtype: int64"
)
assert repr(s) == expected
# object dtype, longer than unicode repr
s = Series(
[1, 22, 3333, 44444],
index=[1, "AB", Timestamp("2011-01-01"), "あああ"],
)
expected = (
"1 1\n"
"AB 22\n"
"2011-01-01 00:00:00 3333\n"
"あああ 44444\ndtype: int64"
)
assert repr(s) == expected
# truncate
with option_context("display.max_rows", 3):
s = Series(["あ", "いい", "ううう", "ええええ"], name="おおおおおおお")
expected = (
"0 あ\n ... \n"
"3 ええええ\n"
"Name: おおおおおおお, Length: 4, dtype: object"
)
assert repr(s) == expected
s.index = ["ああ", "いいいい", "う", "えええ"]
expected = (
"ああ あ\n"
" ... \n"
"えええ ええええ\n"
"Name: おおおおおおお, Length: 4, dtype: object"
)
assert repr(s) == expected
# ambiguous unicode
s = Series(
["¡¡", "い¡¡", "ううう", "ええええ"],
index=["ああ", "¡¡¡¡いい", "¡¡", "えええ"],
)
expected = (
"ああ ¡¡\n"
"¡¡¡¡いい い¡¡\n"
"¡¡ ううう\n"
"えええ ええええ\ndtype: object"
)
assert repr(s) == expected
def test_float_trim_zeros(self):
vals = [
2.08430917305e10,
3.52205017305e10,
2.30674817305e10,
2.03954217305e10,
5.59897817305e10,
]
for line in repr(Series(vals)).split("\n"):
if line.startswith("dtype:"):
continue
if _three_digit_exp():
assert "+010" in line
else:
assert "+10" in line
@pytest.mark.parametrize(
"start_date",
[
"2017-01-01 23:59:59.999999999",
"2017-01-01 23:59:59.99999999",
"2017-01-01 23:59:59.9999999",
"2017-01-01 23:59:59.999999",
"2017-01-01 23:59:59.99999",
"2017-01-01 23:59:59.9999",
],
)
def test_datetimeindex_highprecision(self, start_date):
# GH19030
# Check that high-precision time values for the end of day are
# included in repr for DatetimeIndex
s1 = Series(date_range(start=start_date, freq="D", periods=5))
result = str(s1)
assert start_date in result
dti = date_range(start=start_date, freq="D", periods=5)
s2 = Series(3, index=dti)
result = str(s2.index)
assert start_date in result
def test_mixed_datetime64(self):
df = DataFrame({"A": [1, 2], "B": ["2012-01-01", "2012-01-02"]})
df["B"] = pd.to_datetime(df.B)
result = repr(df.loc[0])
assert "2012-01-01" in result
def test_period(self):
# GH 12615
index = pd.period_range("2013-01", periods=6, freq="M")
s = Series(np.arange(6, dtype="int64"), index=index)
exp = (
"2013-01 0\n"
"2013-02 1\n"
"2013-03 2\n"
"2013-04 3\n"
"2013-05 4\n"
"2013-06 5\n"
"Freq: M, dtype: int64"
)
assert str(s) == exp
s = Series(index)
exp = (
"0 2013-01\n"
"1 2013-02\n"
"2 2013-03\n"
"3 2013-04\n"
"4 2013-05\n"
"5 2013-06\n"
"dtype: period[M]"
)
assert str(s) == exp
# periods with mixed freq
s = Series(
[
pd.Period("2011-01", freq="M"),
pd.Period("2011-02-01", freq="D"),
pd.Period("2011-03-01 09:00", freq="h"),
]
)
exp = (
"0 2011-01\n1 2011-02-01\n"
"2 2011-03-01 09:00\ndtype: object"
)
assert str(s) == exp
def test_max_multi_index_display(self):
# GH 7101
# doc example (indexing.rst)
# multi-index
arrays = [
["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
["one", "two", "one", "two", "one", "two", "one", "two"],
]
tuples = list(zip(*arrays))
index = MultiIndex.from_tuples(tuples, names=["first", "second"])
s = Series(np.random.default_rng(2).standard_normal(8), index=index)
with option_context("display.max_rows", 10):
assert len(str(s).split("\n")) == 10
with option_context("display.max_rows", 3):
assert len(str(s).split("\n")) == 5
with option_context("display.max_rows", 2):
assert len(str(s).split("\n")) == 5
with option_context("display.max_rows", 1):
assert len(str(s).split("\n")) == 4
with option_context("display.max_rows", 0):
assert len(str(s).split("\n")) == 10
# index
s = Series(np.random.default_rng(2).standard_normal(8), None)
with option_context("display.max_rows", 10):
assert len(str(s).split("\n")) == 9
with option_context("display.max_rows", 3):
assert len(str(s).split("\n")) == 4
with option_context("display.max_rows", 2):
assert len(str(s).split("\n")) == 4
with option_context("display.max_rows", 1):
assert len(str(s).split("\n")) == 3
with option_context("display.max_rows", 0):
assert len(str(s).split("\n")) == 9
# Make sure #8532 is fixed
def test_consistent_format(self):
s = Series([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.9999, 1, 1] * 10)
with option_context("display.max_rows", 10, "display.show_dimensions", False):
res = repr(s)
exp = (
"0 1.0000\n1 1.0000\n2 1.0000\n3 "
"1.0000\n4 1.0000\n ... \n125 "
"1.0000\n126 1.0000\n127 0.9999\n128 "
"1.0000\n129 1.0000\ndtype: float64"
)
assert res == exp
def chck_ncols(self, s):
lines = [
line for line in repr(s).split("\n") if not re.match(r"[^\.]*\.+", line)
][:-1]
ncolsizes = len({len(line.strip()) for line in lines})
assert ncolsizes == 1
@pytest.mark.xfail(
using_pyarrow_string_dtype(), reason="change when arrow is default"
)
def test_format_explicit(self):
test_sers = gen_series_formatting()
with option_context("display.max_rows", 4, "display.show_dimensions", False):
res = repr(test_sers["onel"])
exp = "0 a\n1 a\n ..\n98 a\n99 a\ndtype: object"
assert exp == res
res = repr(test_sers["twol"])
exp = "0 ab\n1 ab\n ..\n98 ab\n99 ab\ndtype: object"
assert exp == res
res = repr(test_sers["asc"])
exp = (
"0 a\n1 ab\n ... \n4 abcde\n5 "
"abcdef\ndtype: object"
)
assert exp == res
res = repr(test_sers["desc"])
exp = (
"5 abcdef\n4 abcde\n ... \n1 ab\n0 "
"a\ndtype: object"
)
assert exp == res
def test_ncols(self):
test_sers = gen_series_formatting()
for s in test_sers.values():
self.chck_ncols(s)
def test_max_rows_eq_one(self):
s = Series(range(10), dtype="int64")
with option_context("display.max_rows", 1):
strrepr = repr(s).split("\n")
exp1 = ["0", "0"]
res1 = strrepr[0].split()
assert exp1 == res1
exp2 = [".."]
res2 = strrepr[1].split()
assert exp2 == res2
def test_truncate_ndots(self):
def getndots(s):
return len(re.match(r"[^\.]*(\.*)", s).groups()[0])
s = Series([0, 2, 3, 6])
with option_context("display.max_rows", 2):
strrepr = repr(s).replace("\n", "")
assert getndots(strrepr) == 2
s = Series([0, 100, 200, 400])
with option_context("display.max_rows", 2):
strrepr = repr(s).replace("\n", "")
assert getndots(strrepr) == 3
def test_show_dimensions(self):
# gh-7117
s = Series(range(5))
assert "Length" not in repr(s)
with option_context("display.max_rows", 4):
assert "Length" in repr(s)
with option_context("display.show_dimensions", True):
assert "Length" in repr(s)
with option_context("display.max_rows", 4, "display.show_dimensions", False):
assert "Length" not in repr(s)
def test_repr_min_rows(self):
s = Series(range(20))
# default setting no truncation even if above min_rows
assert ".." not in repr(s)
s = Series(range(61))
# default of max_rows 60 triggers truncation if above
assert ".." in repr(s)
with option_context("display.max_rows", 10, "display.min_rows", 4):
# truncated after first two rows
assert ".." in repr(s)
assert "2 " not in repr(s)
with option_context("display.max_rows", 12, "display.min_rows", None):
# when set to None, follow value of max_rows
assert "5 5" in repr(s)
with option_context("display.max_rows", 10, "display.min_rows", 12):
# when set value higher as max_rows, use the minimum
assert "5 5" not in repr(s)
with option_context("display.max_rows", None, "display.min_rows", 12):
# max_rows of None -> never truncate
assert ".." not in repr(s)
class TestGenericArrayFormatter:
def test_1d_array(self):
# _GenericArrayFormatter is used on types for which there isn't a dedicated
# formatter. np.bool_ is one of those types.
obj = fmt._GenericArrayFormatter(np.array([True, False]))
res = obj.get_result()
assert len(res) == 2
# Results should be right-justified.
assert res[0] == " True"
assert res[1] == " False"
def test_2d_array(self):
obj = fmt._GenericArrayFormatter(np.array([[True, False], [False, True]]))
res = obj.get_result()
assert len(res) == 2
assert res[0] == " [True, False]"
assert res[1] == " [False, True]"
def test_3d_array(self):
obj = fmt._GenericArrayFormatter(
np.array([[[True, True], [False, False]], [[False, True], [True, False]]])
)
res = obj.get_result()
assert len(res) == 2
assert res[0] == " [[True, True], [False, False]]"
assert res[1] == " [[False, True], [True, False]]"
def test_2d_extension_type(self):
# GH 33770
# Define a stub extension type with just enough code to run Series.__repr__()
class DtypeStub(pd.api.extensions.ExtensionDtype):
@property
def type(self):
return np.ndarray
@property
def name(self):
return "DtypeStub"
class ExtTypeStub(pd.api.extensions.ExtensionArray):
def __len__(self) -> int:
return 2
def __getitem__(self, ix):
return [ix == 1, ix == 0]
@property
def dtype(self):
return DtypeStub()
series = Series(ExtTypeStub(), copy=False)
res = repr(series) # This line crashed before #33770 was fixed.
expected = "\n".join(
["0 [False True]", "1 [True False]", "dtype: DtypeStub"]
)
assert res == expected
def _three_digit_exp():
return f"{1.7e8:.4g}" == "1.7e+008"
class TestFloatArrayFormatter:
def test_misc(self):
obj = fmt.FloatArrayFormatter(np.array([], dtype=np.float64))
result = obj.get_result()
assert len(result) == 0
def test_format(self):
obj = fmt.FloatArrayFormatter(np.array([12, 0], dtype=np.float64))
result = obj.get_result()
assert result[0] == " 12.0"
assert result[1] == " 0.0"
def test_output_display_precision_trailing_zeroes(self):
# Issue #20359: trimming zeros while there is no decimal point
# Happens when display precision is set to zero
with option_context("display.precision", 0):
s = Series([840.0, 4200.0])
expected_output = "0 840\n1 4200\ndtype: float64"
assert str(s) == expected_output
@pytest.mark.parametrize(
"value,expected",
[
([9.4444], " 0\n0 9"),
([0.49], " 0\n0 5e-01"),
([10.9999], " 0\n0 11"),
([9.5444, 9.6], " 0\n0 10\n1 10"),
([0.46, 0.78, -9.9999], " 0\n0 5e-01\n1 8e-01\n2 -1e+01"),
],
)
def test_set_option_precision(self, value, expected):
# Issue #30122
# Precision was incorrectly shown
with option_context("display.precision", 0):
df_value = DataFrame(value)
assert str(df_value) == expected
def test_output_significant_digits(self):
# Issue #9764
# In case default display precision changes:
with option_context("display.precision", 6):
# DataFrame example from issue #9764
d = DataFrame(
{
"col1": [
9.999e-8,
1e-7,
1.0001e-7,
2e-7,
4.999e-7,
5e-7,
5.0001e-7,
6e-7,
9.999e-7,
1e-6,
1.0001e-6,
2e-6,
4.999e-6,
5e-6,
5.0001e-6,
6e-6,
]
}
)
expected_output = {
(0, 6): " col1\n"
"0 9.999000e-08\n"
"1 1.000000e-07\n"
"2 1.000100e-07\n"
"3 2.000000e-07\n"
"4 4.999000e-07\n"
"5 5.000000e-07",
(1, 6): " col1\n"
"1 1.000000e-07\n"
"2 1.000100e-07\n"
"3 2.000000e-07\n"
"4 4.999000e-07\n"
"5 5.000000e-07",
(1, 8): " col1\n"
"1 1.000000e-07\n"
"2 1.000100e-07\n"
"3 2.000000e-07\n"
"4 4.999000e-07\n"
"5 5.000000e-07\n"
"6 5.000100e-07\n"
"7 6.000000e-07",
(8, 16): " col1\n"
"8 9.999000e-07\n"
"9 1.000000e-06\n"
"10 1.000100e-06\n"
"11 2.000000e-06\n"
"12 4.999000e-06\n"
"13 5.000000e-06\n"
"14 5.000100e-06\n"
"15 6.000000e-06",
(9, 16): " col1\n"
"9 0.000001\n"
"10 0.000001\n"
"11 0.000002\n"
"12 0.000005\n"
"13 0.000005\n"
"14 0.000005\n"
"15 0.000006",
}
for (start, stop), v in expected_output.items():
assert str(d[start:stop]) == v
def test_too_long(self):
# GH 10451
with option_context("display.precision", 4):
# need both a number > 1e6 and something that normally formats to
# having length > display.precision + 6
df = DataFrame({"x": [12345.6789]})
assert str(df) == " x\n0 12345.6789"
df = DataFrame({"x": [2e6]})
assert str(df) == " x\n0 2000000.0"
df = DataFrame({"x": [12345.6789, 2e6]})
assert str(df) == " x\n0 1.2346e+04\n1 2.0000e+06"
class TestTimedelta64Formatter:
def test_days(self):
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values
result = fmt._Timedelta64Formatter(x).get_result()
assert result[0].strip() == "0 days"
assert result[1].strip() == "1 days"
result = fmt._Timedelta64Formatter(x[1:2]).get_result()
assert result[0].strip() == "1 days"
result = fmt._Timedelta64Formatter(x).get_result()
assert result[0].strip() == "0 days"
assert result[1].strip() == "1 days"
result = fmt._Timedelta64Formatter(x[1:2]).get_result()
assert result[0].strip() == "1 days"
def test_days_neg(self):
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values
result = fmt._Timedelta64Formatter(-x).get_result()
assert result[0].strip() == "0 days"
assert result[1].strip() == "-1 days"
def test_subdays(self):
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values
result = fmt._Timedelta64Formatter(y).get_result()
assert result[0].strip() == "0 days 00:00:00"
assert result[1].strip() == "0 days 00:00:01"
def test_subdays_neg(self):
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values
result = fmt._Timedelta64Formatter(-y).get_result()
assert result[0].strip() == "0 days 00:00:00"
assert result[1].strip() == "-1 days +23:59:59"
def test_zero(self):
x = pd.to_timedelta(list(range(1)) + [NaT], unit="D")._values
result = fmt._Timedelta64Formatter(x).get_result()
assert result[0].strip() == "0 days"
x = pd.to_timedelta(list(range(1)), unit="D")._values
result = fmt._Timedelta64Formatter(x).get_result()
assert result[0].strip() == "0 days"
class TestDatetime64Formatter:
def test_mixed(self):
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), NaT])._values
result = fmt._Datetime64Formatter(x).get_result()
assert result[0].strip() == "2013-01-01 00:00:00"
assert result[1].strip() == "2013-01-01 12:00:00"
def test_dates(self):
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT])._values
result = fmt._Datetime64Formatter(x).get_result()
assert result[0].strip() == "2013-01-01"
assert result[1].strip() == "2013-01-02"
def test_date_nanos(self):
x = Series([Timestamp(200)])._values
result = fmt._Datetime64Formatter(x).get_result()
assert result[0].strip() == "1970-01-01 00:00:00.000000200"
def test_dates_display(self):
# 10170
# make sure that we are consistently display date formatting
x = Series(date_range("20130101 09:00:00", periods=5, freq="D"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-05 09:00:00"
x = Series(date_range("20130101 09:00:00", periods=5, freq="s"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:04"
x = Series(date_range("20130101 09:00:00", periods=5, freq="ms"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00.000"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:00.004"
x = Series(date_range("20130101 09:00:00", periods=5, freq="us"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00.000000"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:00.000004"
x = Series(date_range("20130101 09:00:00", periods=5, freq="ns"))
x.iloc[1] = np.nan
result = fmt._Datetime64Formatter(x._values).get_result()
assert result[0].strip() == "2013-01-01 09:00:00.000000000"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:00.000000004"
def test_datetime64formatter_yearmonth(self):
x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)])._values
def format_func(x):
return x.strftime("%Y-%m")
formatter = fmt._Datetime64Formatter(x, formatter=format_func)
result = formatter.get_result()
assert result == ["2016-01", "2016-02"]
def test_datetime64formatter_hoursecond(self):
x = Series(
pd.to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f")
)._values
def format_func(x):
return x.strftime("%H:%M")
formatter = fmt._Datetime64Formatter(x, formatter=format_func)
result = formatter.get_result()
assert result == ["10:10", "12:12"]
def test_datetime64formatter_tz_ms(self):
x = (
Series(
np.array(["2999-01-01", "2999-01-02", "NaT"], dtype="datetime64[ms]")
)
.dt.tz_localize("US/Pacific")
._values
)
result = fmt._Datetime64TZFormatter(x).get_result()
assert result[0].strip() == "2999-01-01 00:00:00-08:00"
assert result[1].strip() == "2999-01-02 00:00:00-08:00"
class TestFormatPercentiles:
@pytest.mark.parametrize(
"percentiles, expected",
[
(
[0.01999, 0.02001, 0.5, 0.666666, 0.9999],
["1.999%", "2.001%", "50%", "66.667%", "99.99%"],
),
(
[0, 0.5, 0.02001, 0.5, 0.666666, 0.9999],
["0%", "50%", "2.0%", "50%", "66.67%", "99.99%"],
),
([0.281, 0.29, 0.57, 0.58], ["28.1%", "29%", "57%", "58%"]),
([0.28, 0.29, 0.57, 0.58], ["28%", "29%", "57%", "58%"]),
(
[0.9, 0.99, 0.999, 0.9999, 0.99999],
["90%", "99%", "99.9%", "99.99%", "99.999%"],
),
],
)
def test_format_percentiles(self, percentiles, expected):
result = fmt.format_percentiles(percentiles)
assert result == expected
@pytest.mark.parametrize(
"percentiles",
[
([0.1, np.nan, 0.5]),
([-0.001, 0.1, 0.5]),
([2, 0.1, 0.5]),
([0.1, 0.5, "a"]),
],
)
def test_error_format_percentiles(self, percentiles):
msg = r"percentiles should all be in the interval \[0,1\]"
with pytest.raises(ValueError, match=msg):
fmt.format_percentiles(percentiles)
def test_format_percentiles_integer_idx(self):
# Issue #26660
result = fmt.format_percentiles(np.linspace(0, 1, 10 + 1))
expected = [
"0%",
"10%",
"20%",
"30%",
"40%",
"50%",
"60%",
"70%",
"80%",
"90%",
"100%",
]
assert result == expected
@pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"])
@pytest.mark.parametrize(
"encoding, data",
[(None, "abc"), ("utf-8", "abc"), ("gbk", "造成输出中文显示乱码"), ("foo", "abc")],
)
def test_filepath_or_buffer_arg(
method,
filepath_or_buffer,
assert_filepath_or_buffer_equals,
encoding,
data,
filepath_or_buffer_id,
):
df = DataFrame([data])
if method in ["to_latex"]: # uses styler implementation
pytest.importorskip("jinja2")
if filepath_or_buffer_id not in ["string", "pathlike"] and encoding is not None:
with pytest.raises(
ValueError, match="buf is not a file name and encoding is specified."
):
getattr(df, method)(buf=filepath_or_buffer, encoding=encoding)
elif encoding == "foo":
with pytest.raises(LookupError, match="unknown encoding"):
getattr(df, method)(buf=filepath_or_buffer, encoding=encoding)
else:
expected = getattr(df, method)()
getattr(df, method)(buf=filepath_or_buffer, encoding=encoding)
assert_filepath_or_buffer_equals(expected)
@pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"])
def test_filepath_or_buffer_bad_arg_raises(float_frame, method):
if method in ["to_latex"]: # uses styler implementation
pytest.importorskip("jinja2")
msg = "buf is not a file name and it has no write method"
with pytest.raises(TypeError, match=msg):
getattr(float_frame, method)(buf=object())
Sindbad File Manager Version 1.0, Coded By Sindbad EG ~ The Terrorists