-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
REF: dataframe formatters/outputs #36510
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 33 commits
c3568a2
837858f
08e899f
602c984
bd5cb87
6e8d4d8
cbd3c76
5c30924
af8fe98
6e9fb3c
878eed2
d87638b
1292be5
41553f6
a66ca5e
3fbe4ba
733fa34
bfb37d7
f1b494e
75daa74
6e39277
df3b5c6
5a18386
fc68fa5
19d2156
271ef5c
b1018ad
22d0982
94dbadd
914981b
482ccd1
7b57fc8
1e2969f
90977fd
fc7a091
1335a11
b67b481
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ | |
import csv as csvlib | ||
from io import StringIO, TextIOWrapper | ||
import os | ||
from typing import Any, Dict, Hashable, Iterator, List, Optional, Sequence, Union | ||
from typing import Any, Dict, Iterator, List, Optional, Sequence, Union | ||
|
||
import numpy as np | ||
|
||
|
@@ -29,19 +29,16 @@ | |
from pandas.core.indexes.api import Index | ||
|
||
from pandas.io.common import get_filepath_or_buffer, get_handle | ||
from pandas.io.formats.format import DataFrameFormatter, FloatFormatType | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If the imports are just for type checking, would prefer them in a TYPE_CHECKING block. Saying that, … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I implemented it as you suggested. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, that's how we do it elsewhere. All imports in pandas._typing are inside a TYPE_CHECKING block, so this should be safe. |
||
|
||
|
||
class CSVFormatter: | ||
def __init__( | ||
self, | ||
obj, | ||
formatter: DataFrameFormatter, | ||
path_or_buf: Optional[FilePathOrBuffer[str]] = None, | ||
sep: str = ",", | ||
na_rep: str = "", | ||
float_format: Optional[str] = None, | ||
cols: Optional[Sequence[Label]] = None, | ||
header: Union[bool, Sequence[Hashable]] = True, | ||
index: bool = True, | ||
index_label: Optional[IndexLabel] = None, | ||
mode: str = "w", | ||
encoding: Optional[str] = None, | ||
|
@@ -54,10 +51,11 @@ def __init__( | |
date_format: Optional[str] = None, | ||
doublequote: bool = True, | ||
escapechar: Optional[str] = None, | ||
decimal=".", | ||
storage_options: StorageOptions = None, | ||
): | ||
self.obj = obj | ||
self.fmt = formatter | ||
|
||
self.obj = self.fmt.frame | ||
|
||
self.encoding = encoding or "utf-8" | ||
|
||
|
@@ -79,35 +77,45 @@ def __init__( | |
self.mode = ioargs.mode | ||
|
||
self.sep = sep | ||
self.na_rep = na_rep | ||
self.float_format = float_format | ||
self.decimal = decimal | ||
self.header = header | ||
self.index = index | ||
self.index_label = index_label | ||
self.index_label = self._initialize_index_label(index_label) | ||
self.errors = errors | ||
self.quoting = quoting or csvlib.QUOTE_MINIMAL | ||
self.quotechar = quotechar | ||
self.quotechar = self._initialize_quotechar(quotechar) | ||
self.doublequote = doublequote | ||
self.escapechar = escapechar | ||
self.line_terminator = line_terminator or os.linesep | ||
self.date_format = date_format | ||
self.cols = cols # type: ignore[assignment] | ||
self.chunksize = chunksize # type: ignore[assignment] | ||
self.cols = self._initialize_columns(cols) | ||
self.chunksize = self._initialize_chunksize(chunksize) | ||
|
||
@property | ||
def na_rep(self) -> str: | ||
return self.fmt.na_rep | ||
|
||
@property | ||
def float_format(self) -> Optional[FloatFormatType]: | ||
return self.fmt.float_format | ||
|
||
@property | ||
def index_label(self) -> IndexLabel: | ||
return self._index_label | ||
def decimal(self) -> str: | ||
return self.fmt.decimal | ||
|
||
@index_label.setter | ||
def index_label(self, index_label: Optional[IndexLabel]) -> None: | ||
@property | ||
def header(self) -> Union[bool, Sequence[str]]: | ||
return self.fmt.header | ||
|
||
@property | ||
def index(self) -> bool: | ||
return self.fmt.index | ||
|
||
def _initialize_index_label(self, index_label: Optional[IndexLabel]) -> IndexLabel: | ||
if index_label is not False: | ||
if index_label is None: | ||
index_label = self._get_index_label_from_obj() | ||
return self._get_index_label_from_obj() | ||
elif not isinstance(index_label, (list, tuple, np.ndarray, ABCIndexClass)): | ||
# given a string for a DF with Index | ||
index_label = [index_label] | ||
self._index_label = index_label | ||
return [index_label] | ||
return index_label | ||
|
||
def _get_index_label_from_obj(self) -> List[str]: | ||
if isinstance(self.obj.index, ABCMultiIndex): | ||
|
@@ -122,30 +130,17 @@ def _get_index_label_flat(self) -> List[str]: | |
index_label = self.obj.index.name | ||
return [""] if index_label is None else [index_label] | ||
|
||
@property | ||
def quotechar(self) -> Optional[str]: | ||
def _initialize_quotechar(self, quotechar: Optional[str]) -> Optional[str]: | ||
if self.quoting != csvlib.QUOTE_NONE: | ||
# prevents crash in _csv | ||
return self._quotechar | ||
return quotechar | ||
return None | ||
|
||
@quotechar.setter | ||
def quotechar(self, quotechar: Optional[str]) -> None: | ||
self._quotechar = quotechar | ||
|
||
@property | ||
def has_mi_columns(self) -> bool: | ||
return bool(isinstance(self.obj.columns, ABCMultiIndex)) | ||
|
||
@property | ||
def cols(self) -> Sequence[Label]: | ||
return self._cols | ||
|
||
@cols.setter | ||
def cols(self, cols: Optional[Sequence[Label]]) -> None: | ||
self._cols = self._refine_cols(cols) | ||
|
||
def _refine_cols(self, cols: Optional[Sequence[Label]]) -> Sequence[Label]: | ||
def _initialize_columns(self, cols: Optional[Sequence[Label]]) -> Sequence[Label]: | ||
# validate mi options | ||
if self.has_mi_columns: | ||
if cols is not None: | ||
|
@@ -161,12 +156,16 @@ def _refine_cols(self, cols: Optional[Sequence[Label]]) -> Sequence[Label]: | |
|
||
# update columns to include possible multiplicity of dupes | ||
# and make sure cols is just a list of labels | ||
cols = self.obj.columns | ||
if isinstance(cols, ABCIndexClass): | ||
return cols._format_native_types(**self._number_format) | ||
new_cols = self.obj.columns | ||
if isinstance(new_cols, ABCIndexClass): | ||
return new_cols._format_native_types(**self._number_format) | ||
else: | ||
assert isinstance(cols, Sequence) | ||
return list(cols) | ||
return list(new_cols) | ||
|
||
def _initialize_chunksize(self, chunksize: Optional[int]) -> int: | ||
if chunksize is None: | ||
return (100000 // (len(self.cols) or 1)) or 1 | ||
return int(chunksize) | ||
|
||
@property | ||
def _number_format(self) -> Dict[str, Any]: | ||
|
@@ -179,17 +178,6 @@ def _number_format(self) -> Dict[str, Any]: | |
decimal=self.decimal, | ||
) | ||
|
||
@property | ||
def chunksize(self) -> int: | ||
return self._chunksize | ||
|
||
@chunksize.setter | ||
def chunksize(self, chunksize: Optional[int]) -> None: | ||
if chunksize is None: | ||
chunksize = (100000 // (len(self.cols) or 1)) or 1 | ||
assert chunksize is not None | ||
self._chunksize = int(chunksize) | ||
|
||
@property | ||
def data_index(self) -> Index: | ||
data_index = self.obj.index | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this TODO still needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know what it actually means here... :)