Skip to content

Commit ce669d6

Browse files
committed
ENH: Add date_format keyword to to_csv()
DOC: add date_format to release notes
1 parent 6ee748e commit ce669d6

File tree

9 files changed

+142
-31
lines changed

9 files changed

+142
-31
lines changed

doc/source/release.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ New features
6060
- Clipboard functionality now works with PySide (:issue:`4282`)
6161
- New ``extract`` string method returns regex matches more conveniently (:issue:`4685`)
6262
- Auto-detect field widths in read_fwf when unspecified (:issue:`4488`)
63+
- ``to_csv()`` now outputs datetime objects according to a specified format string
64+
via the ``date_format`` keyword (:issue:`4313`)
65+
6366

6467
Experimental Features
6568
~~~~~~~~~~~~~~~~~~~~~

doc/source/v0.13.0.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ API changes
8787
and arithmetic flex methods (add, sub, mul, etc.). ``SparsePanel`` does not
8888
support ``pow`` or ``mod`` with non-scalars. (:issue:`3765`)
8989

90+
- ``to_csv`` now takes a ``date_format`` keyword argument that specifies how
91+
output datetime objects should be formatted. Datetimes encountered in the
92+
index, columns, and values will all have this formatting applied. (:issue:`4313`)
9093

9194
Prior Version Deprecations/Changes
9295
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

pandas/core/format.py

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import itertools
1919
import csv
2020

21-
from pandas.tseries.period import PeriodIndex
21+
from pandas.tseries.period import PeriodIndex, DatetimeIndex
2222

2323
docstring_to_string = """
2424
Parameters
@@ -850,7 +850,7 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
850850
cols=None, header=True, index=True, index_label=None,
851851
mode='w', nanRep=None, encoding=None, quoting=None,
852852
line_terminator='\n', chunksize=None, engine=None,
853-
tupleize_cols=False, quotechar='"'):
853+
tupleize_cols=False, quotechar='"', date_format=None):
854854

855855
self.engine = engine # remove for 0.13
856856
self.obj = obj
@@ -877,6 +877,8 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
877877

878878
self.line_terminator = line_terminator
879879

880+
self.date_format = date_format
881+
880882
#GH3457
881883
if not self.obj.columns.is_unique and engine == 'python':
882884
msg= "columns.is_unique == False not supported with engine='python'"
@@ -893,7 +895,8 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
893895

894896
if cols is not None:
895897
if isinstance(cols,Index):
896-
cols = cols.to_native_types(na_rep=na_rep,float_format=float_format)
898+
cols = cols.to_native_types(na_rep=na_rep,float_format=float_format,
899+
date_format=date_format)
897900
else:
898901
cols=list(cols)
899902
self.obj = self.obj.loc[:,cols]
@@ -902,7 +905,8 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
902905
# and make sure sure cols is just a list of labels
903906
cols = self.obj.columns
904907
if isinstance(cols,Index):
905-
cols = cols.to_native_types(na_rep=na_rep,float_format=float_format)
908+
cols = cols.to_native_types(na_rep=na_rep,float_format=float_format,
909+
date_format=date_format)
906910
else:
907911
cols=list(cols)
908912

@@ -923,6 +927,9 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
923927
if isinstance(obj.index, PeriodIndex):
924928
self.data_index = obj.index.to_timestamp()
925929

930+
if isinstance(self.data_index, DatetimeIndex) and date_format is not None:
931+
self.data_index = Index([x.strftime(date_format) if notnull(x) else '' for x in self.data_index])
932+
926933
self.nlevels = getattr(self.data_index, 'nlevels', 1)
927934
if not index:
928935
self.nlevels = 0
@@ -931,15 +938,10 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
931938
# invoked by df.to_csv(engine=python)
932939
def _helper_csv(self, writer, na_rep=None, cols=None,
933940
header=True, index=True,
934-
index_label=None, float_format=None):
941+
index_label=None, float_format=None, date_format=None):
935942
if cols is None:
936943
cols = self.columns
937944

938-
series = {}
939-
for k, v in compat.iteritems(self.obj._series):
940-
series[k] = v.values
941-
942-
943945
has_aliases = isinstance(header, (tuple, list, np.ndarray))
944946
if has_aliases or header:
945947
if index:
@@ -981,10 +983,34 @@ def _helper_csv(self, writer, na_rep=None, cols=None,
981983
encoded_cols = list(cols)
982984
writer.writerow(encoded_cols)
983985

986+
if date_format is None:
987+
date_formatter = lambda x: lib.Timestamp(x)._repr_base
988+
else:
989+
def strftime_with_nulls(x):
990+
x = lib.Timestamp(x)
991+
if notnull(x):
992+
return x.strftime(date_format)
993+
994+
date_formatter = lambda x: strftime_with_nulls(x)
995+
984996
data_index = self.obj.index
997+
985998
if isinstance(self.obj.index, PeriodIndex):
986999
data_index = self.obj.index.to_timestamp()
9871000

1001+
if isinstance(data_index, DatetimeIndex) and date_format is not None:
1002+
data_index = Index([date_formatter(x) for x in data_index])
1003+
1004+
values = self.obj.copy()
1005+
values.index = data_index
1006+
values.columns = values.columns.to_native_types(na_rep=na_rep,float_format=float_format,
1007+
date_format=date_format)
1008+
values = values[cols]
1009+
1010+
series = {}
1011+
for k, v in compat.iteritems(values._series):
1012+
series[k] = v.values
1013+
9881014
nlevels = getattr(data_index, 'nlevels', 1)
9891015
for j, idx in enumerate(data_index):
9901016
row_fields = []
@@ -1000,8 +1026,8 @@ def _helper_csv(self, writer, na_rep=None, cols=None,
10001026

10011027
if float_format is not None and com.is_float(val):
10021028
val = float_format % val
1003-
elif isinstance(val, np.datetime64):
1004-
val = lib.Timestamp(val)._repr_base
1029+
elif isinstance(val, (np.datetime64, lib.Timestamp)):
1030+
val = date_formatter(val)
10051031

10061032
row_fields.append(val)
10071033

@@ -1031,7 +1057,7 @@ def save(self):
10311057
self._helper_csv(self.writer, na_rep=self.na_rep,
10321058
float_format=self.float_format, cols=self.cols,
10331059
header=self.header, index=self.index,
1034-
index_label=self.index_label)
1060+
index_label=self.index_label, date_format=self.date_format)
10351061

10361062
else:
10371063
self._save()
@@ -1150,13 +1176,16 @@ def _save_chunk(self, start_i, end_i):
11501176
slicer = slice(start_i,end_i)
11511177
for i in range(len(self.blocks)):
11521178
b = self.blocks[i]
1153-
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
1179+
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep,
1180+
float_format=self.float_format, date_format=self.date_format)
1181+
11541182
for i, item in enumerate(b.items):
11551183

11561184
# self.data is a preallocated list
11571185
self.data[self.column_map[b][i]] = d[i]
11581186

1159-
ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
1187+
ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep,
1188+
float_format=self.float_format, date_format=self.date_format)
11601189

11611190
lib.write_csv_rows(self.data, ix, self.nlevels, self.cols, self.writer)
11621191

pandas/core/frame.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,7 +1030,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
10301030
cols=None, header=True, index=True, index_label=None,
10311031
mode='w', nanRep=None, encoding=None, quoting=None,
10321032
line_terminator='\n', chunksize=None,
1033-
tupleize_cols=False, **kwds):
1033+
tupleize_cols=False, date_format=None, **kwds):
10341034
r"""Write DataFrame to a comma-separated values (csv) file
10351035
10361036
Parameters
@@ -1073,6 +1073,8 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
10731073
tupleize_cols : boolean, default False
10741074
write multi_index columns as a list of tuples (if True)
10751075
or in the new, expanded format (if False)
1076+
date_format : string, default None
1077+
Format string for datetime objects (passed to ``strftime``).
10761078
"""
10771079
if nanRep is not None: # pragma: no cover
10781080
warnings.warn("nanRep is deprecated, use na_rep",
@@ -1088,7 +1090,8 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
10881090
index_label=index_label, mode=mode,
10891091
chunksize=chunksize, engine=kwds.get(
10901092
"engine"),
1091-
tupleize_cols=tupleize_cols)
1093+
tupleize_cols=tupleize_cols,
1094+
date_format=date_format)
10921095
formatter.save()
10931096

10941097
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',

pandas/core/internals.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
from pandas.tslib import Timestamp
2424
from pandas import compat
25-
from pandas.compat import range, lrange, lmap, callable, map, zip
25+
from pandas.compat import range, lrange, lmap, callable, map, zip, u
2626
from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type
2727

2828
class Block(PandasObject):
@@ -1396,7 +1396,7 @@ def fillna(self, value, inplace=False, downcast=None):
13961396
return [self if inplace else make_block(values, self.items,
13971397
self.ref_items, fastpath=True)]
13981398

1399-
def to_native_types(self, slicer=None, na_rep=None, **kwargs):
1399+
def to_native_types(self, slicer=None, na_rep=None, date_format=None, **kwargs):
14001400
""" convert to our native types format, slicing if desired """
14011401

14021402
values = self.values
@@ -1409,8 +1409,14 @@ def to_native_types(self, slicer=None, na_rep=None, **kwargs):
14091409
na_rep = 'NaT'
14101410
rvalues[mask] = na_rep
14111411
imask = (-mask).ravel()
1412-
rvalues.flat[imask] = np.array(
1413-
[Timestamp(val)._repr_base for val in values.ravel()[imask]], dtype=object)
1412+
1413+
if date_format is None:
1414+
date_formatter = lambda x: Timestamp(x)._repr_base
1415+
else:
1416+
date_formatter = lambda x: Timestamp(x).strftime(date_format)
1417+
1418+
rvalues.flat[imask] = np.array([date_formatter(val) for val in
1419+
values.ravel()[imask]], dtype=object)
14141420

14151421
return rvalues.tolist()
14161422

pandas/core/series.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2129,7 +2129,8 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
21292129

21302130
def to_csv(self, path, index=True, sep=",", na_rep='',
21312131
float_format=None, header=False,
2132-
index_label=None, mode='w', nanRep=None, encoding=None):
2132+
index_label=None, mode='w', nanRep=None, encoding=None,
2133+
date_format=None):
21332134
"""
21342135
Write Series to a comma-separated values (csv) file
21352136
@@ -2154,13 +2155,15 @@ def to_csv(self, path, index=True, sep=",", na_rep='',
21542155
encoding : string, optional
21552156
a string representing the encoding to use if the contents are
21562157
non-ascii, for python versions prior to 3
2158+
date_format : string, default None
2159+
Format string for datetime objects (passed to ``strftime``).
21572160
"""
21582161
from pandas.core.frame import DataFrame
21592162
df = DataFrame(self)
21602163
df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
21612164
float_format=float_format, header=header,
21622165
index_label=index_label, mode=mode, nanRep=nanRep,
2163-
encoding=encoding)
2166+
encoding=encoding, date_format=date_format)
21642167

21652168
def dropna(self):
21662169
"""

pandas/tests/test_frame.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11407,6 +11407,53 @@ def test_isin_with_string_scalar(self):
1140711407
with tm.assertRaises(TypeError):
1140811408
df.isin('aaa')
1140911409

11410+
def test_to_csv_date_format(self):
11411+
from pandas import to_datetime
11412+
pname = '__tmp_to_csv_date_format__'
11413+
with ensure_clean(pname) as path:
11414+
for engine in [None, 'python']:
11415+
dt_index = self.tsframe.index
11416+
datetime_frame = DataFrame({'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index)
11417+
11418+
datetime_frame.to_csv(path, date_format='%Y%m%d', engine=engine)
11419+
# Check that the data was put in the specified format
11420+
test = read_csv(path, index_col=0)
11421+
11422+
datetime_frame_int = datetime_frame.applymap(lambda x: int(x.strftime('%Y%m%d')))
11423+
datetime_frame_int.index = datetime_frame_int.index.map(lambda x: int(x.strftime('%Y%m%d')))
11424+
11425+
assert_frame_equal(test, datetime_frame_int)
11426+
11427+
datetime_frame.to_csv(path, date_format='%Y-%m-%d', engine=engine)
11428+
# Check that the data was put in the specified format
11429+
test = read_csv(path, index_col=0)
11430+
datetime_frame_str = datetime_frame.applymap(lambda x: x.strftime('%Y-%m-%d'))
11431+
datetime_frame_str.index = datetime_frame_str.index.map(lambda x: x.strftime('%Y-%m-%d'))
11432+
11433+
assert_frame_equal(test, datetime_frame_str)
11434+
11435+
# Check that columns get converted
11436+
datetime_frame_columns = datetime_frame.T
11437+
11438+
datetime_frame_columns.to_csv(path, date_format='%Y%m%d', engine=engine)
11439+
11440+
test = read_csv(path, index_col=0)
11441+
11442+
datetime_frame_columns = datetime_frame_columns.applymap(lambda x: int(x.strftime('%Y%m%d')))
11443+
# Columns don't get converted to ints by read_csv
11444+
datetime_frame_columns.columns = datetime_frame_columns.columns.map(lambda x: x.strftime('%Y%m%d'))
11445+
11446+
assert_frame_equal(test, datetime_frame_columns)
11447+
11448+
# test NaTs
11449+
nat_index = to_datetime(['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000'])
11450+
nat_frame = DataFrame({'A': nat_index}, index=nat_index)
11451+
11452+
nat_frame.to_csv(path, date_format='%Y-%m-%d', engine=engine)
11453+
11454+
test = read_csv(path, parse_dates=[0, 1], index_col=0)
11455+
11456+
assert_frame_equal(test, nat_frame)
1141011457

1141111458
def skip_if_no_ne(engine='numexpr'):
1141211459
if engine == 'numexpr':

pandas/tseries/index.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
import numpy as np
88

99
from pandas.core.common import (isnull, _NS_DTYPE, _INT64_DTYPE,
10-
is_list_like,_values_from_object, _maybe_box)
10+
is_list_like,_values_from_object, _maybe_box,
11+
notnull)
1112
from pandas.core.index import Index, Int64Index, _Identity
1213
import pandas.compat as compat
1314
from pandas.compat import u
@@ -599,23 +600,29 @@ def __contains__(self, key):
599600
def _format_with_header(self, header, **kwargs):
600601
return header + self._format_native_types(**kwargs)
601602

602-
def _format_native_types(self, na_rep=u('NaT'), **kwargs):
603+
def _format_native_types(self, na_rep=u('NaT'), date_format=None, **kwargs):
603604
data = list(self)
604605

605606
# tz formatter or time formatter
606607
zero_time = time(0, 0)
607-
for d in data:
608-
if d.time() != zero_time or d.tzinfo is not None:
609-
return [u('%s') % x for x in data]
608+
if date_format is None:
609+
for d in data:
610+
if d.time() != zero_time or d.tzinfo is not None:
611+
return [u('%s') % x for x in data]
610612

611613
values = np.array(data, dtype=object)
612614
mask = isnull(self.values)
613615
values[mask] = na_rep
614616

615617
imask = -mask
616-
values[imask] = np.array([u('%d-%.2d-%.2d') % (dt.year, dt.month,
617-
dt.day)
618-
for dt in values[imask]])
618+
619+
if date_format is None:
620+
date_formatter = lambda x: u('%d-%.2d-%.2d' % (x.year, x.month, x.day))
621+
else:
622+
date_formatter = lambda x: u(x.strftime(date_format))
623+
624+
values[imask] = np.array([date_formatter(dt) for dt in values[imask]])
625+
619626
return values.tolist()
620627

621628
def isin(self, values):

vb_suite/io_bench.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,13 @@ def create_cols(name):
8888
" parse_dates=['foo'])")
8989
read_parse_dates_iso8601 = Benchmark(stmt, setup,
9090
start_date=datetime(2012, 3, 1))
91+
92+
setup = common_setup + """
93+
rng = date_range('1/1/2000', periods=1000)
94+
data = DataFrame(rng, index=rng)
95+
"""
96+
97+
stmt = ("data.to_csv('__test__.csv', date_format='%Y%m%d')")
98+
99+
frame_to_csv_date_formatting = Benchmark(stmt, setup,
100+
start_date=datetime(2013, 9, 1))

0 commit comments

Comments
 (0)