Skip to content

Commit ccb98a3

Browse files
committed
Merge branch 'master' into feature/groupby-repr-ellipses-1135
2 parents 29c6263 + f0ba498 commit ccb98a3

File tree

130 files changed

+770
-773
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

130 files changed

+770
-773
lines changed

doc/source/whatsnew/v0.25.0.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ I/O
354354
- Bug in :meth:`DataFrame.to_string` and :meth:`DataFrame.to_latex` that would lead to incorrect output when the ``header`` keyword is used (:issue:`16718`)
355355
- Bug in :func:`read_csv` not properly interpreting UTF-8 encoded filenames on Windows on Python 3.6+ (:issue:`15086`)
356356
- Improved performance in :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` when converting columns that have missing values (:issue:`25772`)
357-
357+
- Bug in ``read_csv`` which would not raise ``ValueError`` if a column index in ``usecols`` was out of bounds (:issue:`25623`)
358358

359359
Plotting
360360
^^^^^^^^
@@ -373,6 +373,7 @@ Groupby/Resample/Rolling
373373
- Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying an aggregation function to timezone aware data (:issue:`23683`)
374374
- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`)
375375
- Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`)
376+
- Bug in :func:`idxmax` and :func:`idxmin` on :meth:`DataFrame.groupby` where a datetime column would return an incorrect dtype (:issue:`25444`, :issue:`15306`)
376377

377378

378379
Reshaping
@@ -396,7 +397,7 @@ Sparse
396397
Other
397398
^^^^^
398399

399-
-
400+
- Bug in :class:`Series` and :class:`DataFrame` repr where ``np.datetime64('NaT')`` and ``np.timedelta64('NaT')`` with ``dtype=object`` would be represented as ``NaN`` (:issue:`25445`)
400401
-
401402
-
402403

pandas/_libs/hashtable_class_helper.pxi.in

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
99
# VectorData
1010
# ----------------------------------------------------------------------
1111

12+
from pandas._libs.tslibs.util cimport get_c_string
13+
1214
{{py:
1315

1416
# name, dtype, arg
@@ -595,7 +597,7 @@ cdef class StringHashTable(HashTable):
595597
cdef:
596598
khiter_t k
597599
const char *v
598-
v = util.get_c_string(val)
600+
v = get_c_string(val)
599601

600602
k = kh_get_str(self.table, v)
601603
if k != self.table.n_buckets:
@@ -609,7 +611,7 @@ cdef class StringHashTable(HashTable):
609611
int ret = 0
610612
const char *v
611613

612-
v = util.get_c_string(val)
614+
v = get_c_string(val)
613615

614616
k = kh_put_str(self.table, v, &ret)
615617
self.table.keys[k] = key
@@ -632,7 +634,7 @@ cdef class StringHashTable(HashTable):
632634
vecs = <const char **>malloc(n * sizeof(char *))
633635
for i in range(n):
634636
val = values[i]
635-
v = util.get_c_string(val)
637+
v = get_c_string(val)
636638
vecs[i] = v
637639

638640
with nogil:
@@ -662,9 +664,9 @@ cdef class StringHashTable(HashTable):
662664
val = values[i]
663665

664666
if isinstance(val, (str, unicode)):
665-
v = util.get_c_string(val)
667+
v = get_c_string(val)
666668
else:
667-
v = util.get_c_string(self.na_string_sentinel)
669+
v = get_c_string(self.na_string_sentinel)
668670
vecs[i] = v
669671

670672
with nogil:
@@ -695,9 +697,9 @@ cdef class StringHashTable(HashTable):
695697
val = values[i]
696698

697699
if isinstance(val, (str, unicode)):
698-
v = util.get_c_string(val)
700+
v = get_c_string(val)
699701
else:
700-
v = util.get_c_string(self.na_string_sentinel)
702+
v = get_c_string(self.na_string_sentinel)
701703
vecs[i] = v
702704

703705
with nogil:
@@ -776,7 +778,7 @@ cdef class StringHashTable(HashTable):
776778
labels[i] = na_sentinel
777779
else:
778780
# if ignore_na is False, we also stringify NaN/None/etc.
779-
v = util.get_c_string(val)
781+
v = get_c_string(val)
780782
vecs[i] = v
781783

782784
# compute

pandas/_libs/parsers.pyx

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -592,8 +592,7 @@ cdef class TextReader:
592592
if not QUOTE_MINIMAL <= quoting <= QUOTE_NONE:
593593
raise TypeError('bad "quoting" value')
594594

595-
if not isinstance(quote_char, (str, compat.text_type,
596-
bytes)) and quote_char is not None:
595+
if not isinstance(quote_char, (str, bytes)) and quote_char is not None:
597596
dtype = type(quote_char).__name__
598597
raise TypeError('"quotechar" must be string, '
599598
'not {dtype}'.format(dtype=dtype))
@@ -2123,7 +2122,7 @@ cdef raise_parser_error(object base, parser_t *parser):
21232122

21242123
# PyErr_Fetch only returned the error message in *value,
21252124
# so the Exception class must be extracted from *type.
2126-
if isinstance(old_exc, compat.string_types):
2125+
if isinstance(old_exc, str):
21272126
if type != NULL:
21282127
exc_type = <object>type
21292128
else:

pandas/_libs/testing.pyx

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import numpy as np
22

3-
from pandas import compat
43
from pandas.core.dtypes.missing import isna, array_equivalent
54
from pandas.core.dtypes.common import is_dtype_equal
65

@@ -108,8 +107,7 @@ cpdef assert_almost_equal(a, b,
108107
if isinstance(a, dict) or isinstance(b, dict):
109108
return assert_dict_equal(a, b)
110109

111-
if (isinstance(a, compat.string_types) or
112-
isinstance(b, compat.string_types)):
110+
if isinstance(a, str) or isinstance(b, str):
113111
assert a == b, "%r != %r" % (a, b)
114112
return True
115113

pandas/_libs/tslibs/nattype.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ cdef class _NaT(datetime):
229229

230230
def total_seconds(self):
231231
"""
232-
Total duration of timedelta in seconds (to ns precision)
232+
Total duration of timedelta in seconds (to ns precision).
233233
"""
234234
# GH#10939
235235
return np.nan

pandas/_libs/tslibs/np_datetime.pyx

Lines changed: 8 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# -*- coding: utf-8 -*-
22

3-
from cpython cimport (Py_EQ, Py_NE, Py_GE, Py_GT, Py_LT, Py_LE,
4-
PyUnicode_AsASCIIString)
3+
from cpython cimport Py_EQ, Py_NE, Py_GE, Py_GT, Py_LT, Py_LE
54

65
from cpython.datetime cimport (datetime, date,
76
PyDateTime_IMPORT,
@@ -13,6 +12,7 @@ from cpython.datetime cimport (datetime, date,
1312
PyDateTime_IMPORT
1413

1514
from numpy cimport int64_t
15+
from pandas._libs.tslibs.util cimport get_c_string_buf_and_size
1616

1717
cdef extern from "src/datetime/np_datetime.h":
1818
int cmp_npy_datetimestruct(npy_datetimestruct *a,
@@ -33,7 +33,7 @@ cdef extern from "src/datetime/np_datetime.h":
3333
npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS
3434

3535
cdef extern from "src/datetime/np_datetime_strings.h":
36-
int parse_iso_8601_datetime(char *str, int len,
36+
int parse_iso_8601_datetime(const char *str, int len,
3737
npy_datetimestruct *out,
3838
int *out_local, int *out_tzoffset)
3939

@@ -174,30 +174,9 @@ cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts):
174174
cdef inline int _string_to_dts(object val, npy_datetimestruct* dts,
175175
int* out_local, int* out_tzoffset) except? -1:
176176
cdef:
177-
int result
178-
char *tmp
177+
Py_ssize_t length
178+
const char* buf
179179

180-
if isinstance(val, unicode):
181-
val = PyUnicode_AsASCIIString(val)
182-
183-
tmp = val
184-
result = _cstring_to_dts(tmp, len(val), dts, out_local, out_tzoffset)
185-
186-
if result == -1:
187-
raise ValueError('Unable to parse %s' % str(val))
188-
return result
189-
190-
191-
cdef inline int _cstring_to_dts(char *val, int length,
192-
npy_datetimestruct* dts,
193-
int* out_local, int* out_tzoffset) except? -1:
194-
# Note: without this "extra layer" between _string_to_dts
195-
# and parse_iso_8601_datetime, calling _string_to_dts raises
196-
# `SystemError: <class 'str'> returned a result with an error set`
197-
# in Python3
198-
cdef:
199-
int result
200-
201-
result = parse_iso_8601_datetime(val, length,
202-
dts, out_local, out_tzoffset)
203-
return result
180+
buf = get_c_string_buf_and_size(val, &length)
181+
return parse_iso_8601_datetime(buf, length,
182+
dts, out_local, out_tzoffset)

pandas/_libs/tslibs/parsing.pyx

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ from cpython.datetime cimport datetime
1212
import numpy as np
1313

1414
import six
15-
from six import binary_type, text_type
1615

1716
# Avoid import from outside _libs
1817
if sys.version_info.major == 2:
@@ -102,7 +101,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
102101
103102
Parameters
104103
----------
105-
arg : compat.string_types
104+
arg : str
106105
freq : str or DateOffset, default None
107106
Helps with interpreting time string if supplied
108107
dayfirst : bool, default None
@@ -537,13 +536,13 @@ class _timelex(object):
537536
if six.PY2:
538537
# In Python 2, we can't duck type properly because unicode has
539538
# a 'decode' function, and we'd be double-decoding
540-
if isinstance(instream, (binary_type, bytearray)):
539+
if isinstance(instream, (bytes, bytearray)):
541540
instream = instream.decode()
542541
else:
543542
if getattr(instream, 'decode', None) is not None:
544543
instream = instream.decode()
545544

546-
if isinstance(instream, text_type):
545+
if isinstance(instream, str):
547546
self.stream = instream
548547
elif getattr(instream, 'read', None) is None:
549548
raise TypeError(

pandas/_libs/tslibs/period.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2390,7 +2390,7 @@ class Period(_Period):
23902390
23912391
Parameters
23922392
----------
2393-
value : Period or compat.string_types, default None
2393+
value : Period or str, default None
23942394
The time period represented (e.g., '4Q2005')
23952395
freq : str, default None
23962396
One of pandas period strings or corresponding objects

pandas/_libs/tslibs/src/datetime/np_datetime_strings.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,13 @@ This file implements string parsing and creation for NumPy datetime.
6666
*
6767
* Returns 0 on success, -1 on failure.
6868
*/
69-
int parse_iso_8601_datetime(char *str, int len,
69+
int parse_iso_8601_datetime(const char *str, int len,
7070
npy_datetimestruct *out,
7171
int *out_local, int *out_tzoffset) {
7272
int year_leap = 0;
7373
int i, numdigits;
74-
char *substr, sublen;
74+
const char *substr;
75+
int sublen;
7576

7677
/* If year-month-day are separated by a valid separator,
7778
* months/days without leading zeroes will be parsed
@@ -586,7 +587,8 @@ int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) {
586587
*/
587588
int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
588589
NPY_DATETIMEUNIT base) {
589-
char *substr = outstr, sublen = outlen;
590+
char *substr = outstr;
591+
int sublen = outlen;
590592
int tmplen;
591593

592594
/*

pandas/_libs/tslibs/src/datetime/np_datetime_strings.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ This file implements string parsing and creation for NumPy datetime.
5454
* Returns 0 on success, -1 on failure.
5555
*/
5656
int
57-
parse_iso_8601_datetime(char *str, int len,
57+
parse_iso_8601_datetime(const char *str, int len,
5858
npy_datetimestruct *out,
5959
int *out_local,
6060
int *out_tzoffset);

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -815,13 +815,29 @@ cdef class _Timedelta(timedelta):
815815

816816
cpdef timedelta to_pytimedelta(_Timedelta self):
817817
"""
818-
return an actual datetime.timedelta object
819-
note: we lose nanosecond resolution if any
818+
Convert a pandas Timedelta object into a python timedelta object.
819+
820+
Timedelta objects are internally saved as numpy timedelta64[ns] dtype.
821+
Use to_pytimedelta() to convert to object dtype.
822+
823+
Returns
824+
-------
825+
datetime.timedelta or numpy.array of datetime.timedelta
826+
827+
See Also
828+
--------
829+
to_timedelta : Convert argument to Timedelta type.
830+
831+
Notes
832+
-----
833+
Any nanosecond resolution will be lost.
820834
"""
821835
return timedelta(microseconds=int(self.value) / 1000)
822836

823837
def to_timedelta64(self):
824-
""" Returns a numpy.timedelta64 object with 'ns' precision """
838+
"""
839+
Return a numpy.timedelta64 object with 'ns' precision.
840+
"""
825841
return np.timedelta64(self.value, 'ns')
826842

827843
def to_numpy(self, dtype=None, copy=False):
@@ -846,17 +862,21 @@ cdef class _Timedelta(timedelta):
846862

847863
def total_seconds(self):
848864
"""
849-
Total duration of timedelta in seconds (to ns precision)
865+
Total duration of timedelta in seconds (to ns precision).
850866
"""
851867
return self.value / 1e9
852868

853869
def view(self, dtype):
854-
""" array view compat """
870+
"""
871+
Array view compatibility.
872+
"""
855873
return np.timedelta64(self.value).view(dtype)
856874

857875
@property
858876
def components(self):
859-
""" Return a Components NamedTuple-like """
877+
"""
878+
Return a components namedtuple-like.
879+
"""
860880
self._ensure_components()
861881
# return the named tuple
862882
return Components(self._d, self._h, self._m, self._s,
@@ -1157,6 +1177,7 @@ class Timedelta(_Timedelta):
11571177
-----
11581178
The ``.value`` attribute is always in ns.
11591179
"""
1180+
11601181
def __new__(cls, object value=_no_input, unit=None, **kwargs):
11611182
cdef _Timedelta td_base
11621183

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,8 @@ cdef class _Timestamp(datetime):
539539

540540

541541
class Timestamp(_Timestamp):
542-
"""Pandas replacement for datetime.datetime
542+
"""
543+
Pandas replacement for python datetime.datetime object.
543544
544545
Timestamp is the pandas equivalent of python's Datetime
545546
and is interchangeable with it in most cases. It's the type used
@@ -549,9 +550,9 @@ class Timestamp(_Timestamp):
549550
Parameters
550551
----------
551552
ts_input : datetime-like, str, int, float
552-
Value to be converted to Timestamp
553+
Value to be converted to Timestamp.
553554
freq : str, DateOffset
554-
Offset which Timestamp will have
555+
Offset which Timestamp will have.
555556
tz : str, pytz.timezone, dateutil.tz.tzfile or None
556557
Time zone for time which Timestamp will have.
557558
unit : str

0 commit comments

Comments
 (0)