Skip to content

Separate parsing functions out from tslib #17363

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Sep 26, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pandas/_libs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ from tslibs.timezones cimport (
is_utc, is_tzlocal, get_utcoffset, get_dst_info, maybe_get_tz)
from tslib cimport _nat_scalar_rules

from tslibs.parsing import parse_time_string, NAT_SENTINEL
from tslibs.frequencies cimport get_freq_code

from pandas.tseries import offsets
from pandas.core.tools.datetimes import parse_time_string
from pandas.tseries import frequencies

cdef int64_t NPY_NAT = util.get_nat()
Expand Down Expand Up @@ -1197,6 +1197,8 @@ class Period(_Period):
value = str(value)
value = value.upper()
dt, _, reso = parse_time_string(value, freq)
if dt is NAT_SENTINEL:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why this change?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tslibs.parsing does not have NaT in the namespace, so it returns NAT_SENTINEL in places where it otherwise would return NaT. That should be wrapped in tslib, will update.

ordinal = iNaT

if freq is None:
try:
Expand Down
159 changes: 0 additions & 159 deletions pandas/_libs/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1384,165 +1384,6 @@ def convert_sql_column(x):
return maybe_convert_objects(x, try_float=1)


def try_parse_dates(ndarray[object] values, parser=None,
                    dayfirst=False, default=None):
    """
    Parse each element of an object array into a date, mapping '' to NaN.

    Parameters
    ----------
    values : ndarray[object]
        Items to parse.
    parser : callable, optional
        Called on every non-empty item. When omitted, dateutil's ``parse``
        is used (with a ``datetime.strptime('%m/%d/%Y')`` fallback if
        dateutil cannot be imported).
    dayfirst : bool, default False
        Forwarded to dateutil's ``parse``; ignored when `parser` is given.
    default : datetime, optional
        Forwarded to dateutil's ``parse``; when omitted, the first day of
        the current month is used (GH2618). Ignored when `parser` is given.

    Returns
    -------
    ndarray[object]
        A new array of parsed items. With the built-in parser, any failure
        makes the function return `values` itself, unchanged.

    Raises
    ------
    Exception
        Whatever a user-supplied `parser` raises is propagated as-is.
    """
    cdef:
        Py_ssize_t pos, count
        ndarray[object] out

    count = len(values)
    out = np.empty(count, dtype='O')

    if parser is not None:
        # An explicitly supplied parser is trusted: let its errors surface.
        for pos in range(count):
            if values[pos] == '':
                out[pos] = np.nan
            else:
                out[pos] = parser(values[pos])
        return out

    if default is None:  # GH2618
        today = datetime.now()
        default = datetime(today.year, today.month, 1)

    try:
        from dateutil.parser import parse
    except ImportError:  # pragma: no cover
        def convert(s):
            try:
                return datetime.strptime(s, '%m/%d/%Y')
            except Exception:
                return s
    else:
        convert = lambda x: parse(x, dayfirst=dayfirst, default=default)

    # EAFP: attempt the whole array and give the input back on any failure.
    try:
        for pos in range(count):
            if values[pos] == '':
                out[pos] = np.nan
            else:
                out[pos] = convert(values[pos])
    except Exception:
        return values

    return out


def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times,
                            date_parser=None, time_parser=None,
                            dayfirst=False, default=None):
    """
    Combine parallel arrays of dates and times into datetime objects.

    Each element of `dates` and `times` is converted with ``str`` and parsed
    separately; the year/month/day of the parsed date and the
    hour/minute/second of the parsed time are merged into one ``datetime``.
    Sub-second precision of the parsed time is not carried over.

    Parameters
    ----------
    dates, times : ndarray[object]
        Equal-length arrays of date and time values.
    date_parser : callable, optional
        Parses a date string. Defaults to dateutil's ``parse`` (or
        ``datetime.strptime(s, '%m/%d/%Y')`` if dateutil is unavailable).
    time_parser : callable, optional
        Parses a time string. Defaults to dateutil's ``parse`` (or
        ``datetime.strptime(s, '%H:%M:%S')`` if dateutil is unavailable).
    dayfirst : bool, default False
        Forwarded to dateutil's ``parse`` for dates; ignored when
        `date_parser` is given.
    default : datetime, optional
        Forwarded to dateutil's ``parse`` for dates; when omitted, the first
        day of the current month is used (GH2618).

    Returns
    -------
    ndarray[object]
        Array of ``datetime`` objects, one per input pair.

    Raises
    ------
    ValueError
        If `dates` and `times` differ in length.
    """
    cdef:
        Py_ssize_t i, n
        ndarray[object] result

    from datetime import datetime

    n = len(dates)
    if len(times) != n:
        raise ValueError('Length of dates and times must be equal')
    result = np.empty(n, dtype='O')

    if date_parser is None:
        if default is None:  # GH2618
            now = datetime.now()
            default = datetime(now.year, now.month, 1)

        try:
            from dateutil.parser import parse
            parse_date = lambda x: parse(x, dayfirst=dayfirst, default=default)
        except ImportError:  # pragma: no cover
            def parse_date(s):
                # strptime only exists on datetime, not on date; only the
                # year/month/day attributes of the result are used below.
                try:
                    return datetime.strptime(s, '%m/%d/%Y')
                except Exception:
                    return s
    else:
        parse_date = date_parser

    if time_parser is None:
        try:
            from dateutil.parser import parse
            parse_time = lambda x: parse(x)
        except ImportError:  # pragma: no cover
            def parse_time(s):
                # strptime only exists on datetime, not on time; only the
                # hour/minute/second attributes of the result are used below.
                try:
                    return datetime.strptime(s, '%H:%M:%S')
                except Exception:
                    return s
    else:
        parse_time = time_parser

    for i in range(n):
        d = parse_date(str(dates[i]))
        t = parse_time(str(times[i]))
        result[i] = datetime(d.year, d.month, d.day,
                             t.hour, t.minute, t.second)

    return result


def try_parse_year_month_day(ndarray[object] years, ndarray[object] months,
                             ndarray[object] days):
    """
    Build datetime objects from parallel year, month and day arrays.

    Every component is passed through ``int`` before constructing the
    ``datetime``.

    Parameters
    ----------
    years, months, days : ndarray[object]
        Equal-length arrays of values convertible with ``int``.

    Returns
    -------
    ndarray[object]
        Array of ``datetime`` objects, one per position.

    Raises
    ------
    ValueError
        If the three arrays are not all the same length.
    """
    cdef:
        Py_ssize_t idx, count
        ndarray[object] out

    from datetime import datetime

    count = len(years)
    if not (len(months) == count and len(days) == count):
        raise ValueError('Length of years/months/days must all be equal')

    out = np.empty(count, dtype='O')
    for idx in range(count):
        out[idx] = datetime(int(years[idx]), int(months[idx]), int(days[idx]))

    return out


def try_parse_datetime_components(ndarray[object] years,
                                  ndarray[object] months,
                                  ndarray[object] days,
                                  ndarray[object] hours,
                                  ndarray[object] minutes,
                                  ndarray[object] seconds):
    """
    Build datetime objects from six parallel component arrays.

    Years, months, days, hours and minutes are converted with ``int``.
    Seconds are converted with ``float``; the whole part becomes the seconds
    field and a positive fractional part becomes microseconds, rounded to
    the nearest microsecond.

    Parameters
    ----------
    years, months, days, hours, minutes : ndarray[object]
        Equal-length arrays of values convertible with ``int``.
    seconds : ndarray[object]
        Same-length array of values convertible with ``float``.

    Returns
    -------
    ndarray[object]
        Array of ``datetime`` objects, one per position.

    Raises
    ------
    ValueError
        If the arrays are not all the same length.
    """
    cdef:
        Py_ssize_t i, n
        ndarray[object] result
        int secs
        int micros
        double float_secs
        double frac

    from datetime import datetime

    n = len(years)
    if (len(months) != n or len(days) != n or len(hours) != n or
            len(minutes) != n or len(seconds) != n):
        raise ValueError('Length of all datetime components must be equal')
    result = np.empty(n, dtype='O')

    for i in range(n):
        float_secs = float(seconds[i])
        secs = int(float_secs)

        frac = float_secs - secs
        if frac > 0:
            # Round rather than truncate: binary floating point leaves e.g.
            # 5.3 - 5 slightly below 0.3, which truncation would turn into
            # 299999 microseconds instead of 300000.
            micros = int(round(frac * 1000000))
            if micros > 999999:
                # Rounding must not spill into the next second; stay within
                # the range datetime accepts for microseconds.
                micros = 999999
        else:
            micros = 0

        result[i] = datetime(int(years[i]), int(months[i]), int(days[i]),
                             int(hours[i]), int(minutes[i]), secs,
                             micros)

    return result


def sanitize_objects(ndarray[object] values, set na_values,
convert_empty=True):
cdef:
Expand Down
Loading