Skip to content

Commit 13e8dea

Browse files
committed
Merge remote-tracking branch 'upstream/master' into str_infer
2 parents 58ce7bf + 1546c35 commit 13e8dea

32 files changed

+1472
-894
lines changed

ci/azure-windows-36.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@ channels:
55
dependencies:
66
- blosc
77
- bottleneck
8+
- boost-cpp<1.67
89
- fastparquet
910
- feather-format
1011
- matplotlib
1112
- numexpr
1213
- numpy=1.14*
1314
- openpyxl=2.5.5
15+
- parquet-cpp
1416
- pyarrow
1517
- pytables
1618
- python-dateutil

ci/code_checks.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
5656
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime
5757
RET=$(($RET + $?)) ; echo $MSG "DONE"
5858

59+
# Imports - Check formatting using isort; see setup.cfg for settings
60+
MSG='Check import format using isort ' ; echo $MSG
61+
isort --recursive --check-only pandas
62+
RET=$(($RET + $?)) ; echo $MSG "DONE"
63+
5964
fi
6065

6166
### PATTERNS ###

ci/environment-dev.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies:
88
- flake8
99
- flake8-comprehensions
1010
- hypothesis>=3.58.0
11+
- isort
1112
- moto
1213
- pytest>=3.6
1314
- python-dateutil>=2.5.0

ci/requirements_dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ NumPy
55
flake8
66
flake8-comprehensions
77
hypothesis>=3.58.0
8+
isort
89
moto
910
pytest>=3.6
1011
python-dateutil>=2.5.0

ci/travis-36.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ dependencies:
1414
- geopandas
1515
- html5lib
1616
- ipython
17+
- isort
1718
- jinja2
1819
- lxml
1920
- matplotlib

doc/source/whatsnew/v0.24.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -974,6 +974,7 @@ Reshaping
974974
- Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`)
975975
- Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`)
976976
- Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue:`22796`)
977+
- Bug in :func:`merge_asof` where a confusing error message was raised when attempting to merge with missing values (:issue:`23189`)
977978

978979
.. _whatsnew_0240.bug_fixes.sparse:
979980

pandas/_libs/algos_common_helper.pxi.in

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,33 +16,30 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
1616

1717
{{py:
1818

19-
# name, c_type, dest_type, dest_dtype
20-
dtypes = [('float64', 'float64_t', 'float64_t', 'np.float64'),
21-
('float32', 'float32_t', 'float32_t', 'np.float32'),
22-
('int8', 'int8_t', 'float32_t', 'np.float32'),
23-
('int16', 'int16_t', 'float32_t', 'np.float32'),
24-
('int32', 'int32_t', 'float64_t', 'np.float64'),
25-
('int64', 'int64_t', 'float64_t', 'np.float64')]
19+
# name, c_type, dest_type
20+
dtypes = [('float64', 'float64_t', 'float64_t'),
21+
('float32', 'float32_t', 'float32_t'),
22+
('int8', 'int8_t', 'float32_t'),
23+
('int16', 'int16_t', 'float32_t'),
24+
('int32', 'int32_t', 'float64_t'),
25+
('int64', 'int64_t', 'float64_t')]
2626

2727
def get_dispatch(dtypes):
2828

29-
for name, c_type, dest_type, dest_dtype, in dtypes:
30-
31-
dest_type2 = dest_type
32-
dest_type = dest_type.replace('_t', '')
33-
34-
yield name, c_type, dest_type, dest_type2, dest_dtype
29+
for name, c_type, dest_type, in dtypes:
30+
dest_name = dest_type[:-2] # i.e. strip "_t"
31+
yield name, c_type, dest_type, dest_name
3532

3633
}}
3734

38-
{{for name, c_type, dest_type, dest_type2, dest_dtype
35+
{{for name, c_type, dest_type, dest_name
3936
in get_dispatch(dtypes)}}
4037

4138

4239
@cython.boundscheck(False)
4340
@cython.wraparound(False)
4441
def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
45-
ndarray[{{dest_type2}}, ndim=2] out,
42+
ndarray[{{dest_type}}, ndim=2] out,
4643
Py_ssize_t periods, int axis):
4744
cdef:
4845
Py_ssize_t i, j, sx, sy
@@ -84,9 +81,9 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
8481
out[i, j] = arr[i, j] - arr[i, j - periods]
8582

8683

87-
def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
84+
def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
8885
ndarray[int64_t] indexer, Py_ssize_t loc,
89-
ndarray[{{dest_type2}}] out):
86+
ndarray[{{dest_type}}] out):
9087
cdef:
9188
Py_ssize_t i, j, k
9289

pandas/_libs/algos_rank_helper.pxi.in

Lines changed: 21 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -131,45 +131,20 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
131131
argsorted = _as.astype('i8')
132132

133133
{{if dtype == 'object'}}
134-
for i in range(n):
135-
sum_ranks += i + 1
136-
dups += 1
137-
isnan = sorted_mask[i]
138-
val = util.get_value_at(sorted_data, i)
139-
140-
if isnan and keep_na:
141-
ranks[argsorted[i]] = nan
142-
continue
143-
count += 1.0
144-
145-
if (i == n - 1 or
146-
are_diff(util.get_value_at(sorted_data, i + 1), val) or
147-
i == non_na_idx):
148-
if tiebreak == TIEBREAK_AVERAGE:
149-
for j in range(i - dups + 1, i + 1):
150-
ranks[argsorted[j]] = sum_ranks / dups
151-
elif tiebreak == TIEBREAK_MIN:
152-
for j in range(i - dups + 1, i + 1):
153-
ranks[argsorted[j]] = i - dups + 2
154-
elif tiebreak == TIEBREAK_MAX:
155-
for j in range(i - dups + 1, i + 1):
156-
ranks[argsorted[j]] = i + 1
157-
elif tiebreak == TIEBREAK_FIRST:
158-
raise ValueError('first not supported for non-numeric data')
159-
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
160-
for j in range(i - dups + 1, i + 1):
161-
ranks[argsorted[j]] = 2 * i - j - dups + 2
162-
elif tiebreak == TIEBREAK_DENSE:
163-
total_tie_count += 1
164-
for j in range(i - dups + 1, i + 1):
165-
ranks[argsorted[j]] = total_tie_count
166-
sum_ranks = dups = 0
134+
if True:
167135
{{else}}
168136
with nogil:
137+
{{endif}}
138+
# TODO: why does the 2d version not have a nogil block?
169139
for i in range(n):
170140
sum_ranks += i + 1
171141
dups += 1
142+
143+
{{if dtype == 'object'}}
144+
val = util.get_value_at(sorted_data, i)
145+
{{else}}
172146
val = sorted_data[i]
147+
{{endif}}
173148

174149
{{if dtype != 'uint64'}}
175150
isnan = sorted_mask[i]
@@ -180,8 +155,14 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
180155

181156
count += 1.0
182157

183-
if (i == n - 1 or sorted_data[i + 1] != val or
184-
i == non_na_idx):
158+
{{if dtype == 'object'}}
159+
if (i == n - 1 or
160+
are_diff(util.get_value_at(sorted_data, i + 1), val) or
161+
i == non_na_idx):
162+
{{else}}
163+
if (i == n - 1 or sorted_data[i + 1] != val or i == non_na_idx):
164+
{{endif}}
165+
185166
if tiebreak == TIEBREAK_AVERAGE:
186167
for j in range(i - dups + 1, i + 1):
187168
ranks[argsorted[j]] = sum_ranks / dups
@@ -192,8 +173,13 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
192173
for j in range(i - dups + 1, i + 1):
193174
ranks[argsorted[j]] = i + 1
194175
elif tiebreak == TIEBREAK_FIRST:
176+
{{if dtype == 'object'}}
177+
raise ValueError('first not supported for '
178+
'non-numeric data')
179+
{{else}}
195180
for j in range(i - dups + 1, i + 1):
196181
ranks[argsorted[j]] = j + 1
182+
{{endif}}
197183
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
198184
for j in range(i - dups + 1, i + 1):
199185
ranks[argsorted[j]] = 2 * i - j - dups + 2
@@ -202,7 +188,6 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
202188
for j in range(i - dups + 1, i + 1):
203189
ranks[argsorted[j]] = total_tie_count
204190
sum_ranks = dups = 0
205-
{{endif}}
206191
if pct:
207192
if tiebreak == TIEBREAK_DENSE:
208193
return ranks / total_tie_count

0 commit comments

Comments
 (0)