Skip to content

Commit 876fb42

Browse files
committed
wip
1 parent fe13de0 commit 876fb42

File tree

3 files changed: +201 additions, -164 deletions

pandas/src/generate_code.py

Lines changed: 57 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
PyTuple_New)
2323
from cpython cimport PyFloat_Check
2424
cimport cpython
25+
cdef double NAN = <double> np.nan
2526
2627
import numpy as np
2728
isnan = np.isnan
@@ -881,7 +882,7 @@ def group_add_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
881882
Only aggregates on axis=0
882883
'''
883884
cdef:
884-
Py_ssize_t i, j, N, K, lab
885+
Py_ssize_t i, j, N, K, lab, lcounts = len(counts)
885886
%(dest_type2)s val, count
886887
ndarray[%(dest_type2)s, ndim=2] sumx, nobs
887888
@@ -894,39 +895,45 @@ def group_add_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
894895
N, K = (<object> values).shape
895896
896897
if K > 1:
897-
for i in range(N):
898-
lab = labels[i]
899-
if lab < 0:
900-
continue
901898
902-
counts[lab] += 1
903-
for j in range(K):
904-
val = values[i, j]
899+
with nogil:
900+
for i in range(N):
901+
lab = labels[i]
902+
if lab < 0:
903+
continue
904+
905+
counts[lab] += 1
906+
for j in range(K):
907+
val = values[i, j]
908+
909+
# not nan
910+
if val == val:
911+
nobs[lab, j] += 1
912+
sumx[lab, j] += val
905913
906-
# not nan
907-
if val == val:
908-
nobs[lab, j] += 1
909-
sumx[lab, j] += val
910914
else:
911-
for i in range(N):
912-
lab = labels[i]
913-
if lab < 0:
914-
continue
915915
916-
counts[lab] += 1
917-
val = values[i, 0]
916+
with nogil:
917+
for i in range(N):
918+
lab = labels[i]
919+
if lab < 0:
920+
continue
918921
919-
# not nan
920-
if val == val:
921-
nobs[lab, 0] += 1
922-
sumx[lab, 0] += val
922+
counts[lab] += 1
923+
val = values[i, 0]
923924
924-
for i in range(len(counts)):
925-
for j in range(K):
926-
if nobs[i, j] == 0:
927-
out[i, j] = nan
928-
else:
929-
out[i, j] = sumx[i, j]
925+
# not nan
926+
if val == val:
927+
nobs[lab, 0] += 1
928+
sumx[lab, 0] += val
929+
930+
with nogil:
931+
for i in range(lcounts):
932+
for j in range(K):
933+
if nobs[i, j] == 0:
934+
out[i, j] = NAN
935+
else:
936+
out[i, j] = sumx[i, j]
930937
"""
931938

932939
group_add_bin_template = """@cython.boundscheck(False)
@@ -982,7 +989,7 @@ def group_add_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
982989
for i in range(ngroups):
983990
for j in range(K):
984991
if nobs[i, j] == 0:
985-
out[i, j] = nan
992+
out[i, j] = NAN
986993
else:
987994
out[i, j] = sumx[i, j]
988995
"""
@@ -1040,7 +1047,7 @@ def group_prod_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
10401047
for i in range(len(counts)):
10411048
for j in range(K):
10421049
if nobs[i, j] == 0:
1043-
out[i, j] = nan
1050+
out[i, j] = NAN
10441051
else:
10451052
out[i, j] = prodx[i, j]
10461053
"""
@@ -1098,7 +1105,7 @@ def group_prod_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
10981105
for i in range(ngroups):
10991106
for j in range(K):
11001107
if nobs[i, j] == 0:
1101-
out[i, j] = nan
1108+
out[i, j] = NAN
11021109
else:
11031110
out[i, j] = prodx[i, j]
11041111
"""
@@ -1160,7 +1167,7 @@ def group_var_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
11601167
for j in range(K):
11611168
ct = nobs[i, j]
11621169
if ct < 2:
1163-
out[i, j] = nan
1170+
out[i, j] = NAN
11641171
else:
11651172
out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) /
11661173
(ct * ct - ct))
@@ -1223,7 +1230,7 @@ def group_var_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
12231230
for j in range(K):
12241231
ct = nobs[i, j]
12251232
if ct < 2:
1226-
out[i, j] = nan
1233+
out[i, j] = NAN
12271234
else:
12281235
out[i, j] = ((ct * sumxx[i, j] - sumx[i, j] * sumx[i, j]) /
12291236
(ct * ct - ct))
@@ -1608,7 +1615,7 @@ def group_mean_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
16081615
for j in range(K):
16091616
count = nobs[i, j]
16101617
if nobs[i, j] == 0:
1611-
out[i, j] = nan
1618+
out[i, j] = NAN
16121619
else:
16131620
out[i, j] = sumx[i, j] / count
16141621
"""
@@ -1663,7 +1670,7 @@ def group_mean_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
16631670
for j in range(K):
16641671
count = nobs[i, j]
16651672
if count == 0:
1666-
out[i, j] = nan
1673+
out[i, j] = NAN
16671674
else:
16681675
out[i, j] = sumx[i, j] / count
16691676
"""
@@ -1680,7 +1687,7 @@ def group_ohlc_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
16801687
cdef:
16811688
Py_ssize_t i, j, N, K, ngroups, b
16821689
%(dest_type2)s val, count
1683-
%(dest_type2)s vopen, vhigh, vlow, vclose, NA
1690+
%(dest_type2)s vopen, vhigh, vlow, vclose
16841691
bint got_first = 0
16851692
16861693
if bins[len(bins) - 1] == len(values):
@@ -1693,8 +1700,6 @@ def group_ohlc_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
16931700
if out.shape[1] != 4:
16941701
raise ValueError('Output array must have 4 columns')
16951702
1696-
NA = np.nan
1697-
16981703
b = 0
16991704
if K > 1:
17001705
raise NotImplementedError("Argument 'values' must have only "
@@ -1703,10 +1708,10 @@ def group_ohlc_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
17031708
for i in range(N):
17041709
while b < ngroups - 1 and i >= bins[b]:
17051710
if not got_first:
1706-
out[b, 0] = NA
1707-
out[b, 1] = NA
1708-
out[b, 2] = NA
1709-
out[b, 3] = NA
1711+
out[b, 0] = NAN
1712+
out[b, 1] = NAN
1713+
out[b, 2] = NAN
1714+
out[b, 3] = NAN
17101715
else:
17111716
out[b, 0] = vopen
17121717
out[b, 1] = vhigh
@@ -1733,10 +1738,10 @@ def group_ohlc_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
17331738
vclose = val
17341739
17351740
if not got_first:
1736-
out[b, 0] = NA
1737-
out[b, 1] = NA
1738-
out[b, 2] = NA
1739-
out[b, 3] = NA
1741+
out[b, 0] = NAN
1742+
out[b, 1] = NAN
1743+
out[b, 2] = NAN
1744+
out[b, 3] = NAN
17401745
else:
17411746
out[b, 0] = vopen
17421747
out[b, 1] = vhigh
@@ -2337,16 +2342,16 @@ def generate_put_template(template, use_ints=True, use_floats=True,
23372342
def generate_put_min_max_template(template, use_ints=True, use_floats=True,
23382343
use_objects=False, use_datelikes=False):
23392344
floats_list = [
2340-
('float64', 'float64_t', 'nan', 'np.inf'),
2341-
('float32', 'float32_t', 'nan', 'np.inf'),
2345+
('float64', 'float64_t', 'NAN', 'np.inf'),
2346+
('float32', 'float32_t', 'NAN', 'np.inf'),
23422347
]
23432348
ints_list = [
23442349
('int64', 'int64_t', 'iNaT', _int64_max),
23452350
]
23462351
date_like_list = [
23472352
('int64', 'int64_t', 'iNaT', _int64_max),
23482353
]
2349-
object_list = [('object', 'object', 'nan', 'np.inf')]
2354+
object_list = [('object', 'object', 'np.nan', 'np.inf')]
23502355
function_list = []
23512356
if use_floats:
23522357
function_list.extend(floats_list)
@@ -2369,16 +2374,16 @@ def generate_put_min_max_template(template, use_ints=True, use_floats=True,
23692374
def generate_put_selection_template(template, use_ints=True, use_floats=True,
23702375
use_objects=False, use_datelikes=False):
23712376
floats_list = [
2372-
('float64', 'float64_t', 'float64_t', 'nan'),
2373-
('float32', 'float32_t', 'float32_t', 'nan'),
2377+
('float64', 'float64_t', 'float64_t', 'NAN'),
2378+
('float32', 'float32_t', 'float32_t', 'NAN'),
23742379
]
23752380
ints_list = [
23762381
('int64', 'int64_t', 'int64_t', 'iNaT'),
23772382
]
23782383
date_like_list = [
23792384
('int64', 'int64_t', 'int64_t', 'iNaT'),
23802385
]
2381-
object_list = [('object', 'object', 'object', 'nan')]
2386+
object_list = [('object', 'object', 'object', 'np.nan')]
23822387
function_list = []
23832388
if use_floats:
23842389
function_list.extend(floats_list)

0 commit comments

Comments (0)