@@ -49,18 +49,11 @@ def list_to_object_array(list obj):
49
49
50
50
cdef size_t _INIT_VEC_CAP = 32
51
51
52
- cdef class ObjectVector :
52
+ cdef class Vector :
53
53
54
54
cdef:
55
55
size_t n, m
56
56
ndarray ao
57
- PyObject ** data
58
-
59
- def __cinit__ (self ):
60
- self .n = 0
61
- self .m = _INIT_VEC_CAP
62
- self .ao = np.empty(_INIT_VEC_CAP, dtype = object )
63
- self .data = < PyObject** > self .ao.data
64
57
65
58
def __len__ (self ):
66
59
return self .n
@@ -70,6 +63,18 @@ cdef class ObjectVector:
70
63
self .m = self .n
71
64
return self .ao
72
65
66
+
67
+ cdef class ObjectVector(Vector):
68
+
69
+ cdef:
70
+ PyObject ** data
71
+
72
+ def __cinit__ (self ):
73
+ self .n = 0
74
+ self .m = _INIT_VEC_CAP
75
+ self .ao = np.empty(_INIT_VEC_CAP, dtype = object )
76
+ self .data = < PyObject** > self .ao.data
77
+
73
78
cdef inline append(self , object o):
74
79
if self .n == self .m:
75
80
self .m = max (self .m * 2 , _INIT_VEC_CAP)
@@ -81,11 +86,9 @@ cdef class ObjectVector:
81
86
self .n += 1
82
87
83
88
84
- cdef class Int64Vector:
89
+ cdef class Int64Vector(Vector) :
85
90
86
91
cdef:
87
- size_t n, m
88
- ndarray ao
89
92
int64_t * data
90
93
91
94
def __cinit__ (self ):
@@ -94,28 +97,29 @@ cdef class Int64Vector:
94
97
self .ao = np.empty(_INIT_VEC_CAP, dtype = np.int64)
95
98
self .data = < int64_t* > self .ao.data
96
99
97
- def __len__ (self ):
98
- return self .n
100
+ cdef inline uint8_t needs_resize(self ) nogil:
101
+ # if we need to resize
102
+ return self .n == self .m
99
103
100
- def to_array (self ):
101
- self .ao.resize (self .n )
102
- self .m = self .n
103
- return self .ao
104
+ cdef resize (self ):
105
+ self .m = max (self .m * 2 , _INIT_VEC_CAP )
106
+ self .ao.resize( self .m)
107
+ self .data = < int64_t * > self .ao.data
104
108
105
- cdef inline append(self , int64_t x):
106
- if self .n == self .m:
107
- self .m = max (self .m * 2 , _INIT_VEC_CAP)
108
- self .ao.resize(self .m)
109
- self .data = < int64_t* > self .ao.data
109
+ cdef inline void append(self , int64_t x) nogil:
110
110
111
- self .data[self .n] = x
112
- self .n += 1
111
+ with nogil:
112
+
113
+ if self .needs_resize():
114
+ with gil:
115
+ self .resize()
113
116
114
- cdef class Float64Vector:
117
+ self .data[self .n] = x
118
+ self .n += 1
119
+
120
+ cdef class Float64Vector(Vector):
115
121
116
122
cdef:
117
- size_t n, m
118
- ndarray ao
119
123
float64_t * data
120
124
121
125
def __cinit__ (self ):
@@ -124,14 +128,6 @@ cdef class Float64Vector:
124
128
self .ao = np.empty(_INIT_VEC_CAP, dtype = np.float64)
125
129
self .data = < float64_t* > self .ao.data
126
130
127
- def __len__ (self ):
128
- return self .n
129
-
130
- def to_array (self ):
131
- self .ao.resize(self .n)
132
- self .m = self .n
133
- return self .ao
134
-
135
131
cdef inline append(self , float64_t x):
136
132
if self .n == self .m:
137
133
self .m = max (self .m * 2 , _INIT_VEC_CAP)
@@ -142,18 +138,17 @@ cdef class Float64Vector:
142
138
self .n += 1
143
139
144
140
145
- cdef class HashTable:
146
- pass
147
-
148
-
149
- cdef class StringHashTable(HashTable):
141
+ cdef class StringHashTable:
150
142
cdef kh_str_t * table
151
143
152
144
def __cinit__ (self , int size_hint = 1 ):
153
145
self .table = kh_init_str()
154
146
if size_hint is not None :
155
147
kh_resize_str(self .table, size_hint)
156
148
149
+ def __len__ (self ):
150
+ return self .table.size
151
+
157
152
def __dealloc__ (self ):
158
153
kh_destroy_str(self .table)
159
154
@@ -256,7 +251,7 @@ cdef class StringHashTable(HashTable):
256
251
257
252
return reverse, labels
258
253
259
- cdef class Int32HashTable(HashTable) :
254
+ cdef class Int32HashTable:
260
255
cdef kh_int32_t * table
261
256
262
257
def __init__ (self , size_hint = 1 ):
@@ -266,6 +261,9 @@ cdef class Int32HashTable(HashTable):
266
261
def __cinit__ (self ):
267
262
self .table = kh_init_int32()
268
263
264
+ def __len__ (self ):
265
+ return self .table.size
266
+
269
267
def __dealloc__ (self ):
270
268
kh_destroy_int32(self .table)
271
269
@@ -353,14 +351,16 @@ cdef class Int32HashTable(HashTable):
353
351
354
352
return reverse, labels
355
353
356
- cdef class Int64HashTable: # (HashTable):
357
- # cdef kh_int64_t *table
354
+ cdef class Int64HashTable:
358
355
359
356
def __cinit__ (self , size_hint = 1 ):
360
357
self .table = kh_init_int64()
361
358
if size_hint is not None :
362
359
kh_resize_int64(self .table, size_hint)
363
360
361
+ def __len__ (self ):
362
+ return self .table.size
363
+
364
364
def __dealloc__ (self ):
365
365
kh_destroy_int64(self .table)
366
366
@@ -369,9 +369,6 @@ cdef class Int64HashTable: #(HashTable):
369
369
k = kh_get_int64(self .table, key)
370
370
return k != self .table.n_buckets
371
371
372
- def __len__ (self ):
373
- return self .table.size
374
-
375
372
cpdef get_item(self , int64_t val):
376
373
cdef khiter_t k
377
374
k = kh_get_int64(self .table, val)
@@ -446,6 +443,7 @@ cdef class Int64HashTable: #(HashTable):
446
443
labels = self .get_labels(values, reverse, 0 )
447
444
return reverse, labels
448
445
446
+ @ cython.boundscheck (False )
449
447
def get_labels (self , ndarray[int64_t] values , Int64Vector uniques ,
450
448
Py_ssize_t count_prior , Py_ssize_t na_sentinel ):
451
449
cdef:
@@ -458,21 +456,23 @@ cdef class Int64HashTable: #(HashTable):
458
456
459
457
labels = np.empty(n, dtype = np.int64)
460
458
461
- for i in range (n):
462
- val = values[i]
463
- k = kh_get_int64(self .table, val)
464
- if k != self .table.n_buckets:
465
- idx = self .table.vals[k]
466
- labels[i] = idx
467
- else :
468
- k = kh_put_int64(self .table, val, & ret)
469
- self .table.vals[k] = count
470
- uniques.append(val)
471
- labels[i] = count
472
- count += 1
459
+ with nogil:
460
+ for i in range (n):
461
+ val = values[i]
462
+ k = kh_get_int64(self .table, val)
463
+ if k != self .table.n_buckets:
464
+ idx = self .table.vals[k]
465
+ labels[i] = idx
466
+ else :
467
+ k = kh_put_int64(self .table, val, & ret)
468
+ self .table.vals[k] = count
469
+ uniques.append(val)
470
+ labels[i] = count
471
+ count += 1
473
472
474
473
return labels
475
474
475
+ @ cython.boundscheck (False )
476
476
def get_labels_groupby (self , ndarray[int64_t] values ):
477
477
cdef:
478
478
Py_ssize_t i, n = len (values)
@@ -485,24 +485,25 @@ cdef class Int64HashTable: #(HashTable):
485
485
486
486
labels = np.empty(n, dtype = np.int64)
487
487
488
- for i in range (n):
489
- val = values[i]
490
-
491
- # specific for groupby
492
- if val < 0 :
493
- labels[i] = - 1
494
- continue
495
-
496
- k = kh_get_int64(self .table, val)
497
- if k != self .table.n_buckets:
498
- idx = self .table.vals[k]
499
- labels[i] = idx
500
- else :
501
- k = kh_put_int64(self .table, val, & ret)
502
- self .table.vals[k] = count
503
- uniques.append(val)
504
- labels[i] = count
505
- count += 1
488
+ with nogil:
489
+ for i in range (n):
490
+ val = values[i]
491
+
492
+ # specific for groupby
493
+ if val < 0 :
494
+ labels[i] = - 1
495
+ continue
496
+
497
+ k = kh_get_int64(self .table, val)
498
+ if k != self .table.n_buckets:
499
+ idx = self .table.vals[k]
500
+ labels[i] = idx
501
+ else :
502
+ k = kh_put_int64(self .table, val, & ret)
503
+ self .table.vals[k] = count
504
+ uniques.append(val)
505
+ labels[i] = count
506
+ count += 1
506
507
507
508
arr_uniques = uniques.to_array()
508
509
@@ -530,6 +531,7 @@ cdef class Int64HashTable: #(HashTable):
530
531
531
532
532
533
cdef class Float64HashTable(HashTable):
534
+
533
535
def __cinit__ (self , size_hint = 1 ):
534
536
self .table = kh_init_float64()
535
537
if size_hint is not None :
@@ -658,7 +660,6 @@ cdef class Float64HashTable(HashTable):
658
660
na_sentinel = object
659
661
660
662
cdef class PyObjectHashTable(HashTable):
661
- # cdef kh_pymap_t *table
662
663
663
664
def __init__ (self , size_hint = 1 ):
664
665
self .table = kh_init_pymap()
0 commit comments