
Commit 4f0c2eb

Merge branch 'master' into CLN-libs-2

2 parents: bc435ec + edcf1c8

204 files changed: +7887 / -6941 lines

.github/workflows/ci.yml

Lines changed: 34 additions & 1 deletion
@@ -154,6 +154,39 @@ jobs:
           echo "region = BHS" >> $RCLONE_CONFIG_PATH
       if: github.event_name == 'push'
 
-    - name: Sync web
+    - name: Sync web with OVH
       run: rclone sync pandas_web ovh_cloud_pandas_web:dev
       if: github.event_name == 'push'
+
+    - name: Create git repo to upload the built docs to GitHub pages
+      run: |
+        cd pandas_web
+        git init
+        touch .nojekyll
+        echo "dev.pandas.io" > CNAME
+        printf "User-agent: *\nDisallow: /" > robots.txt
+        git add --all .
+        git config user.email "[email protected]"
+        git config user.name "pandas-bot"
+        git commit -m "pandas web and documentation in master"
+      if: github.event_name == 'push'
+
+    # For this task to work, the following steps are required:
+    # 1. Generate a pair of private/public keys (e.g. `ssh-keygen -t rsa -b 4096 -C "[email protected]"`)
+    # 2. Go to https://github.com/pandas-dev/pandas/settings/secrets
+    # 3. Click on "Add a new secret"
+    # 4. Name: "github_pages_ssh_key", Value: <contents of the private ssh key>
+    # 5. The public key needs to be uploaded to https://github.com/pandas-dev/pandas-dev.github.io/settings/keys
+    - name: Install GitHub pages ssh deployment key
+      uses: shimataro/ssh-key-action@v2
+      with:
+        key: ${{ secrets.github_pages_ssh_key }}
+        known_hosts: 'github.com,192.30.252.128 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ=='
+      if: github.event_name == 'push'
+
+    - name: Publish web and docs to GitHub pages
+      run: |
+        cd pandas_web
+        git remote add origin [email protected]:pandas-dev/pandas-dev.github.io.git
+        git push -f origin master || true
+      if: github.event_name == 'push'

asv_bench/benchmarks/algorithms.py

Lines changed: 35 additions & 56 deletions
@@ -31,83 +31,62 @@ def time_maybe_convert_objects(self):
 
 class Factorize:
 
-    params = [[True, False], ["int", "uint", "float", "string"]]
-    param_names = ["sort", "dtype"]
-
-    def setup(self, sort, dtype):
-        N = 10 ** 5
-        data = {
-            "int": pd.Int64Index(np.arange(N).repeat(5)),
-            "uint": pd.UInt64Index(np.arange(N).repeat(5)),
-            "float": pd.Float64Index(np.random.randn(N).repeat(5)),
-            "string": tm.makeStringIndex(N).repeat(5),
-        }
-        self.idx = data[dtype]
-
-    def time_factorize(self, sort, dtype):
-        self.idx.factorize(sort=sort)
-
-
-class FactorizeUnique:
-
-    params = [[True, False], ["int", "uint", "float", "string"]]
-    param_names = ["sort", "dtype"]
+    params = [
+        [True, False],
+        [True, False],
+        ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"],
+    ]
+    param_names = ["unique", "sort", "dtype"]
 
-    def setup(self, sort, dtype):
+    def setup(self, unique, sort, dtype):
         N = 10 ** 5
         data = {
             "int": pd.Int64Index(np.arange(N)),
             "uint": pd.UInt64Index(np.arange(N)),
-            "float": pd.Float64Index(np.arange(N)),
+            "float": pd.Float64Index(np.random.randn(N)),
             "string": tm.makeStringIndex(N),
-        }
-        self.idx = data[dtype]
-        assert self.idx.is_unique
-
-    def time_factorize(self, sort, dtype):
+            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
+            "datetime64[ns, tz]": pd.date_range(
+                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+            ),
+        }[dtype]
+        if not unique:
+            data = data.repeat(5)
+        self.idx = data
+
+    def time_factorize(self, unique, sort, dtype):
         self.idx.factorize(sort=sort)
 
 
 class Duplicated:
 
-    params = [["first", "last", False], ["int", "uint", "float", "string"]]
-    param_names = ["keep", "dtype"]
-
-    def setup(self, keep, dtype):
-        N = 10 ** 5
-        data = {
-            "int": pd.Int64Index(np.arange(N).repeat(5)),
-            "uint": pd.UInt64Index(np.arange(N).repeat(5)),
-            "float": pd.Float64Index(np.random.randn(N).repeat(5)),
-            "string": tm.makeStringIndex(N).repeat(5),
-        }
-        self.idx = data[dtype]
-        # cache is_unique
-        self.idx.is_unique
-
-    def time_duplicated(self, keep, dtype):
-        self.idx.duplicated(keep=keep)
-
-
-class DuplicatedUniqueIndex:
-
-    params = ["int", "uint", "float", "string"]
-    param_names = ["dtype"]
+    params = [
+        [True, False],
+        ["first", "last", False],
+        ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"],
+    ]
+    param_names = ["unique", "keep", "dtype"]
 
-    def setup(self, dtype):
+    def setup(self, unique, keep, dtype):
         N = 10 ** 5
         data = {
             "int": pd.Int64Index(np.arange(N)),
             "uint": pd.UInt64Index(np.arange(N)),
             "float": pd.Float64Index(np.random.randn(N)),
             "string": tm.makeStringIndex(N),
-        }
-        self.idx = data[dtype]
+            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
+            "datetime64[ns, tz]": pd.date_range(
+                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+            ),
+        }[dtype]
+        if not unique:
+            data = data.repeat(5)
+        self.idx = data
         # cache is_unique
         self.idx.is_unique
 
-    def time_duplicated_unique(self, dtype):
-        self.idx.duplicated()
+    def time_duplicated(self, unique, keep, dtype):
+        self.idx.duplicated(keep=keep)
 
 
 class Hashing:
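For context, a minimal sketch (not part of the commit) of what the merged Factorize benchmark now exercises, using the newly added tz-aware datetime dtype and the non-unique (unique=False) case; the reduced setup here is only illustrative:

    import pandas as pd

    N = 10 ** 5
    # tz-aware datetimes are one of the dtypes added to the benchmark matrix
    idx = pd.date_range("2011-01-01", freq="H", periods=N, tz="Asia/Tokyo")
    idx = idx.repeat(5)  # the unique=False branch repeats the data
    codes, uniques = idx.factorize(sort=True)
    print(len(codes), len(uniques))  # 500000, 100000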

asv_bench/benchmarks/binary_ops.py renamed to asv_bench/benchmarks/arithmetic.py

Lines changed: 178 additions & 1 deletion
@@ -1,14 +1,23 @@
 import operator
+import warnings
 
 import numpy as np
 
-from pandas import DataFrame, Series, date_range
+import pandas as pd
+from pandas import DataFrame, Series, Timestamp, date_range, to_timedelta
+import pandas._testing as tm
 from pandas.core.algorithms import checked_add_with_arr
 
+from .pandas_vb_common import numeric_dtypes
+
 try:
     import pandas.core.computation.expressions as expr
 except ImportError:
     import pandas.computation.expressions as expr
+try:
+    import pandas.tseries.holiday
+except ImportError:
+    pass
 
 
 class IntFrameWithScalar:

@@ -151,6 +160,110 @@ def time_timestamp_ops_diff_with_shift(self, tz):
         self.s - self.s.shift()
 
 
+class IrregularOps:
+    def setup(self):
+        N = 10 ** 5
+        idx = date_range(start="1/1/2000", periods=N, freq="s")
+        s = Series(np.random.randn(N), index=idx)
+        self.left = s.sample(frac=1)
+        self.right = s.sample(frac=1)
+
+    def time_add(self):
+        self.left + self.right
+
+
+class TimedeltaOps:
+    def setup(self):
+        self.td = to_timedelta(np.arange(1000000))
+        self.ts = Timestamp("2000")
+
+    def time_add_td_ts(self):
+        self.td + self.ts
+
+
+class CategoricalComparisons:
+    params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
+    param_names = ["op"]
+
+    def setup(self, op):
+        N = 10 ** 5
+        self.cat = pd.Categorical(list("aabbcd") * N, ordered=True)
+
+    def time_categorical_op(self, op):
+        getattr(self.cat, op)("b")
+
+
+class IndexArithmetic:
+
+    params = ["float", "int"]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        N = 10 ** 6
+        indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"}
+        self.index = getattr(tm, indexes[dtype])(N)
+
+    def time_add(self, dtype):
+        self.index + 2
+
+    def time_subtract(self, dtype):
+        self.index - 2
+
+    def time_multiply(self, dtype):
+        self.index * 2
+
+    def time_divide(self, dtype):
+        self.index / 2
+
+    def time_modulo(self, dtype):
+        self.index % 2
+
+
+class NumericInferOps:
+    # from GH 7332
+    params = numeric_dtypes
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        N = 5 * 10 ** 5
+        self.df = DataFrame(
+            {"A": np.arange(N).astype(dtype), "B": np.arange(N).astype(dtype)}
+        )
+
+    def time_add(self, dtype):
+        self.df["A"] + self.df["B"]
+
+    def time_subtract(self, dtype):
+        self.df["A"] - self.df["B"]
+
+    def time_multiply(self, dtype):
+        self.df["A"] * self.df["B"]
+
+    def time_divide(self, dtype):
+        self.df["A"] / self.df["B"]
+
+    def time_modulo(self, dtype):
+        self.df["A"] % self.df["B"]
+
+
+class DateInferOps:
+    # from GH 7332
+    def setup_cache(self):
+        N = 5 * 10 ** 5
+        df = DataFrame({"datetime64": np.arange(N).astype("datetime64[ms]")})
+        df["timedelta"] = df["datetime64"] - df["datetime64"]
+        return df
+
+    def time_subtract_datetimes(self, df):
+        df["datetime64"] - df["datetime64"]
+
+    def time_timedelta_plus_datetime(self, df):
+        df["timedelta"] + df["datetime64"]
+
+    def time_add_timedeltas(self, df):
+        df["timedelta"] + df["timedelta"]
+
+
 class AddOverflowScalar:
 
     params = [1, -1, 0]

@@ -188,4 +301,68 @@ def time_add_overflow_both_arg_nan(self):
         )
 
 
+hcal = pd.tseries.holiday.USFederalHolidayCalendar()
+# These offsets currently raise a NotImplementedError with .apply_index()
+non_apply = [
+    pd.offsets.Day(),
+    pd.offsets.BYearEnd(),
+    pd.offsets.BYearBegin(),
+    pd.offsets.BQuarterEnd(),
+    pd.offsets.BQuarterBegin(),
+    pd.offsets.BMonthEnd(),
+    pd.offsets.BMonthBegin(),
+    pd.offsets.CustomBusinessDay(),
+    pd.offsets.CustomBusinessDay(calendar=hcal),
+    pd.offsets.CustomBusinessMonthBegin(calendar=hcal),
+    pd.offsets.CustomBusinessMonthEnd(calendar=hcal),
+    pd.offsets.CustomBusinessMonthEnd(calendar=hcal),
+]
+other_offsets = [
+    pd.offsets.YearEnd(),
+    pd.offsets.YearBegin(),
+    pd.offsets.QuarterEnd(),
+    pd.offsets.QuarterBegin(),
+    pd.offsets.MonthEnd(),
+    pd.offsets.MonthBegin(),
+    pd.offsets.DateOffset(months=2, days=2),
+    pd.offsets.BusinessDay(),
+    pd.offsets.SemiMonthEnd(),
+    pd.offsets.SemiMonthBegin(),
+]
+offsets = non_apply + other_offsets
+
+
+class OffsetArrayArithmetic:
+
+    params = offsets
+    param_names = ["offset"]
+
+    def setup(self, offset):
+        N = 10000
+        rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
+        self.rng = rng
+        self.ser = pd.Series(rng)
+
+    def time_add_series_offset(self, offset):
+        with warnings.catch_warnings(record=True):
+            self.ser + offset
+
+    def time_add_dti_offset(self, offset):
+        with warnings.catch_warnings(record=True):
+            self.rng + offset
+
+
+class ApplyIndex:
+    params = other_offsets
+    param_names = ["offset"]
+
+    def setup(self, offset):
+        N = 10000
+        rng = pd.date_range(start="1/1/2000", periods=N, freq="T")
+        self.rng = rng
+
+    def time_apply_index(self, offset):
+        offset.apply_index(self.rng)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
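For context, a minimal sketch (not part of the commit) of the two code paths that OffsetArrayArithmetic and ApplyIndex time: adding a DateOffset to a DatetimeIndex versus calling its apply_index method, which exists in the pandas versions these benchmarks target but was deprecated and later removed. MonthEnd is just one of the parametrized offsets:

    import pandas as pd

    rng = pd.date_range(start="1/1/2000", periods=10000, freq="T")
    offset = pd.offsets.MonthEnd()  # one entry from `other_offsets`

    shifted_add = rng + offset               # path timed by time_add_dti_offset
    shifted_apply = offset.apply_index(rng)  # path timed by ApplyIndex.time_apply_index
    assert shifted_add.equals(shifted_apply)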

asv_bench/benchmarks/categoricals.py

Lines changed: 0 additions & 15 deletions
@@ -63,18 +63,6 @@ def time_existing_series(self):
         pd.Categorical(self.series)
 
 
-class CategoricalOps:
-    params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
-    param_names = ["op"]
-
-    def setup(self, op):
-        N = 10 ** 5
-        self.cat = pd.Categorical(list("aabbcd") * N, ordered=True)
-
-    def time_categorical_op(self, op):
-        getattr(self.cat, op)("b")
-
-
 class Concat:
     def setup(self):
         N = 10 ** 5

@@ -270,9 +258,6 @@ def setup(self):
     def time_get_loc(self):
         self.index.get_loc(self.category)
 
-    def time_shape(self):
-        self.index.shape
-
     def time_shallow_copy(self):
         self.index._shallow_copy()
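The removed CategoricalOps benchmark is the same code that reappears in arithmetic.py as CategoricalComparisons. For reference, a minimal sketch (not part of the commit, with a much smaller N) of the ordered-categorical comparison it times:

    import pandas as pd

    # ordered categorical compared against one of its own categories
    cat = pd.Categorical(list("aabbcd") * 10, ordered=True)
    print((cat < "b")[:6])      # elementwise boolean result
    print(cat.__ge__("b")[:6])  # the dunder form exercised by the benchmark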

0 commit comments