Skip to content

Commit 8438fe7

Browse files
authored
CI: Make is_ci_environment less necessary (#56058)
* CI: Make is_ci_environment less necessary * Add back env setting * Add back comment * Refactor test_read_csv_chunked_download
1 parent 91af4fa commit 8438fe7

File tree

7 files changed

+40
-109
lines changed

7 files changed

+40
-109
lines changed

asv_bench/benchmarks/io/csv.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,4 +621,15 @@ def time_read_csv_index_col(self):
621621
)
622622

623623

624+
class ReadCSVCParserLowMemory:
625+
# GH 16798
626+
def setup(self):
627+
self.csv = StringIO(
628+
"strings\n" + "\n".join(["x" * (1 << 20) for _ in range(2100)])
629+
)
630+
631+
def peakmem_over_2gb_input(self):
632+
read_csv(self.csv, engine="c", low_memory=False)
633+
634+
624635
from ..pandas_vb_common import setup # noqa: F401 isort:skip

pandas/tests/io/conftest.py

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -51,23 +51,7 @@ def xml_file(datapath):
5151

5252

5353
@pytest.fixture
54-
def s3so(worker_id):
55-
if is_ci_environment():
56-
url = "http://localhost:5000/"
57-
else:
58-
worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw")
59-
url = f"http://127.0.0.1:555{worker_id}/"
60-
return {"client_kwargs": {"endpoint_url": url}}
61-
62-
63-
@pytest.fixture(scope="function" if is_ci_environment() else "session")
64-
def monkeysession():
65-
with pytest.MonkeyPatch.context() as mp:
66-
yield mp
67-
68-
69-
@pytest.fixture(scope="function" if is_ci_environment() else "session")
70-
def s3_base(worker_id, monkeysession):
54+
def s3_base(worker_id, monkeypatch):
7155
"""
7256
Fixture for mocking S3 interaction.
7357
@@ -79,8 +63,8 @@ def s3_base(worker_id, monkeysession):
7963

8064
# temporary workaround as moto fails for botocore >= 1.11 otherwise,
8165
# see https://github.com/spulec/moto/issues/1924 & 1952
82-
monkeysession.setenv("AWS_ACCESS_KEY_ID", "foobar_key")
83-
monkeysession.setenv("AWS_SECRET_ACCESS_KEY", "foobar_secret")
66+
monkeypatch.setenv("AWS_ACCESS_KEY_ID", "foobar_key")
67+
monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "foobar_secret")
8468
if is_ci_environment():
8569
if is_platform_arm() or is_platform_mac() or is_platform_windows():
8670
# NOT RUN on Windows/macOS/ARM, only Ubuntu
@@ -93,6 +77,7 @@ def s3_base(worker_id, monkeysession):
9377
"Windows, macOS or ARM platforms"
9478
)
9579
else:
80+
# set in .github/workflows/unit-tests.yml
9681
yield "http://localhost:5000"
9782
else:
9883
requests = pytest.importorskip("requests")
@@ -128,6 +113,11 @@ def s3_base(worker_id, monkeysession):
128113
proc.terminate()
129114

130115

116+
@pytest.fixture
117+
def s3so(s3_base):
118+
return {"client_kwargs": {"endpoint_url": s3_base}}
119+
120+
131121
@pytest.fixture
132122
def s3_resource(s3_base):
133123
import boto3

pandas/tests/io/parser/test_c_parser_only.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import numpy as np
1818
import pytest
1919

20-
from pandas.compat import is_ci_environment
2120
from pandas.compat.numpy import np_version_gte1p24
2221
from pandas.errors import (
2322
ParserError,
@@ -531,24 +530,6 @@ def test_read_tarfile(c_parser_only, csv_dir_path, tar_suffix):
531530
tm.assert_frame_equal(out, expected)
532531

533532

534-
@pytest.mark.single_cpu
535-
@pytest.mark.skipif(is_ci_environment(), reason="Too memory intensive for CI.")
536-
def test_bytes_exceed_2gb(c_parser_only):
537-
# see gh-16798
538-
#
539-
# Read from a "CSV" that has a column larger than 2GB.
540-
parser = c_parser_only
541-
542-
if parser.low_memory:
543-
pytest.skip("not a low_memory test")
544-
545-
# csv takes 10 seconds to construct, spikes memory to 8GB+, the whole test
546-
# spikes up to 10.4GB on the c_high case
547-
csv = StringIO("strings\n" + "\n".join(["x" * (1 << 20) for _ in range(2100)]))
548-
df = parser.read_csv(csv)
549-
assert not df.empty
550-
551-
552533
def test_chunk_whitespace_on_boundary(c_parser_only):
553534
# see gh-9735: this issue is C parser-specific (bug when
554535
# parsing whitespace and characters at chunk boundary)

pandas/tests/io/parser/test_network.py

Lines changed: 17 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,13 @@
22
Tests parsers ability to read and parse non-local files
33
and hence require a network connection to be read.
44
"""
5-
from io import (
6-
BytesIO,
7-
StringIO,
8-
)
5+
from io import BytesIO
96
import logging
7+
import re
108

119
import numpy as np
1210
import pytest
1311

14-
from pandas.compat import is_ci_environment
1512
import pandas.util._test_decorators as td
1613

1714
from pandas import DataFrame
@@ -292,39 +289,23 @@ def test_read_csv_handles_boto_s3_object(
292289
tm.assert_frame_equal(result, expected)
293290

294291
@pytest.mark.single_cpu
295-
@pytest.mark.skipif(
296-
is_ci_environment(),
297-
reason="GH: 45651: This test can hang in our CI min_versions build",
298-
)
299292
def test_read_csv_chunked_download(self, s3_public_bucket, caplog, s3so):
300293
# 8 MB, S3FS uses 5MB chunks
301-
import s3fs
302-
303-
df = DataFrame(
304-
np.random.default_rng(2).standard_normal((100000, 4)), columns=list("abcd")
305-
)
306-
str_buf = StringIO()
307-
308-
df.to_csv(str_buf)
309-
310-
buf = BytesIO(str_buf.getvalue().encode("utf-8"))
311-
312-
s3_public_bucket.put_object(Key="large-file.csv", Body=buf)
313-
314-
# Possibly some state leaking in between tests.
315-
# If we don't clear this cache, we saw `GetObject operation: Forbidden`.
316-
# Presumably the s3fs instance is being cached, with the directory listing
317-
# from *before* we add the large-file.csv in the s3_public_bucket_with_data.
318-
s3fs.S3FileSystem.clear_instance_cache()
319-
320-
with caplog.at_level(logging.DEBUG, logger="s3fs"):
321-
read_csv(
322-
f"s3://{s3_public_bucket.name}/large-file.csv",
323-
nrows=5,
324-
storage_options=s3so,
325-
)
326-
# log of fetch_range (start, stop)
327-
assert (0, 5505024) in (x.args[-2:] for x in caplog.records)
294+
df = DataFrame(np.zeros((100000, 4)), columns=list("abcd"))
295+
with BytesIO(df.to_csv().encode("utf-8")) as buf:
296+
s3_public_bucket.put_object(Key="large-file.csv", Body=buf)
297+
uri = f"{s3_public_bucket.name}/large-file.csv"
298+
match_re = re.compile(rf"^Fetch: {uri}, 0-(?P<stop>\d+)$")
299+
with caplog.at_level(logging.DEBUG, logger="s3fs"):
300+
read_csv(
301+
f"s3://{uri}",
302+
nrows=5,
303+
storage_options=s3so,
304+
)
305+
for log in caplog.messages:
306+
if match := re.match(match_re, log):
307+
# Less than 8 MB
308+
assert int(match.group("stop")) < 8000000
328309

329310
def test_read_s3_with_hash_in_key(self, s3_public_bucket_with_data, tips_df, s3so):
330311
# GH 25945

pandas/tests/io/test_s3.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,10 @@ def test_read_without_creds_from_pub_bucket(s3_public_bucket_with_data, s3so):
3030

3131

3232
@pytest.mark.single_cpu
33-
def test_read_with_creds_from_pub_bucket(s3_public_bucket_with_data, monkeypatch, s3so):
33+
def test_read_with_creds_from_pub_bucket(s3_public_bucket_with_data, s3so):
3434
# Ensure we can read from a public bucket with credentials
3535
# GH 34626
36-
37-
# temporary workaround as moto fails for botocore >= 1.11 otherwise,
38-
# see https://github.com/spulec/moto/issues/1924 & 1952
3936
pytest.importorskip("s3fs")
40-
monkeypatch.setenv("AWS_ACCESS_KEY_ID", "foobar_key")
41-
monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "foobar_secret")
4237
df = read_csv(
4338
f"s3://{s3_public_bucket_with_data.name}/tips.csv",
4439
nrows=5,

pandas/tests/window/test_numba.py

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.compat import (
5-
is_ci_environment,
6-
is_platform_mac,
7-
is_platform_windows,
8-
)
94
from pandas.errors import NumbaUtilError
105
import pandas.util._test_decorators as td
116

@@ -17,15 +12,7 @@
1712
)
1813
import pandas._testing as tm
1914

20-
pytestmark = [
21-
pytest.mark.single_cpu,
22-
pytest.mark.skipif(
23-
is_ci_environment() and (is_platform_windows() or is_platform_mac()),
24-
reason="On GHA CI, Windows can fail with "
25-
"'Windows fatal exception: stack overflow' "
26-
"and macOS can timeout",
27-
),
28-
]
15+
pytestmark = pytest.mark.single_cpu
2916

3017

3118
@pytest.fixture(params=["single", "table"])

pandas/tests/window/test_online.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,13 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.compat import (
5-
is_ci_environment,
6-
is_platform_mac,
7-
is_platform_windows,
8-
)
9-
104
from pandas import (
115
DataFrame,
126
Series,
137
)
148
import pandas._testing as tm
159

16-
pytestmark = [
17-
pytest.mark.single_cpu,
18-
pytest.mark.skipif(
19-
is_ci_environment() and (is_platform_windows() or is_platform_mac()),
20-
reason="On GHA CI, Windows can fail with "
21-
"'Windows fatal exception: stack overflow' "
22-
"and macOS can timeout",
23-
),
24-
]
10+
pytestmark = pytest.mark.single_cpu
2511

2612
pytest.importorskip("numba")
2713

0 commit comments

Comments
 (0)