-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: Add support for reading 102-format Stata dta files #58978
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
94f9b5e
ce18f3f
9a998e6
a14eaf7
1a8da12
de44464
dc2f4c2
86fc3f6
72906a8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -267,7 +267,7 @@ def test_read_dta4(self, version, datapath): | |
# stata doesn't save .category metadata | ||
tm.assert_frame_equal(parsed, expected) | ||
|
||
@pytest.mark.parametrize("version", [103, 104, 105, 108]) | ||
@pytest.mark.parametrize("version", [102, 103, 104, 105, 108]) | ||
def test_readold_dta4(self, version, datapath): | ||
# This test is the same as test_read_dta4 above except that the columns | ||
# had to be renamed to match the restrictions in older file format | ||
|
@@ -2058,6 +2058,20 @@ def test_backward_compat_nodateconversion(version, datapath): | |
tm.assert_frame_equal(old_dta, expected, check_dtype=False) | ||
|
||
|
||
@pytest.mark.parametrize("version", [102]) | ||
def test_backward_compat_nostring(version, datapath): | ||
# The Stata data format prior to 105 did not support a date format | ||
# so read the raw values for comparison | ||
data_base = datapath("io", "data", "stata") | ||
ref = os.path.join(data_base, "stata-compat-118.dta") | ||
old = os.path.join(data_base, f"stata-compat-{version}.dta") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you make these 2 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have now made this change. Note that the equivalent tests for other format versions don't use |
||
expected = read_stata(ref, convert_dates=False) | ||
# The Stata data format prior to 103 did not support string data | ||
expected = expected.drop(columns=["s10"]) | ||
old_dta = read_stata(old, convert_dates=False) | ||
tm.assert_frame_equal(old_dta, expected, check_dtype=False) | ||
|
||
|
||
@pytest.mark.parametrize("version", [105, 108, 110, 111, 113, 114, 118]) | ||
def test_bigendian(version, datapath): | ||
ref = datapath("io", "data", "stata", f"stata-compat-{version}.dta") | ||
|
@@ -2067,6 +2081,7 @@ def test_bigendian(version, datapath): | |
tm.assert_frame_equal(big_dta, expected) | ||
|
||
|
||
# Note: 102 format does not support big-endian byte order | ||
@pytest.mark.parametrize("version", [103, 104]) | ||
def test_bigendian_nodateconversion(version, datapath): | ||
# The Stata data format prior to 105 did not support a date format | ||
|
Uh oh!
There was an error while loading. Please reload this page.