@@ -1144,6 +1144,9 @@ def __init__(
1144
1144
self ._preserve_dtypes = preserve_dtypes
1145
1145
self ._columns = columns
1146
1146
self ._order_categoricals = order_categoricals
1147
+ self ._original_path_or_buf = path_or_buf
1148
+ self ._compression = compression
1149
+ self ._storage_options = storage_options
1147
1150
self ._encoding = ""
1148
1151
self ._chunksize = chunksize
1149
1152
self ._using_iterator = False
@@ -1153,6 +1156,9 @@ def __init__(
1153
1156
raise ValueError ("chunksize must be a positive integer when set." )
1154
1157
1155
1158
# State variables for the file
1159
+ # NB: _path_or_buf is mistyped on purpose, since the alternative is to placate
1160
+ # mypy by having an assert before every read.
1161
+ self ._path_or_buf : IO [bytes ] = None # type: ignore[assignment]
1156
1162
self ._has_string_data = False
1157
1163
self ._missing_values = False
1158
1164
self ._can_read_value_labels = False
@@ -1163,12 +1169,24 @@ def __init__(
1163
1169
self ._lines_read = 0
1164
1170
1165
1171
self ._native_byteorder = _set_endianness (sys .byteorder )
1172
+
1173
+ def _ensure_open (self ) -> None :
1174
+ """
1175
+ Ensure the file has been opened and its header data read.
1176
+ """
1177
+ if self ._path_or_buf is None :
1178
+ self ._open_file ()
1179
+
1180
+ def _open_file (self ) -> None :
1181
+ """
1182
+ Open the file (with compression options, etc.), and read header information.
1183
+ """
1166
1184
with get_handle (
1167
- path_or_buf ,
1185
+ self . _original_path_or_buf ,
1168
1186
"rb" ,
1169
- storage_options = storage_options ,
1187
+ storage_options = self . _storage_options ,
1170
1188
is_text = False ,
1171
- compression = compression ,
1189
+ compression = self . _compression ,
1172
1190
) as handles :
1173
1191
# Copy to BytesIO, and ensure no encoding
1174
1192
self ._path_or_buf = BytesIO (handles .handle .read ())
@@ -1536,6 +1554,7 @@ def _decode(self, s: bytes) -> str:
1536
1554
return s .decode ("latin-1" )
1537
1555
1538
1556
def _read_value_labels (self ) -> None :
1557
+ self ._ensure_open ()
1539
1558
if self ._value_labels_read :
1540
1559
# Don't read twice
1541
1560
return
@@ -1655,6 +1674,7 @@ def read(
1655
1674
columns : Sequence [str ] | None = None ,
1656
1675
order_categoricals : bool | None = None ,
1657
1676
) -> DataFrame :
1677
+ self ._ensure_open ()
1658
1678
# Handle empty file or chunk. If reading incrementally raise
1659
1679
# StopIteration. If reading the whole thing return an empty
1660
1680
# data frame.
@@ -1983,57 +2003,72 @@ def data_label(self) -> str:
1983
2003
"""
1984
2004
Return data label of Stata file.
1985
2005
"""
2006
+ self ._ensure_open ()
1986
2007
return self ._data_label
1987
2008
1988
2009
@property
def typlist(self) -> list[int | str]:
    """
    Return list of variable types.

    The file is opened and its header read first if that has not
    happened yet.
    """
    # Header fields are populated lazily; load them before reading.
    self._ensure_open()
    variable_types = self._typlist
    return variable_types
1994
2016
1995
2017
@property
def dtyplist(self) -> list[str | np.dtype]:
    """
    Return list of variable types.

    Entries are strings or NumPy dtypes. The file is opened and its
    header read first if that has not happened yet.
    """
    # Header fields are populated lazily; load them before reading.
    self._ensure_open()
    variable_dtypes = self._dtyplist
    return variable_dtypes
2001
2024
2002
2025
@property
def lbllist(self) -> list[str]:
    """
    Return list of variable labels.

    The file is opened and its header read first if that has not
    happened yet.
    """
    # Header fields are populated lazily; load them before reading.
    self._ensure_open()
    labels = self._lbllist
    return labels
2008
2032
2009
2033
@property
def varlist(self) -> list[str]:
    """
    Return list of variable names.

    The file is opened and its header read first if that has not
    happened yet.
    """
    # Header fields are populated lazily; load them before reading.
    self._ensure_open()
    names = self._varlist
    return names
2015
2040
2016
2041
@property
def fmtlist(self) -> list[str]:
    """
    Return list of variable formats.

    The file is opened and its header read first if that has not
    happened yet.
    """
    # Header fields are populated lazily; load them before reading.
    self._ensure_open()
    formats = self._fmtlist
    return formats
2022
2048
2023
2049
@property
def time_stamp(self) -> str:
    """
    Return time stamp of Stata file.

    The file is opened and its header read first if that has not
    happened yet.
    """
    # Header fields are populated lazily; load them before reading.
    self._ensure_open()
    stamp = self._time_stamp
    return stamp
2029
2056
2030
2057
@property
def format_version(self) -> int:
    """
    Return format version of Stata file.

    The file is opened and its header read first if that has not
    happened yet.
    """
    # Header fields are populated lazily; load them before reading.
    self._ensure_open()
    version = self._format_version
    return version
2036
2064
2065
@property
def path_or_buf(self) -> IO[bytes]:
    """
    Return the file handle of the Stata file being read.

    The file is opened first if that has not happened yet, so the
    returned handle is never the ``None`` placeholder that
    ``_path_or_buf`` holds before opening.
    """
    # Opening is deferred until first use. Without this call, reading
    # the property before any other access would return None, which
    # contradicts the declared return type.
    self._ensure_open()
    return self._path_or_buf
2071
+
2037
2072
def variable_labels (self ) -> dict [str , str ]:
2038
2073
"""
2039
2074
Return a dict associating each variable name with corresponding label.
@@ -2042,6 +2077,7 @@ def variable_labels(self) -> dict[str, str]:
2042
2077
-------
2043
2078
dict
2044
2079
"""
2080
+ self ._ensure_open ()
2045
2081
return dict (zip (self ._varlist , self ._variable_labels ))
2046
2082
2047
2083
def value_labels (self ) -> dict [str , dict [float , str ]]:
0 commit comments