pandas-dev · jreback · Dec 30, 2020 · Dec 29, 2020 · Dec 29, 2020 · Dec 29, 2020
diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst
@@ -17,6 +17,7 @@ Fixed regressions
 - The deprecated attributes ``_AXIS_NAMES`` and ``_AXIS_NUMBERS`` of :class:`DataFrame` and :class:`Series` will no longer show up in ``dir`` or ``inspect.getmembers`` calls (:issue:`38740`)
 - :meth:`to_csv` created corrupted zip files when there were more rows than ``chunksize`` (issue:`38714`)
 - Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`)
+- Bug in :meth:`read_csv` with ``float_precision="high"`` caused segfault or wrong parsing of long exponent strings (:issue:`38753`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
@@ -1726,7 +1726,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
         // Process string of digits.
         num_digits = 0;
         n = 0;
-        while (isdigit_ascii(*p)) {
+        while (num_digits < max_digits && isdigit_ascii(*p)) {
             n = n * 10 + (*p - '0');
             num_digits++;
             p++;
@@ -1747,10 +1747,13 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
     } else if (exponent > 0) {
         number *= e[exponent];
     } else if (exponent < -308) {  // Subnormal
-        if (exponent < -616)       // Prevent invalid array access.
+        if (exponent < -616) {  // Prevent invalid array access.
             number = 0.;
-        number /= e[-308 - exponent];
-        number /= e[308];
+        } else {
+            number /= e[-308 - exponent];
+            number /= e[308];
+        }
+
     } else {
         number /= e[-exponent];
     }

diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
@@ -1351,6 +1351,30 @@ def test_numeric_range_too_wide(all_parsers, exp_data):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize("neg_exp", [-617, -100000, -99999999999999999])
+def test_very_negative_exponent(all_parsers, neg_exp):
+    # GH#38753: a very negative exponent must parse to 0.0 for every float_precision
+    parser = all_parsers
+    data = f"data\n10E{neg_exp}"
+    for precision in parser.float_precision_choices:
+        result = parser.read_csv(StringIO(data), float_precision=precision)
+        expected = DataFrame({"data": [0.0]})
+        tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999])
+def test_too_many_exponent_digits(all_parsers, exp):
+    # GH#38753: an exponent with too many digits leaves the field as a string
+    parser = all_parsers
+    data = f"data\n10E{exp}"
+    for precision in parser.float_precision_choices:
+        if precision == "round_trip":
+            continue  # round_trip is skipped; its result is not asserted here
+        result = parser.read_csv(StringIO(data), float_precision=precision)
+        expected = DataFrame({"data": [f"10E{exp}"]})
+        tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize("iterator", [True, False])
 def test_empty_with_nrows_chunksize(all_parsers, iterator):
     # see gh-9535