Skip to content

gh-134873: Fix quadratic complexity in os.path.expandvars() #134952

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 41 additions & 85 deletions Lib/ntpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,17 +400,23 @@ def expanduser(path):
# XXX With COMMAND.COM you can use any characters in a variable name,
# XXX except '^|<>='.

_varpattern = r"'[^']*'?|%(%|[^%]*%?)|\$(\$|[-\w]+|\{[^}]*\}?)"
_varsub = None
_varsubb = None

def expandvars(path):
"""Expand shell variables of the forms $var, ${var} and %var%.

Unknown variables are left unchanged."""
path = os.fspath(path)
global _varsub, _varsubb
if isinstance(path, bytes):
if b'$' not in path and b'%' not in path:
return path
import string
varchars = bytes(string.ascii_letters + string.digits + '_-', 'ascii')
quote = b'\''
if not _varsubb:
import re
_varsubb = re.compile(_varpattern.encode(), re.ASCII).sub
sub = _varsubb
percent = b'%'
brace = b'{'
rbrace = b'}'
Expand All @@ -419,94 +425,44 @@ def expandvars(path):
else:
if '$' not in path and '%' not in path:
return path
import string
varchars = string.ascii_letters + string.digits + '_-'
quote = '\''
if not _varsub:
import re
_varsub = re.compile(_varpattern, re.ASCII).sub
sub = _varsub
percent = '%'
brace = '{'
rbrace = '}'
dollar = '$'
environ = os.environ
res = path[:0]
index = 0
pathlen = len(path)
while index < pathlen:
c = path[index:index+1]
if c == quote: # no expansion within single quotes
path = path[index + 1:]
pathlen = len(path)
try:
index = path.index(c)
res += c + path[:index + 1]
except ValueError:
res += c + path
index = pathlen - 1
elif c == percent: # variable or '%'
if path[index + 1:index + 2] == percent:
res += c
index += 1
else:
path = path[index+1:]
pathlen = len(path)
try:
index = path.index(percent)
except ValueError:
res += percent + path
index = pathlen - 1
else:
var = path[:index]
try:
if environ is None:
value = os.fsencode(os.environ[os.fsdecode(var)])
else:
value = environ[var]
except KeyError:
value = percent + var + percent
res += value
elif c == dollar: # variable or '$$'
if path[index + 1:index + 2] == dollar:
res += c
index += 1
elif path[index + 1:index + 2] == brace:
path = path[index+2:]
pathlen = len(path)
try:
index = path.index(rbrace)
except ValueError:
res += dollar + brace + path
index = pathlen - 1
else:
var = path[:index]
try:
if environ is None:
value = os.fsencode(os.environ[os.fsdecode(var)])
else:
value = environ[var]
except KeyError:
value = dollar + brace + var + rbrace
res += value
else:
var = path[:0]
index += 1
c = path[index:index + 1]
while c and c in varchars:
var += c
index += 1
c = path[index:index + 1]
try:
if environ is None:
value = os.fsencode(os.environ[os.fsdecode(var)])
else:
value = environ[var]
except KeyError:
value = dollar + var
res += value
if c:
index -= 1

def repl(m):
lastindex = m.lastindex
if lastindex is None:
return m[0]
name = m[lastindex]
if lastindex == 1:
if name == percent:
return name
if not name.endswith(percent):
return m[0]
name = name[:-1]
else:
res += c
index += 1
return res
if name == dollar:
return name
if name.startswith(brace):
if not name.endswith(rbrace):
return m[0]
name = name[1:-1]

try:
if environ is None:
return os.fsencode(os.environ[os.fsdecode(name)])
else:
return environ[name]
except KeyError:
return m[0]

return sub(repl, path)


# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.
Expand Down
43 changes: 20 additions & 23 deletions Lib/posixpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,56 +284,53 @@ def expanduser(path):
# This expands the forms $variable and ${variable} only.
# Non-existent variables are left unchanged.

_varprog = None
_varprogb = None
_varpattern = r'\$(\w+|\{[^}]*\}?)'
_varsub = None
_varsubb = None

def expandvars(path):
"""Expand shell variables of form $var and ${var}. Unknown variables
are left unchanged."""
path = os.fspath(path)
global _varprog, _varprogb
global _varsub, _varsubb
if isinstance(path, bytes):
if b'$' not in path:
return path
if not _varprogb:
if not _varsubb:
import re
_varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
search = _varprogb.search
_varsubb = re.compile(_varpattern.encode(), re.ASCII).sub
sub = _varsubb
start = b'{'
end = b'}'
environ = getattr(os, 'environb', None)
else:
if '$' not in path:
return path
if not _varprog:
if not _varsub:
import re
_varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
search = _varprog.search
_varsub = re.compile(_varpattern, re.ASCII).sub
sub = _varsub
start = '{'
end = '}'
environ = os.environ
i = 0
while True:
m = search(path, i)
if not m:
break
i, j = m.span(0)
name = m.group(1)
if name.startswith(start) and name.endswith(end):

def repl(m):
name = m[1]
if name.startswith(start):
if not name.endswith(end):
return m[0]
name = name[1:-1]
try:
if environ is None:
value = os.fsencode(os.environ[os.fsdecode(name)])
else:
value = environ[name]
except KeyError:
i = j
return m[0]
else:
tail = path[j:]
path = path[:i] + value
i = len(path)
path += tail
return path
return value

return sub(repl, path)


# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
Expand Down
21 changes: 17 additions & 4 deletions Lib/test/test_genericpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
import sys
import unittest
import warnings
from test.support import (
is_apple, os_helper, warnings_helper
)
from test import support
from test.support import os_helper
from test.support import warnings_helper
from test.support.script_helper import assert_python_ok
from test.support.os_helper import FakePath

Expand Down Expand Up @@ -445,6 +445,19 @@ def check(value, expected):
os.fsencode('$bar%s bar' % nonascii))
check(b'$spam}bar', os.fsencode('%s}bar' % nonascii))

@support.requires_resource('cpu')
def test_expandvars_large(self):
expandvars = self.pathmodule.expandvars
with os_helper.EnvironmentVarGuard() as env:
env.clear()
env["A"] = "B"
n = 100_000
self.assertEqual(expandvars('$A'*n), 'B'*n)
self.assertEqual(expandvars('${A}'*n), 'B'*n)
self.assertEqual(expandvars('$A!'*n), 'B!'*n)
self.assertEqual(expandvars('${A}A'*n), 'BA'*n)
self.assertEqual(expandvars('${'*10*n), '${'*10*n)

def test_abspath(self):
self.assertIn("foo", self.pathmodule.abspath("foo"))
with warnings.catch_warnings():
Expand Down Expand Up @@ -502,7 +515,7 @@ def test_nonascii_abspath(self):
# directory (when the bytes name is used).
and sys.platform not in {
"win32", "emscripten", "wasi"
} and not is_apple
} and not support.is_apple
):
name = os_helper.TESTFN_UNDECODABLE
elif os_helper.TESTFN_NONASCII:
Expand Down
22 changes: 18 additions & 4 deletions Lib/test/test_ntpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
import unittest
import warnings
from ntpath import ALLOW_MISSING
from test.support import TestFailed, cpython_only, os_helper
from test import support
from test.support import os_helper
from test.support.os_helper import FakePath
from test import test_genericpath
from tempfile import TemporaryFile
Expand Down Expand Up @@ -57,7 +58,7 @@ def tester(fn, wantResult):
fn = fn.replace("\\", "\\\\")
gotResult = eval(fn)
if wantResult != gotResult and _norm(wantResult) != _norm(gotResult):
raise TestFailed("%s should return: %s but returned: %s" \
raise support.TestFailed("%s should return: %s but returned: %s" \
%(str(fn), str(wantResult), str(gotResult)))

# then with bytes
Expand All @@ -73,7 +74,7 @@ def tester(fn, wantResult):
warnings.simplefilter("ignore", DeprecationWarning)
gotResult = eval(fn)
if _norm(wantResult) != _norm(gotResult):
raise TestFailed("%s should return: %s but returned: %s" \
raise support.TestFailed("%s should return: %s but returned: %s" \
%(str(fn), str(wantResult), repr(gotResult)))


Expand Down Expand Up @@ -1037,6 +1038,19 @@ def check(value, expected):
check('%spam%bar', '%sbar' % nonascii)
check('%{}%bar'.format(nonascii), 'ham%sbar' % nonascii)

@support.requires_resource('cpu')
def test_expandvars_large(self):
expandvars = ntpath.expandvars
with os_helper.EnvironmentVarGuard() as env:
env.clear()
env["A"] = "B"
n = 100_000
self.assertEqual(expandvars('%A%'*n), 'B'*n)
self.assertEqual(expandvars('%A%A'*n), 'BA'*n)
self.assertEqual(expandvars("''"*n + '%%'), "''"*n + '%')
self.assertEqual(expandvars("%%"*n), "%"*n)
self.assertEqual(expandvars("$$"*n), "$"*n)

def test_expanduser(self):
tester('ntpath.expanduser("test")', 'test')

Expand Down Expand Up @@ -1454,7 +1468,7 @@ def test_con_device(self):
self.assertTrue(os.path.exists(r"\\.\CON"))

@unittest.skipIf(sys.platform != 'win32', "Fast paths are only for win32")
@cpython_only
@support.cpython_only
def test_fast_paths_in_use(self):
# There are fast paths of these functions implemented in posixmodule.c.
# Confirm that they are being used, and not the Python fallbacks in
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix quadratic complexity in :func:`os.path.expandvars`.
Loading