Skip to content

Commit 8e3a3b3

Browse files
Jayman2000 authored and adrienverge committed
decoder: Autodetect decoding of stdin
Before this change, yamllint would use a character encoding autodetection algorithm in order to determine the character encoding of all YAML files that it processed, unless the YAML file was sent to yamllint via stdin. This change makes it so that yamllint always uses the character encoding detection algorithm, even if the YAML file is sent to yamllint via stdin. Before this change, one of yamllint’s tests would replace sys.stdin with a StringIO object. This change makes it so that that test replaces sys.stdin with a file object instead of a StringIO object. Before this change, it was OK to use a StringIO object because yamllint never tried to access sys.stdin.buffer. It’s no longer OK to use a StringIO because yamllint now tries to access sys.stdin.buffer. File objects do have a buffer attribute, so we can use a file object instead.
1 parent a53fa80 commit 8e3a3b3

File tree

2 files changed

+71
-14
lines changed

2 files changed

+71
-14
lines changed

tests/test_cli.py

Lines changed: 67 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@
1414
# You should have received a copy of the GNU General Public License
1515
# along with this program. If not, see <http://www.gnu.org/licenses/>.
1616

17+
import glob
1718
import locale
1819
import os
1920
import pty
2021
import shutil
2122
import sys
2223
import tempfile
2324
import unittest
24-
from io import StringIO
2525

2626
from tests.common import (
2727
RunContext,
@@ -607,20 +607,32 @@ def test_github_actions_detection(self):
607607
(ctx.returncode, ctx.stdout, ctx.stderr), (1, expected_out, ''))
608608

609609
def test_run_read_from_stdin(self):
610-
# prepares stdin with an invalid yaml string so that we can check
611-
# for its specific error, and be assured that stdin was read
612610
self.addCleanup(setattr, sys, 'stdin', sys.__stdin__)
613-
sys.stdin = StringIO(
614-
'I am a string\n'
615-
'therefore: I am an error\n')
611+
# Normally, I would just use tempfile.TemporaryFile(), but here I want
612+
# to first open the file for writing and then open it for reading. In
613+
# other words, I need to make sure that the file continues to exist
614+
# after I close it for the first time. That’s why I’m using
615+
# tempfile.TemporaryDirectory() instead of tempfile.TemporaryFile().
616+
with tempfile.TemporaryDirectory() as temp_dir_path:
617+
stdin_file_path = os.path.join(temp_dir_path, 'stdin')
618+
with open(stdin_file_path, mode='w', encoding='utf_8') as file:
619+
file.write(
620+
'I am a string\n'
621+
'therefore: I am an error\n')
622+
with open(stdin_file_path, mode='r', encoding='utf-8') as file:
623+
# prepares stdin with an invalid yaml string so that we can
624+
# check for its specific error, and be assured that stdin was
625+
# read
626+
sys.stdin = file
616627

617-
with RunContext(self) as ctx:
618-
cli.run(('-', '-f', 'parsable'))
619-
expected_out = (
620-
'stdin:2:10: [error] syntax error: '
621-
'mapping values are not allowed here (syntax)\n')
622-
self.assertEqual(
623-
(ctx.returncode, ctx.stdout, ctx.stderr), (1, expected_out, ''))
628+
with RunContext(self) as ctx:
629+
cli.run(('-', '-f', 'parsable'))
630+
expected_out = (
631+
'stdin:2:10: [error] syntax error: '
632+
'mapping values are not allowed here (syntax)\n')
633+
self.assertEqual(
634+
(ctx.returncode, ctx.stdout, ctx.stderr),
635+
(1, expected_out, ''))
624636

625637
def test_run_no_warnings(self):
626638
path = os.path.join(self.wd, 'a.yaml')
@@ -817,7 +829,35 @@ def tearDownClass(cls):
817829
super().tearDownClass()
818830
unregister_test_codecs()
819831

832+
def valid_encodings_stdin_test_helper(
833+
self,
834+
config_path,
835+
root_dir,
836+
old_stdin
837+
):
838+
for path in glob.glob(os.path.join(root_dir, '**')):
839+
# We purposely choose the wrong text encoding here because the text
840+
# encoding shouldn’t matter. yamllint should completely ignore the
841+
# text encoding of stdin.
842+
with open(path, mode="r", encoding="cp037") as file:
843+
sys.stdin = file
844+
with RunContext(self) as ctx:
845+
cli.run(('-c', config_path, '-'))
846+
sys.stdin = old_stdin
847+
if root_dir == 'sorted_correctly':
848+
self.assertEqual(ctx.returncode, 0)
849+
elif root_dir == 'sorted_incorrectly':
850+
self.assertNotEqual(ctx.returncode, 0)
851+
else:
852+
raise ValueError(
853+
f"root_dir was set to {repr(root_dir)}. It should only"
854+
"ever be set to 'sorted_correctly' or"
855+
"'sorted_incorrectly'."
856+
)
857+
820858
def test_valid_encodings(self):
859+
old_stdin = sys.stdin
860+
self.addCleanup(setattr, sys, 'stdin', old_stdin)
821861
conf = ('---\n'
822862
'rules:\n'
823863
' key-ordering: enable\n')
@@ -847,9 +887,23 @@ def test_valid_encodings(self):
847887

848888
with temp_workspace(workspace):
849889
for config_path in config_files.keys():
890+
# First, make sure that encoding autodetection works when the
891+
# file’s path is given as a command-line argument.
850892
with RunContext(self) as ctx:
851893
cli.run(('-c', config_path, 'sorted_correctly'))
852894
self.assertEqual(ctx.returncode, 0)
853895
with RunContext(self) as ctx:
854896
cli.run(('-c', config_path, 'sorted_incorrectly'))
855897
self.assertNotEqual(ctx.returncode, 0)
898+
# Second, make sure that encoding autodetection works when the
899+
# file is piped to yamllint via stdin.
900+
self.valid_encodings_stdin_test_helper(
901+
config_path,
902+
'sorted_correctly',
903+
old_stdin
904+
)
905+
self.valid_encodings_stdin_test_helper(
906+
config_path,
907+
'sorted_incorrectly',
908+
old_stdin
909+
)

yamllint/cli.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,10 @@ def run(argv=None):
231231
# read yaml from stdin
232232
if args.stdin:
233233
try:
234-
problems = linter.run(sys.stdin, conf, '')
234+
# The .buffer part makes sure that we get the raw bytes. We need to
235+
# get the raw bytes so that we can autodetect the character
236+
# encoding.
237+
problems = linter.run(sys.stdin.buffer, conf, '')
235238
except OSError as e:
236239
print(e, file=sys.stderr)
237240
sys.exit(-1)

0 commit comments

Comments
 (0)