in the default Python 2 case, manually check the source is ASCII (fixes #269)
This commit is contained in:
parent
6d1662e4b7
commit
0cf79b29cd
|
@ -1,5 +1,8 @@
|
||||||
Changes between 2.3.4 and 2.3.5dev
|
Changes between 2.3.4 and 2.3.5dev
|
||||||
-----------------------------------
|
-----------------------------------
|
||||||
|
- issue 259 - when rewriting assertions, be consistent with the default
|
||||||
|
source encoding of ASCII on Python 2
|
||||||
|
|
||||||
- issue 251 - report a skip instead of ignoring classes with init
|
- issue 251 - report a skip instead of ignoring classes with init
|
||||||
|
|
||||||
- issue250 unicode/str mixes in parametrization names and values now works
|
- issue250 unicode/str mixes in parametrization names and values now works
|
||||||
|
|
|
@ -6,6 +6,7 @@ import itertools
|
||||||
import imp
|
import imp
|
||||||
import marshal
|
import marshal
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import struct
|
import struct
|
||||||
import sys
|
import sys
|
||||||
import types
|
import types
|
||||||
|
@ -38,6 +39,7 @@ PYC_EXT = ".py" + (__debug__ and "c" or "o")
|
||||||
PYC_TAIL = "." + PYTEST_TAG + PYC_EXT
|
PYC_TAIL = "." + PYTEST_TAG + PYC_EXT
|
||||||
|
|
||||||
REWRITE_NEWLINES = sys.version_info[:2] != (2, 7) and sys.version_info < (3, 2)
|
REWRITE_NEWLINES = sys.version_info[:2] != (2, 7) and sys.version_info < (3, 2)
|
||||||
|
ASCII_IS_DEFAULT_ENCODING = sys.version_info[0] < 3
|
||||||
|
|
||||||
class AssertionRewritingHook(object):
|
class AssertionRewritingHook(object):
|
||||||
"""PEP302 Import hook which rewrites asserts."""
|
"""PEP302 Import hook which rewrites asserts."""
|
||||||
|
@ -187,12 +189,37 @@ def _write_pyc(co, source_path, pyc):
|
||||||
RN = "\r\n".encode("utf-8")
|
RN = "\r\n".encode("utf-8")
|
||||||
N = "\n".encode("utf-8")
|
N = "\n".encode("utf-8")
|
||||||
|
|
||||||
|
# Pattern for a source-encoding declaration such as "coding: utf-8" or
# "coding=latin-1". Written as a raw string so that \s and \w reach the
# regex engine directly instead of relying on unrecognized string escapes.
cookie_re = re.compile(r"coding[:=]\s*[-\w.]+")
|
||||||
|
BOM_UTF8 = '\xef\xbb\xbf'
|
||||||
|
|
||||||
def _rewrite_test(state, fn):
|
def _rewrite_test(state, fn):
|
||||||
"""Try to read and rewrite *fn* and return the code object."""
|
"""Try to read and rewrite *fn* and return the code object."""
|
||||||
try:
|
try:
|
||||||
source = fn.read("rb")
|
source = fn.read("rb")
|
||||||
except EnvironmentError:
|
except EnvironmentError:
|
||||||
return None
|
return None
|
||||||
|
if ASCII_IS_DEFAULT_ENCODING:
|
||||||
|
# ASCII is the default encoding in Python 2. Without a coding
|
||||||
|
# declaration, Python 2 will complain about any bytes in the file
|
||||||
|
# outside the ASCII range. Sadly, this behavior does not extend to
|
||||||
|
# compile() or ast.parse(), which prefer to interpret the bytes as
|
||||||
|
# latin-1. (At least they properly handle explicit coding cookies.) To
|
||||||
|
# preserve this error behavior, we could force ast.parse() to use ASCII
|
||||||
|
# as the encoding by inserting a coding cookie. Unfortunately, that
|
||||||
|
# messes up line numbers. Thus, we have to check ourselves if anything
|
||||||
|
# is outside the ASCII range in the case no encoding is explicitly
|
||||||
|
# declared. For more context, see issue #269. Yay for Python 3 which
|
||||||
|
# gets this right.
|
||||||
|
end1 = source.find("\n")
|
||||||
|
end2 = source.find("\n", end1 + 1)
|
||||||
|
if (not source.startswith(BOM_UTF8) and
|
||||||
|
(not cookie_re.match(source[0:end1]) or
|
||||||
|
not cookie_re.match(source[end1:end2]))):
|
||||||
|
try:
|
||||||
|
source.decode("ascii")
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
# Let it fail in real import.
|
||||||
|
return None
|
||||||
# On Python versions which are not 2.7 and less than or equal to 3.1, the
|
# On Python versions which are not 2.7 and less than or equal to 3.1, the
|
||||||
# parser expects *nix newlines.
|
# parser expects *nix newlines.
|
||||||
if REWRITE_NEWLINES:
|
if REWRITE_NEWLINES:
|
||||||
|
|
|
@ -394,3 +394,11 @@ def test_rewritten():
|
||||||
b = content.encode("utf-8")
|
b = content.encode("utf-8")
|
||||||
testdir.tmpdir.join("test_newlines.py").write(b, "wb")
|
testdir.tmpdir.join("test_newlines.py").write(b, "wb")
|
||||||
assert testdir.runpytest().ret == 0
|
assert testdir.runpytest().ret == 0
|
||||||
|
|
||||||
|
@pytest.mark.skipif("sys.version_info[0] >= 3")
|
||||||
|
def test_assume_ascii(self, testdir):
|
||||||
|
content = "u'\xe2\x99\xa5'"
|
||||||
|
testdir.tmpdir.join("test_encoding.py").write(content, "wb")
|
||||||
|
res = testdir.runpytest()
|
||||||
|
assert res.ret != 0
|
||||||
|
assert "SyntaxError: Non-ASCII character" in res.stdout.str()
|
||||||
|
|
Loading…
Reference in New Issue