From 4405dd0ffe6605e86c5a60b1c195c7cb56802dbd Mon Sep 17 00:00:00 2001 From: Ceridwen Date: Tue, 22 Mar 2016 01:31:48 -0400 Subject: [PATCH 1/5] Escape both bytes and unicode strings for "ids" in Metafunc.parametrize --- CHANGELOG.rst | 4 ++- _pytest/python.py | 80 +++++++++++++++++++++++++++-------------------- 2 files changed, 49 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c1f573f18..2163c6680 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -23,7 +23,9 @@ **Changes** -* +* Fix (`#1351 `_): + explicitly passed parametrize ids do not get escaped to ascii. + Thanks `@ceridwen`_ for the PR. * diff --git a/_pytest/python.py b/_pytest/python.py index 2e6c8d87e..d6736d98b 100644 --- a/_pytest/python.py +++ b/_pytest/python.py @@ -1025,9 +1025,12 @@ class Metafunc(FuncargnamesCompatAttr): if callable(ids): idfn = ids ids = None - if ids and len(ids) != len(argvalues): - raise ValueError('%d tests specified with %d ids' %( - len(argvalues), len(ids))) + if ids: + if len(ids) != len(argvalues): + raise ValueError('%d tests specified with %d ids' %( + len(argvalues), len(ids))) + else: + ids = [_escape_strings(i) for i in ids] if not ids: ids = idmaker(argnames, argvalues, idfn) newcalls = [] @@ -1078,38 +1081,55 @@ class Metafunc(FuncargnamesCompatAttr): self._calls.append(cs) + if _PY3: import codecs - def _escape_bytes(val): - """ - If val is pure ascii, returns it as a str(), otherwise escapes - into a sequence of escaped bytes: + def _escape_strings(val): + """If val is pure ascii, returns it as a str(). 
Otherwise, escapes + bytes objects into a sequence of escaped bytes: + b'\xc3\xb4\xc5\xd6' -> u'\\xc3\\xb4\\xc5\\xd6' + and escapes unicode objects into a sequence of escaped unicode + ids, e.g.: + + '4\\nV\\U00043efa\\x0eMXWB\\x1e\\u3028\\u15fd\\xcd\\U0007d944' + note: the obvious "v.decode('unicode-escape')" will return - valid utf-8 unicode if it finds them in the string, but we + valid utf-8 unicode if it finds them in bytes, but we want to return escaped bytes for any byte, even if they match a utf-8 string. + """ - if val: - # source: http://goo.gl/bGsnwC - encoded_bytes, _ = codecs.escape_encode(val) - return encoded_bytes.decode('ascii') + if isinstance(val, bytes): + if val: + # source: http://goo.gl/bGsnwC + encoded_bytes, _ = codecs.escape_encode(val) + return encoded_bytes.decode('ascii') + else: + # empty bytes crashes codecs.escape_encode (#1087) + return '' else: - # empty bytes crashes codecs.escape_encode (#1087) - return '' + return val.encode('unicode_escape').decode('ascii') else: - def _escape_bytes(val): + def _escape_strings(val): + """In py2 bytes and str are the same type, so if it's a bytes + object, return it unchanged if it is a full ascii string, + otherwise escape it into its binary form. + + If it's a unicode string, change the unicode characters into + unicode escapes. + + """ - In py2 bytes and str are the same type, so return it unchanged if it - is a full ascii string, otherwise escape it into its binary form. 
- """ - try: - return val.decode('ascii') - except UnicodeDecodeError: - return val.encode('string-escape') + if isinstance(val, bytes): + try: + return val.decode('ascii') + except UnicodeDecodeError: + return val.encode('string-escape') + else: + return val.encode('unicode-escape') def _idval(val, argname, idx, idfn): @@ -1117,28 +1137,20 @@ def _idval(val, argname, idx, idfn): try: s = idfn(val) if s: - return s + return _escape_strings(s) except Exception: pass - if isinstance(val, bytes): - return _escape_bytes(val) + if isinstance(val, bytes) or (_PY2 and isinstance(val, unicode)): + return _escape_strings(val) elif isinstance(val, (float, int, str, bool, NoneType)): return str(val) elif isinstance(val, REGEX_TYPE): - return val.pattern + return _escape_strings(val.pattern) elif enum is not None and isinstance(val, enum.Enum): return str(val) elif isclass(val) and hasattr(val, '__name__'): return val.__name__ - elif _PY2 and isinstance(val, unicode): - # special case for python 2: if a unicode string is - # convertible to ascii, return it as an str() object instead - try: - return str(val) - except UnicodeError: - # fallthrough - pass return str(argname)+str(idx) def _idvalset(idx, valset, argnames, idfn): From 9b438d56e8cba5712cd5cc61786f9e527bb45880 Mon Sep 17 00:00:00 2001 From: Ceridwen Date: Fri, 1 Apr 2016 12:27:17 -0400 Subject: [PATCH 2/5] Fix a test_unicode_idval_python2 (now test_unicode_idval) and associated string handling on Python 3 --- _pytest/python.py | 4 ++-- testing/python/metafunc.py | 17 ++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/_pytest/python.py b/_pytest/python.py index 3d71a0afe..53e11a348 100644 --- a/_pytest/python.py +++ b/_pytest/python.py @@ -1138,9 +1138,9 @@ def _idval(val, argname, idx, idfn): except Exception: pass - if isinstance(val, bytes) or (_PY2 and isinstance(val, unicode)): + if isinstance(val, (bytes, str)) or (_PY2 and isinstance(val, unicode)): return _escape_strings(val) - 
elif isinstance(val, (float, int, str, bool, NoneType)): + elif isinstance(val, (float, int, bool, NoneType)): return str(val) elif isinstance(val, REGEX_TYPE): return _escape_strings(val.pattern) diff --git a/testing/python/metafunc.py b/testing/python/metafunc.py index f0f3dbd4c..da4228ed8 100644 --- a/testing/python/metafunc.py +++ b/testing/python/metafunc.py @@ -121,20 +121,19 @@ class TestMetafunc: assert metafunc._calls[2].id == "x1-a" assert metafunc._calls[3].id == "x1-b" - @pytest.mark.skipif('sys.version_info[0] >= 3') - def test_unicode_idval_python2(self): - """unittest for the expected behavior to obtain ids for parametrized - unicode values in Python 2: if convertible to ascii, they should appear - as ascii values, otherwise fallback to hide the value behind the name - of the parametrized variable name. #1086 + def test_unicode_idval(self): + """This tests that Unicode strings outside the ASCII character set get + escaped, using byte escapes if they're in that range or unicode + escapes if they're not. 
+ """ from _pytest.python import _idval values = [ (u'', ''), (u'ascii', 'ascii'), - (u'ação', 'a6'), - (u'josé@blah.com', 'a6'), - (u'δοκ.ιμή@παράδειγμα.δοκιμή', 'a6'), + (u'ação', 'a\\xe7\\xe3o'), + (u'josé@blah.com', 'jos\\xe9@blah.com'), + (u'δοκ.ιμή@παράδειγμα.δοκιμή', '\\u03b4\\u03bf\\u03ba.\\u03b9\\u03bc\\u03ae@\\u03c0\\u03b1\\u03c1\\u03ac\\u03b4\\u03b5\\u03b9\\u03b3\\u03bc\\u03b1.\\u03b4\\u03bf\\u03ba\\u03b9\\u03bc\\u03ae'), ] for val, expected in values: assert _idval(val, 'a', 6, None) == expected From b631fc0bc1ad720b408b84964994641a631d3a74 Mon Sep 17 00:00:00 2001 From: Ceridwen Date: Fri, 1 Apr 2016 17:57:42 -0400 Subject: [PATCH 3/5] Fix test_escaped_parametrized_names_xml --- testing/test_junitxml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/test_junitxml.py b/testing/test_junitxml.py index e84734dfa..2436b60f5 100644 --- a/testing/test_junitxml.py +++ b/testing/test_junitxml.py @@ -617,7 +617,7 @@ def test_escaped_parametrized_names_xml(testdir): result, dom = runandparse(testdir) assert result.ret == 0 node = dom.find_first_by_tag("testcase") - node.assert_attr(name="test_func[#x00]") + node.assert_attr(name="test_func[\\x00]") def test_double_colon_split_function_issue469(testdir): From 491b30c5d97a807e2d885bc6127c6f7e08e41a82 Mon Sep 17 00:00:00 2001 From: Ceridwen Date: Fri, 1 Apr 2016 22:45:44 -0400 Subject: [PATCH 4/5] Add Hypothesis test for _idval and fix bug it found --- _pytest/python.py | 2 +- testing/python/metafunc.py | 17 +++++++++++++++++ testing/test_junitxml.py | 2 +- tox.ini | 1 + 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/_pytest/python.py b/_pytest/python.py index 53e11a348..070c54715 100644 --- a/_pytest/python.py +++ b/_pytest/python.py @@ -1122,7 +1122,7 @@ else: """ if isinstance(val, bytes): try: - return val.decode('ascii') + return val.encode('ascii') except UnicodeDecodeError: return val.encode('string-escape') else: diff --git a/testing/python/metafunc.py 
b/testing/python/metafunc.py index da4228ed8..59e05dd57 100644 --- a/testing/python/metafunc.py +++ b/testing/python/metafunc.py @@ -1,11 +1,18 @@ # -*- coding: utf-8 -*- import re +import sys import _pytest._code import py import pytest from _pytest import python as funcargs +import hypothesis +from hypothesis import strategies + +PY3 = sys.version_info >= (3, 0) + + class TestMetafunc: def Metafunc(self, func): # the unit tests of this class check if things work correctly @@ -121,6 +128,16 @@ class TestMetafunc: assert metafunc._calls[2].id == "x1-a" assert metafunc._calls[3].id == "x1-b" + @hypothesis.given(strategies.text() | strategies.binary()) + def test_idval_hypothesis(self, value): + from _pytest.python import _idval + escaped = _idval(value, 'a', 6, None) + assert isinstance(escaped, str) + if PY3: + escaped.encode('ascii') + else: + escaped.decode('ascii') + def test_unicode_idval(self): """This tests that Unicode strings outside the ASCII character set get escaped, using byte escapes if they're in that range or unicode diff --git a/testing/test_junitxml.py b/testing/test_junitxml.py index 2436b60f5..8eda22f7f 100644 --- a/testing/test_junitxml.py +++ b/testing/test_junitxml.py @@ -610,7 +610,7 @@ def test_logxml_makedir(testdir): def test_escaped_parametrized_names_xml(testdir): testdir.makepyfile(""" import pytest - @pytest.mark.parametrize('char', ["\\x00"]) + @pytest.mark.parametrize('char', [u"\\x00"]) def test_func(char): assert char """) diff --git a/tox.ini b/tox.ini index 5f65446e4..82fe34b22 100644 --- a/tox.ini +++ b/tox.ini @@ -10,6 +10,7 @@ envlist= commands= py.test --lsof -rfsxX {posargs:testing} passenv = USER USERNAME deps= + hypothesis nose mock requests From 08671fcf4adeb32cac4217db68d847df5bcd1f81 Mon Sep 17 00:00:00 2001 From: Ceridwen Date: Sat, 2 Apr 2016 10:52:28 -0400 Subject: [PATCH 5/5] Fix the changelog and dependencies for tox --- CHANGELOG.rst | 5 +++-- tox.ini | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff 
--git a/CHANGELOG.rst b/CHANGELOG.rst index 0a1178ecc..7bbfb4b0e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -23,7 +23,7 @@ **Changes** -* Fix (`#1351 `_): +* Fix (`#1351`_): explicitly passed parametrize ids do not get escaped to ascii. Thanks `@ceridwen`_ for the PR. @@ -38,12 +38,13 @@ .. _@novas0x2a: https://github.com/novas0x2a .. _@kalekundert: https://github.com/kalekundert .. _@tareqalayan: https://github.com/tareqalayan +.. _@ceridwen: https://github.com/ceridwen .. _#1428: https://github.com/pytest-dev/pytest/pull/1428 .. _#1444: https://github.com/pytest-dev/pytest/pull/1444 .. _#1441: https://github.com/pytest-dev/pytest/pull/1441 .. _#1454: https://github.com/pytest-dev/pytest/pull/1454 - +.. _#1351: https://github.com/pytest-dev/pytest/issues/1351 2.9.2.dev1 ========== diff --git a/tox.ini b/tox.ini index 82fe34b22..0876f510c 100644 --- a/tox.ini +++ b/tox.ini @@ -18,6 +18,7 @@ deps= [testenv:py26] commands= py.test --lsof -rfsxX {posargs:testing} deps= + hypothesis<3.03 nose mock<1.1 # last supported version for py26 @@ -44,6 +45,7 @@ commands = flake8 pytest.py _pytest testing deps=pytest-xdist>=1.13 mock nose + hypothesis commands= py.test -n1 -rfsxX {posargs:testing} @@ -68,6 +70,7 @@ commands= [testenv:py27-nobyte] deps=pytest-xdist>=1.13 + hypothesis distribute=true setenv= PYTHONDONTWRITEBYTECODE=1