From 825ea9bfa1784da8444e7d89b84dd3e083730707 Mon Sep 17 00:00:00 2001 From: Floris Bruynooghe Date: Wed, 29 Jan 2014 00:42:58 +0000 Subject: [PATCH] Fix assertrepr for mojibake If the compared text was in bytes and not actually valid text (i.e. could not be decoded to text/unicode using the default encoding) then the assertrepr would fail with an EncodingError. This ensures that the internal string is always valid unicode, converting any bytes safely to valid unicode. This is done using repr() which then needs post-processing to fix the encompassing quotes and un-escape newlines. This fixes issue 429. --- _pytest/assertion/util.py | 8 +++++++- testing/test_assertion.py | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/_pytest/assertion/util.py b/_pytest/assertion/util.py index 258fee8a8..13a31a4a9 100644 --- a/_pytest/assertion/util.py +++ b/_pytest/assertion/util.py @@ -162,12 +162,18 @@ def assertrepr_compare(config, op, left, right): def _diff_text(left, right, verbose=False): - """Return the explanation for the diff between text + """Return the explanation for the diff between text or bytes Unless --verbose is used this will skip leading and trailing characters which are identical to keep the diff minimal. + + If the input are bytes they will be safely converted to text. 
""" explanation = [] + if isinstance(left, py.builtin.bytes): + left = u(repr(left)[1:-1]).replace(r'\n', '\n') + if isinstance(right, py.builtin.bytes): + right = u(repr(right)[1:-1]).replace(r'\n', '\n') if not verbose: i = 0 # just in case left or right has zero length for i in range(min(len(left), len(right))): diff --git a/testing/test_assertion.py b/testing/test_assertion.py index fbe44eb73..abd745415 100644 --- a/testing/test_assertion.py +++ b/testing/test_assertion.py @@ -185,6 +185,19 @@ class TestAssert_reprcompare: assert expl[1] == py.builtin._totext('- £€', 'utf-8') assert expl[2] == py.builtin._totext('+ £', 'utf-8') + def test_mojibake(self): + # issue 429 + left = 'e' + right = '\xc3\xa9' + if not isinstance(left, py.builtin.bytes): + left = py.builtin.bytes(left, 'utf-8') + right = py.builtin.bytes(right, 'utf-8') + expl = callequal(left, right) + for line in expl: + assert isinstance(line, py.builtin.text) + msg = py.builtin._totext('\n').join(expl) + assert msg + def test_python25_compile_issue257(testdir): testdir.makepyfile("""