Improve our own wcwidth implementation and remove dependency on wcwidth package
`TerminalWriter`, imported recently from `py`, contains its own incomplete wcwidth implementation (`char_width`/`get_line_width`). The `TerminalReporter` also needs this functionality, but uses the external `wcwidth` package. This commit brings the `TerminalWriter` implementation up to par with `wcwidth`, moves the implementation to a new file, `_pytest._io.wcwidth`, which is used everywhere, and removes the dependency. The differences compared to the `wcwidth` package are: - Normalizes the string before counting. - Uses Python's `unicodedata` instead of vendored Unicode tables. This means the data corresponds to the Unicode version of the running Python rather than the Unicode version bundled with the `wcwidth` package. - Applies some optimizations.
This commit is contained in:
		
							parent
							
								
									54ae27f081
								
							
						
					
					
						commit
						aca534c67d
					
				|  | @ -0,0 +1 @@ | ||||||
|  | The dependency on the ``wcwidth`` package has been removed. | ||||||
							
								
								
									
										1
									
								
								setup.py
								
								
								
								
							
							
						
						
									
										1
									
								
								setup.py
								
								
								
								
							|  | @ -12,7 +12,6 @@ INSTALL_REQUIRES = [ | ||||||
|     'colorama;sys_platform=="win32"', |     'colorama;sys_platform=="win32"', | ||||||
|     "pluggy>=0.12,<1.0", |     "pluggy>=0.12,<1.0", | ||||||
|     'importlib-metadata>=0.12;python_version<"3.8"', |     'importlib-metadata>=0.12;python_version<"3.8"', | ||||||
|     "wcwidth", |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -2,12 +2,12 @@ | ||||||
| import os | import os | ||||||
| import shutil | import shutil | ||||||
| import sys | import sys | ||||||
| import unicodedata |  | ||||||
| from functools import lru_cache |  | ||||||
| from typing import Optional | from typing import Optional | ||||||
| from typing import Sequence | from typing import Sequence | ||||||
| from typing import TextIO | from typing import TextIO | ||||||
| 
 | 
 | ||||||
|  | from .wcwidth import wcswidth | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| # This code was initially copied from py 1.8.1, file _io/terminalwriter.py. | # This code was initially copied from py 1.8.1, file _io/terminalwriter.py. | ||||||
| 
 | 
 | ||||||
|  | @ -22,17 +22,6 @@ def get_terminal_width() -> int: | ||||||
|     return width |     return width | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @lru_cache(100) |  | ||||||
| def char_width(c: str) -> int: |  | ||||||
|     # Fullwidth and Wide -> 2, all else (including Ambiguous) -> 1. |  | ||||||
|     return 2 if unicodedata.east_asian_width(c) in ("F", "W") else 1 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def get_line_width(text: str) -> int: |  | ||||||
|     text = unicodedata.normalize("NFC", text) |  | ||||||
|     return sum(char_width(c) for c in text) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def should_do_markup(file: TextIO) -> bool: | def should_do_markup(file: TextIO) -> bool: | ||||||
|     if os.environ.get("PY_COLORS") == "1": |     if os.environ.get("PY_COLORS") == "1": | ||||||
|         return True |         return True | ||||||
|  | @ -99,7 +88,7 @@ class TerminalWriter: | ||||||
|     @property |     @property | ||||||
|     def width_of_current_line(self) -> int: |     def width_of_current_line(self) -> int: | ||||||
|         """Return an estimate of the width so far in the current line.""" |         """Return an estimate of the width so far in the current line.""" | ||||||
|         return get_line_width(self._current_line) |         return wcswidth(self._current_line) | ||||||
| 
 | 
 | ||||||
|     def markup(self, text: str, **markup: bool) -> str: |     def markup(self, text: str, **markup: bool) -> str: | ||||||
|         for name in markup: |         for name in markup: | ||||||
|  |  | ||||||
|  | @ -0,0 +1,55 @@ | ||||||
|  | import unicodedata | ||||||
|  | from functools import lru_cache | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @lru_cache(100) | ||||||
|  | def wcwidth(c: str) -> int: | ||||||
|  |     """Determine how many columns are needed to display a character in a terminal. | ||||||
|  | 
 | ||||||
|  |     Returns -1 if the character is not printable. | ||||||
|  |     Returns 0, 1 or 2 for other characters. | ||||||
|  |     """ | ||||||
|  |     o = ord(c) | ||||||
|  | 
 | ||||||
|  |     # ASCII fast path. | ||||||
|  |     if 0x20 <= o < 0x07F: | ||||||
|  |         return 1 | ||||||
|  | 
 | ||||||
|  |     # Some Cf/Zp/Zl characters which should be zero-width. | ||||||
|  |     if ( | ||||||
|  |         o == 0x0000 | ||||||
|  |         or 0x200B <= o <= 0x200F | ||||||
|  |         or 0x2028 <= o <= 0x202E | ||||||
|  |         or 0x2060 <= o <= 0x2063 | ||||||
|  |     ): | ||||||
|  |         return 0 | ||||||
|  | 
 | ||||||
|  |     category = unicodedata.category(c) | ||||||
|  | 
 | ||||||
|  |     # Control characters. | ||||||
|  |     if category == "Cc": | ||||||
|  |         return -1 | ||||||
|  | 
 | ||||||
|  |     # Combining characters with zero width. | ||||||
|  |     if category in ("Me", "Mn"): | ||||||
|  |         return 0 | ||||||
|  | 
 | ||||||
|  |     # Full/Wide east asian characters. | ||||||
|  |     if unicodedata.east_asian_width(c) in ("F", "W"): | ||||||
|  |         return 2 | ||||||
|  | 
 | ||||||
|  |     return 1 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def wcswidth(s: str) -> int: | ||||||
|  |     """Determine how many columns are needed to display a string in a terminal. | ||||||
|  | 
 | ||||||
|  |     Returns -1 if the string contains non-printable characters. | ||||||
|  |     """ | ||||||
|  |     width = 0 | ||||||
|  |     for c in unicodedata.normalize("NFC", s): | ||||||
|  |         wc = wcwidth(c) | ||||||
|  |         if wc < 0: | ||||||
|  |             return -1 | ||||||
|  |         width += wc | ||||||
|  |     return width | ||||||
|  | @ -27,6 +27,7 @@ from more_itertools import collapse | ||||||
| import pytest | import pytest | ||||||
| from _pytest import nodes | from _pytest import nodes | ||||||
| from _pytest._io import TerminalWriter | from _pytest._io import TerminalWriter | ||||||
|  | from _pytest._io.wcwidth import wcswidth | ||||||
| from _pytest.compat import order_preserving_dict | from _pytest.compat import order_preserving_dict | ||||||
| from _pytest.config import Config | from _pytest.config import Config | ||||||
| from _pytest.config import ExitCode | from _pytest.config import ExitCode | ||||||
|  | @ -1122,8 +1123,6 @@ def _get_pos(config, rep): | ||||||
| 
 | 
 | ||||||
| def _get_line_with_reprcrash_message(config, rep, termwidth): | def _get_line_with_reprcrash_message(config, rep, termwidth): | ||||||
|     """Get summary line for a report, trying to add reprcrash message.""" |     """Get summary line for a report, trying to add reprcrash message.""" | ||||||
|     from wcwidth import wcswidth |  | ||||||
| 
 |  | ||||||
|     verbose_word = rep._get_verbose_word(config) |     verbose_word = rep._get_verbose_word(config) | ||||||
|     pos = _get_pos(config, rep) |     pos = _get_pos(config, rep) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -0,0 +1,38 @@ | ||||||
|  | import pytest | ||||||
|  | from _pytest._io.wcwidth import wcswidth | ||||||
|  | from _pytest._io.wcwidth import wcwidth | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @pytest.mark.parametrize( | ||||||
|  |     ("c", "expected"), | ||||||
|  |     [ | ||||||
|  |         ("\0", 0), | ||||||
|  |         ("\n", -1), | ||||||
|  |         ("a", 1), | ||||||
|  |         ("1", 1), | ||||||
|  |         ("א", 1), | ||||||
|  |         ("\u200B", 0), | ||||||
|  |         ("\u1ABE", 0), | ||||||
|  |         ("\u0591", 0), | ||||||
|  |         ("🉐", 2), | ||||||
|  |         ("\uFF04", 2), | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | def test_wcwidth(c: str, expected: int) -> None: | ||||||
|  |     assert wcwidth(c) == expected | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @pytest.mark.parametrize( | ||||||
|  |     ("s", "expected"), | ||||||
|  |     [ | ||||||
|  |         ("", 0), | ||||||
|  |         ("hello, world!", 13), | ||||||
|  |         ("hello, world!\n", -1), | ||||||
|  |         ("0123456789", 10), | ||||||
|  |         ("שלום, עולם!", 11), | ||||||
|  |         ("שְבֻעָיים", 6), | ||||||
|  |         ("🉐🉐🉐", 6), | ||||||
|  |     ], | ||||||
|  | ) | ||||||
|  | def test_wcswidth(s: str, expected: int) -> None: | ||||||
|  |     assert wcswidth(s) == expected | ||||||
|  | @ -14,7 +14,9 @@ import pluggy | ||||||
| import py | import py | ||||||
| 
 | 
 | ||||||
| import _pytest.config | import _pytest.config | ||||||
|  | import _pytest.terminal | ||||||
| import pytest | import pytest | ||||||
|  | from _pytest._io.wcwidth import wcswidth | ||||||
| from _pytest.config import ExitCode | from _pytest.config import ExitCode | ||||||
| from _pytest.pytester import Testdir | from _pytest.pytester import Testdir | ||||||
| from _pytest.reports import BaseReport | from _pytest.reports import BaseReport | ||||||
|  | @ -2027,9 +2029,6 @@ def test_skip_reasons_folding(): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_line_with_reprcrash(monkeypatch): | def test_line_with_reprcrash(monkeypatch): | ||||||
|     import _pytest.terminal |  | ||||||
|     from wcwidth import wcswidth |  | ||||||
| 
 |  | ||||||
|     mocked_verbose_word = "FAILED" |     mocked_verbose_word = "FAILED" | ||||||
| 
 | 
 | ||||||
|     mocked_pos = "some::nodeid" |     mocked_pos = "some::nodeid" | ||||||
|  | @ -2079,19 +2078,19 @@ def test_line_with_reprcrash(monkeypatch): | ||||||
|     check("some\nmessage", 80, "FAILED some::nodeid - some") |     check("some\nmessage", 80, "FAILED some::nodeid - some") | ||||||
| 
 | 
 | ||||||
|     # Test unicode safety. |     # Test unicode safety. | ||||||
|     check("😄😄😄😄😄\n2nd line", 25, "FAILED some::nodeid - ...") |     check("🉐🉐🉐🉐🉐\n2nd line", 25, "FAILED some::nodeid - ...") | ||||||
|     check("😄😄😄😄😄\n2nd line", 26, "FAILED some::nodeid - ...") |     check("🉐🉐🉐🉐🉐\n2nd line", 26, "FAILED some::nodeid - ...") | ||||||
|     check("😄😄😄😄😄\n2nd line", 27, "FAILED some::nodeid - 😄...") |     check("🉐🉐🉐🉐🉐\n2nd line", 27, "FAILED some::nodeid - 🉐...") | ||||||
|     check("😄😄😄😄😄\n2nd line", 28, "FAILED some::nodeid - 😄...") |     check("🉐🉐🉐🉐🉐\n2nd line", 28, "FAILED some::nodeid - 🉐...") | ||||||
|     check("😄😄😄😄😄\n2nd line", 29, "FAILED some::nodeid - 😄😄...") |     check("🉐🉐🉐🉐🉐\n2nd line", 29, "FAILED some::nodeid - 🉐🉐...") | ||||||
| 
 | 
 | ||||||
|     # NOTE: constructed, not sure if this is supported. |     # NOTE: constructed, not sure if this is supported. | ||||||
|     mocked_pos = "nodeid::😄::withunicode" |     mocked_pos = "nodeid::🉐::withunicode" | ||||||
|     check("😄😄😄😄😄\n2nd line", 29, "FAILED nodeid::😄::withunicode") |     check("🉐🉐🉐🉐🉐\n2nd line", 29, "FAILED nodeid::🉐::withunicode") | ||||||
|     check("😄😄😄😄😄\n2nd line", 40, "FAILED nodeid::😄::withunicode - 😄😄...") |     check("🉐🉐🉐🉐🉐\n2nd line", 40, "FAILED nodeid::🉐::withunicode - 🉐🉐...") | ||||||
|     check("😄😄😄😄😄\n2nd line", 41, "FAILED nodeid::😄::withunicode - 😄😄...") |     check("🉐🉐🉐🉐🉐\n2nd line", 41, "FAILED nodeid::🉐::withunicode - 🉐🉐...") | ||||||
|     check("😄😄😄😄😄\n2nd line", 42, "FAILED nodeid::😄::withunicode - 😄😄😄...") |     check("🉐🉐🉐🉐🉐\n2nd line", 42, "FAILED nodeid::🉐::withunicode - 🉐🉐🉐...") | ||||||
|     check("😄😄😄😄😄\n2nd line", 80, "FAILED nodeid::😄::withunicode - 😄😄😄😄😄") |     check("🉐🉐🉐🉐🉐\n2nd line", 80, "FAILED nodeid::🉐::withunicode - 🉐🉐🉐🉐🉐") | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue