pathlib: make visit() independent of py.path.local, use os.scandir

`os.scandir()`, introduced in Python 3.5, is much faster than
`os.listdir()`. See https://www.python.org/dev/peps/pep-0471/.

It also yields `DirEntry` objects, which can be used to further reduce
syscalls in some cases.
This commit is contained in:
Ran Benita 2020-07-05 23:11:47 +03:00
parent c15bb5d3de
commit 3633b691d8
4 changed files with 26 additions and 22 deletions

View File

@ -618,11 +618,13 @@ class Session(nodes.FSCollector):
assert not names, "invalid arg {!r}".format((argpath, names)) assert not names, "invalid arg {!r}".format((argpath, names))
seen_dirs = set() # type: Set[py.path.local] seen_dirs = set() # type: Set[py.path.local]
for path in visit(argpath, self._recurse): for direntry in visit(str(argpath), self._recurse):
if not path.check(file=1): if not direntry.is_file():
continue continue
path = py.path.local(direntry.path)
dirpath = path.dirpath() dirpath = path.dirpath()
if dirpath not in seen_dirs: if dirpath not in seen_dirs:
# Collect packages first. # Collect packages first.
seen_dirs.add(dirpath) seen_dirs.add(dirpath)

View File

@ -562,17 +562,18 @@ class FSCollector(Collector):
def gethookproxy(self, fspath: py.path.local): def gethookproxy(self, fspath: py.path.local):
raise NotImplementedError() raise NotImplementedError()
def _recurse(self, dirpath: py.path.local) -> bool: def _recurse(self, direntry: "os.DirEntry[str]") -> bool:
if dirpath.basename == "__pycache__": if direntry.name == "__pycache__":
return False return False
ihook = self._gethookproxy(dirpath.dirpath()) path = py.path.local(direntry.path)
if ihook.pytest_ignore_collect(path=dirpath, config=self.config): ihook = self._gethookproxy(path.dirpath())
if ihook.pytest_ignore_collect(path=path, config=self.config):
return False return False
for pat in self._norecursepatterns: for pat in self._norecursepatterns:
if dirpath.check(fnmatch=pat): if path.check(fnmatch=pat):
return False return False
ihook = self._gethookproxy(dirpath) ihook = self._gethookproxy(path)
ihook.pytest_collect_directory(path=dirpath, parent=self) ihook.pytest_collect_directory(path=path, parent=self)
return True return True
def isinitpath(self, path: py.path.local) -> bool: def isinitpath(self, path: py.path.local) -> bool:

View File

@ -560,14 +560,14 @@ def resolve_package_path(path: Path) -> Optional[Path]:
def visit( def visit(
path: py.path.local, recurse: Callable[[py.path.local], bool], path: str, recurse: Callable[["os.DirEntry[str]"], bool]
) -> Iterator[py.path.local]: ) -> Iterator["os.DirEntry[str]"]:
"""Walk path recursively, in breadth-first order. """Walk a directory recursively, in breadth-first order.
Entries at each directory level are sorted. Entries at each directory level are sorted.
""" """
entries = sorted(path.listdir()) entries = sorted(os.scandir(path), key=lambda entry: entry.name)
yield from entries yield from entries
for entry in entries: for entry in entries:
if entry.check(dir=1) and recurse(entry): if entry.is_dir(follow_symlinks=False) and recurse(entry):
yield from visit(entry, recurse) yield from visit(entry.path, recurse)

View File

@ -642,23 +642,24 @@ class Package(Module):
): ):
yield Module.from_parent(self, fspath=init_module) yield Module.from_parent(self, fspath=init_module)
pkg_prefixes = set() # type: Set[py.path.local] pkg_prefixes = set() # type: Set[py.path.local]
for path in visit(this_path, recurse=self._recurse): for direntry in visit(str(this_path), recurse=self._recurse):
path = py.path.local(direntry.path)
# We will visit our own __init__.py file, in which case we skip it. # We will visit our own __init__.py file, in which case we skip it.
is_file = path.isfile() if direntry.is_file():
if is_file: if direntry.name == "__init__.py" and path.dirpath() == this_path:
if path.basename == "__init__.py" and path.dirpath() == this_path:
continue continue
parts_ = parts(path.strpath) parts_ = parts(direntry.path)
if any( if any(
str(pkg_prefix) in parts_ and pkg_prefix.join("__init__.py") != path str(pkg_prefix) in parts_ and pkg_prefix.join("__init__.py") != path
for pkg_prefix in pkg_prefixes for pkg_prefix in pkg_prefixes
): ):
continue continue
if is_file: if direntry.is_file():
yield from self._collectfile(path) yield from self._collectfile(path)
elif not path.isdir(): elif not direntry.is_dir():
# Broken symlink or invalid/missing file. # Broken symlink or invalid/missing file.
continue continue
elif path.join("__init__.py").check(file=1): elif path.join("__init__.py").check(file=1):