pathlib: make visit() independent of py.path.local, use os.scandir
`os.scandir()`, introduced in Python 3.5, is much faster than `os.listdir()` when file-type information is also needed. See https://www.python.org/dev/peps/pep-0471/. It yields `os.DirEntry` objects, which cache that information and can be used to further reduce syscalls in some cases.
This commit is contained in:
		
							parent
							
								
									c15bb5d3de
								
							
						
					
					
						commit
						3633b691d8
					
				| 
						 | 
				
			
			@ -618,11 +618,13 @@ class Session(nodes.FSCollector):
 | 
			
		|||
            assert not names, "invalid arg {!r}".format((argpath, names))
 | 
			
		||||
 | 
			
		||||
            seen_dirs = set()  # type: Set[py.path.local]
 | 
			
		||||
            for path in visit(argpath, self._recurse):
 | 
			
		||||
                if not path.check(file=1):
 | 
			
		||||
            for direntry in visit(str(argpath), self._recurse):
 | 
			
		||||
                if not direntry.is_file():
 | 
			
		||||
                    continue
 | 
			
		||||
 | 
			
		||||
                path = py.path.local(direntry.path)
 | 
			
		||||
                dirpath = path.dirpath()
 | 
			
		||||
 | 
			
		||||
                if dirpath not in seen_dirs:
 | 
			
		||||
                    # Collect packages first.
 | 
			
		||||
                    seen_dirs.add(dirpath)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -562,17 +562,18 @@ class FSCollector(Collector):
 | 
			
		|||
    def gethookproxy(self, fspath: py.path.local):
 | 
			
		||||
        raise NotImplementedError()
 | 
			
		||||
 | 
			
		||||
    def _recurse(self, dirpath: py.path.local) -> bool:
 | 
			
		||||
        if dirpath.basename == "__pycache__":
 | 
			
		||||
    def _recurse(self, direntry: "os.DirEntry[str]") -> bool:
 | 
			
		||||
        if direntry.name == "__pycache__":
 | 
			
		||||
            return False
 | 
			
		||||
        ihook = self._gethookproxy(dirpath.dirpath())
 | 
			
		||||
        if ihook.pytest_ignore_collect(path=dirpath, config=self.config):
 | 
			
		||||
        path = py.path.local(direntry.path)
 | 
			
		||||
        ihook = self._gethookproxy(path.dirpath())
 | 
			
		||||
        if ihook.pytest_ignore_collect(path=path, config=self.config):
 | 
			
		||||
            return False
 | 
			
		||||
        for pat in self._norecursepatterns:
 | 
			
		||||
            if dirpath.check(fnmatch=pat):
 | 
			
		||||
            if path.check(fnmatch=pat):
 | 
			
		||||
                return False
 | 
			
		||||
        ihook = self._gethookproxy(dirpath)
 | 
			
		||||
        ihook.pytest_collect_directory(path=dirpath, parent=self)
 | 
			
		||||
        ihook = self._gethookproxy(path)
 | 
			
		||||
        ihook.pytest_collect_directory(path=path, parent=self)
 | 
			
		||||
        return True
 | 
			
		||||
 | 
			
		||||
    def isinitpath(self, path: py.path.local) -> bool:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -560,14 +560,14 @@ def resolve_package_path(path: Path) -> Optional[Path]:
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
def visit(
 | 
			
		||||
    path: py.path.local, recurse: Callable[[py.path.local], bool],
 | 
			
		||||
) -> Iterator[py.path.local]:
 | 
			
		||||
    """Walk path recursively, in breadth-first order.
 | 
			
		||||
    path: str, recurse: Callable[["os.DirEntry[str]"], bool]
 | 
			
		||||
) -> Iterator["os.DirEntry[str]"]:
 | 
			
		||||
    """Walk a directory recursively, in breadth-first order.
 | 
			
		||||
 | 
			
		||||
    Entries at each directory level are sorted.
 | 
			
		||||
    """
 | 
			
		||||
    entries = sorted(path.listdir())
 | 
			
		||||
    entries = sorted(os.scandir(path), key=lambda entry: entry.name)
 | 
			
		||||
    yield from entries
 | 
			
		||||
    for entry in entries:
 | 
			
		||||
        if entry.check(dir=1) and recurse(entry):
 | 
			
		||||
            yield from visit(entry, recurse)
 | 
			
		||||
        if entry.is_dir(follow_symlinks=False) and recurse(entry):
 | 
			
		||||
            yield from visit(entry.path, recurse)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -642,23 +642,24 @@ class Package(Module):
 | 
			
		|||
        ):
 | 
			
		||||
            yield Module.from_parent(self, fspath=init_module)
 | 
			
		||||
        pkg_prefixes = set()  # type: Set[py.path.local]
 | 
			
		||||
        for path in visit(this_path, recurse=self._recurse):
 | 
			
		||||
        for direntry in visit(str(this_path), recurse=self._recurse):
 | 
			
		||||
            path = py.path.local(direntry.path)
 | 
			
		||||
 | 
			
		||||
            # We will visit our own __init__.py file, in which case we skip it.
 | 
			
		||||
            is_file = path.isfile()
 | 
			
		||||
            if is_file:
 | 
			
		||||
                if path.basename == "__init__.py" and path.dirpath() == this_path:
 | 
			
		||||
            if direntry.is_file():
 | 
			
		||||
                if direntry.name == "__init__.py" and path.dirpath() == this_path:
 | 
			
		||||
                    continue
 | 
			
		||||
 | 
			
		||||
            parts_ = parts(path.strpath)
 | 
			
		||||
            parts_ = parts(direntry.path)
 | 
			
		||||
            if any(
 | 
			
		||||
                str(pkg_prefix) in parts_ and pkg_prefix.join("__init__.py") != path
 | 
			
		||||
                for pkg_prefix in pkg_prefixes
 | 
			
		||||
            ):
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            if is_file:
 | 
			
		||||
            if direntry.is_file():
 | 
			
		||||
                yield from self._collectfile(path)
 | 
			
		||||
            elif not path.isdir():
 | 
			
		||||
            elif not direntry.is_dir():
 | 
			
		||||
                # Broken symlink or invalid/missing file.
 | 
			
		||||
                continue
 | 
			
		||||
            elif path.join("__init__.py").check(file=1):
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue