197 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			197 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			Go
		
	
	
	
| // Copyright 2016 The Go Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| // Package fastwalk provides a faster version of filepath.Walk for file system
 | |
| // scanning tools.
 | |
| package fastwalk
 | |
| 
 | |
| import (
 | |
| 	"errors"
 | |
| 	"os"
 | |
| 	"path/filepath"
 | |
| 	"runtime"
 | |
| 	"sync"
 | |
| )
 | |
| 
 | |
// Sentinel errors that a walk callback may return to steer the traversal.
var (
	// TraverseLink is used as a return value from WalkFuncs to indicate that
	// the symlink named in the call may be traversed.
	TraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory")

	// SkipFiles is used as a return value from WalkFuncs to indicate that the
	// callback should not be called for any other files in the current
	// directory. Child directories will still be traversed.
	SkipFiles = errors.New("fastwalk: skip remaining files in directory")
)
 | |
| 
 | |
| // Walk is a faster implementation of filepath.Walk.
 | |
| //
 | |
| // filepath.Walk's design necessarily calls os.Lstat on each file,
 | |
| // even if the caller needs less info.
 | |
| // Many tools need only the type of each file.
 | |
| // On some platforms, this information is provided directly by the readdir
 | |
| // system call, avoiding the need to stat each file individually.
 | |
| // fastwalk_unix.go contains a fork of the syscall routines.
 | |
| //
 | |
| // See golang.org/issue/16399
 | |
| //
 | |
| // Walk walks the file tree rooted at root, calling walkFn for
 | |
| // each file or directory in the tree, including root.
 | |
| //
 | |
| // If fastWalk returns filepath.SkipDir, the directory is skipped.
 | |
| //
 | |
| // Unlike filepath.Walk:
 | |
| //   * file stat calls must be done by the user.
 | |
| //     The only provided metadata is the file type, which does not include
 | |
| //     any permission bits.
 | |
| //   * multiple goroutines stat the filesystem concurrently. The provided
 | |
| //     walkFn must be safe for concurrent use.
 | |
| //   * fastWalk can follow symlinks if walkFn returns the TraverseLink
 | |
| //     sentinel error. It is the walkFn's responsibility to prevent
 | |
| //     fastWalk from going into symlink cycles.
 | |
| func Walk(root string, walkFn func(path string, typ os.FileMode) error) error {
 | |
| 	// TODO(bradfitz): make numWorkers configurable? We used a
 | |
| 	// minimum of 4 to give the kernel more info about multiple
 | |
| 	// things we want, in hopes its I/O scheduling can take
 | |
| 	// advantage of that. Hopefully most are in cache. Maybe 4 is
 | |
| 	// even too low of a minimum. Profile more.
 | |
| 	numWorkers := 4
 | |
| 	if n := runtime.NumCPU(); n > numWorkers {
 | |
| 		numWorkers = n
 | |
| 	}
 | |
| 
 | |
| 	// Make sure to wait for all workers to finish, otherwise
 | |
| 	// walkFn could still be called after returning. This Wait call
 | |
| 	// runs after close(e.donec) below.
 | |
| 	var wg sync.WaitGroup
 | |
| 	defer wg.Wait()
 | |
| 
 | |
| 	w := &walker{
 | |
| 		fn:       walkFn,
 | |
| 		enqueuec: make(chan walkItem, numWorkers), // buffered for performance
 | |
| 		workc:    make(chan walkItem, numWorkers), // buffered for performance
 | |
| 		donec:    make(chan struct{}),
 | |
| 
 | |
| 		// buffered for correctness & not leaking goroutines:
 | |
| 		resc: make(chan error, numWorkers),
 | |
| 	}
 | |
| 	defer close(w.donec)
 | |
| 
 | |
| 	for i := 0; i < numWorkers; i++ {
 | |
| 		wg.Add(1)
 | |
| 		go w.doWork(&wg)
 | |
| 	}
 | |
| 	todo := []walkItem{{dir: root}}
 | |
| 	out := 0
 | |
| 	for {
 | |
| 		workc := w.workc
 | |
| 		var workItem walkItem
 | |
| 		if len(todo) == 0 {
 | |
| 			workc = nil
 | |
| 		} else {
 | |
| 			workItem = todo[len(todo)-1]
 | |
| 		}
 | |
| 		select {
 | |
| 		case workc <- workItem:
 | |
| 			todo = todo[:len(todo)-1]
 | |
| 			out++
 | |
| 		case it := <-w.enqueuec:
 | |
| 			todo = append(todo, it)
 | |
| 		case err := <-w.resc:
 | |
| 			out--
 | |
| 			if err != nil {
 | |
| 				return err
 | |
| 			}
 | |
| 			if out == 0 && len(todo) == 0 {
 | |
| 				// It's safe to quit here, as long as the buffered
 | |
| 				// enqueue channel isn't also readable, which might
 | |
| 				// happen if the worker sends both another unit of
 | |
| 				// work and its result before the other select was
 | |
| 				// scheduled and both w.resc and w.enqueuec were
 | |
| 				// readable.
 | |
| 				select {
 | |
| 				case it := <-w.enqueuec:
 | |
| 					todo = append(todo, it)
 | |
| 				default:
 | |
| 					return nil
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // doWork reads directories as instructed (via workc) and runs the
 | |
| // user's callback function.
 | |
| func (w *walker) doWork(wg *sync.WaitGroup) {
 | |
| 	defer wg.Done()
 | |
| 	for {
 | |
| 		select {
 | |
| 		case <-w.donec:
 | |
| 			return
 | |
| 		case it := <-w.workc:
 | |
| 			select {
 | |
| 			case <-w.donec:
 | |
| 				return
 | |
| 			case w.resc <- w.walk(it.dir, !it.callbackDone):
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| type walker struct {
 | |
| 	fn func(path string, typ os.FileMode) error
 | |
| 
 | |
| 	donec    chan struct{} // closed on fastWalk's return
 | |
| 	workc    chan walkItem // to workers
 | |
| 	enqueuec chan walkItem // from workers
 | |
| 	resc     chan error    // from workers
 | |
| }
 | |
| 
 | |
// walkItem is one unit of work: a directory to be read.
type walkItem struct {
	// dir is the path of the directory to walk.
	dir string

	// callbackDone reports that the callback has already been called for
	// dir, so it must not be called again.
	callbackDone bool
}
 | |
| 
 | |
| func (w *walker) enqueue(it walkItem) {
 | |
| 	select {
 | |
| 	case w.enqueuec <- it:
 | |
| 	case <-w.donec:
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error {
 | |
| 	joined := dirName + string(os.PathSeparator) + baseName
 | |
| 	if typ == os.ModeDir {
 | |
| 		w.enqueue(walkItem{dir: joined})
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	err := w.fn(joined, typ)
 | |
| 	if typ == os.ModeSymlink {
 | |
| 		if err == TraverseLink {
 | |
| 			// Set callbackDone so we don't call it twice for both the
 | |
| 			// symlink-as-symlink and the symlink-as-directory later:
 | |
| 			w.enqueue(walkItem{dir: joined, callbackDone: true})
 | |
| 			return nil
 | |
| 		}
 | |
| 		if err == filepath.SkipDir {
 | |
| 			// Permit SkipDir on symlinks too.
 | |
| 			return nil
 | |
| 		}
 | |
| 	}
 | |
| 	return err
 | |
| }
 | |
| 
 | |
| func (w *walker) walk(root string, runUserCallback bool) error {
 | |
| 	if runUserCallback {
 | |
| 		err := w.fn(root, os.ModeDir)
 | |
| 		if err == filepath.SkipDir {
 | |
| 			return nil
 | |
| 		}
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return readDir(root, w.onDirEnt)
 | |
| }
 |