godoc: feed indexer concurrently, add selective indexing hook, tests

On big corpuses, the indexer was spending most of its time waiting
for filesystem operations (especially with network filesystems)
and not actually indexing.  This keeps the filesystem busy and indexer
running in different goroutines.

Also, add a hook to let godoc hosts disable indexing of certain
directories.

And finally, start adding tests for godoc, which required
fleshing out (and testing) the mapfs code.

R=golang-dev, adg, bgarcia
CC=golang-dev
https://golang.org/cl/21520045
This commit is contained in:
Brad Fitzpatrick 2013-11-05 09:35:58 -05:00
parent 2afbb1cd5f
commit 964f0f559c
5 changed files with 352 additions and 51 deletions

View File

@ -32,6 +32,11 @@ type Corpus struct {
// order. // order.
IndexFiles string IndexFiles string
// IndexThrottle specifies the indexing throttle value
// between 0.0 and 1.0. At 0.0, the indexer always sleeps.
// At 1.0, the indexer never sleeps. Because 0.0 is useless
// and redundant with setting IndexEnabled to false, the
// zero value for IndexThrottle means 0.9.
IndexThrottle float64 IndexThrottle float64
// MaxResults optionally specifies the maximum results for indexing. // MaxResults optionally specifies the maximum results for indexing.
@ -50,6 +55,13 @@ type Corpus struct {
// package listing. // package listing.
SummarizePackage func(pkg string) (summary string, showList, ok bool) SummarizePackage func(pkg string) (summary string, showList, ok bool)
// IndexDirectory optionally specifies a function to determine
// whether the provided directory should be indexed. The dir
// will be of the form "/src/cmd/6a", "/doc/play",
// "/src/pkg/io", etc.
// If nil, all directories are indexed if indexing is enabled.
IndexDirectory func(dir string) bool
testDir string // TODO(bradfitz,adg): migrate old godoc flag? looks unused. testDir string // TODO(bradfitz,adg): migrate old godoc flag? looks unused.
// Send a value on this channel to trigger a metadata refresh. // Send a value on this channel to trigger a metadata refresh.

View File

@ -56,10 +56,12 @@ import (
"runtime" "runtime"
"sort" "sort"
"strings" "strings"
"sync"
"time" "time"
"unicode" "unicode"
"code.google.com/p/go.tools/godoc/util" "code.google.com/p/go.tools/godoc/util"
"code.google.com/p/go.tools/godoc/vfs"
) )
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
@ -371,8 +373,11 @@ type Statistics struct {
// interface for walking file trees, and the ast.Visitor interface for // interface for walking file trees, and the ast.Visitor interface for
// walking Go ASTs. // walking Go ASTs.
type Indexer struct { type Indexer struct {
c *Corpus c *Corpus
fset *token.FileSet // file set for all indexed files fset *token.FileSet // file set for all indexed files
fsOpenGate chan bool // send pre fs.Open; receive on close
mu sync.Mutex // guards all the following
sources bytes.Buffer // concatenated sources sources bytes.Buffer // concatenated sources
packages map[string]*Pak // map of canonicalized *Paks packages map[string]*Pak // map of canonicalized *Paks
words map[string]*IndexResult // RunLists of Spots words map[string]*IndexResult // RunLists of Spots
@ -381,6 +386,7 @@ type Indexer struct {
file *File // AST for current file file *File // AST for current file
decl ast.Decl // AST for current decl decl ast.Decl // AST for current decl
stats Statistics stats Statistics
throttle *util.Throttle
} }
func (x *Indexer) lookupPackage(path, name string) *Pak { func (x *Indexer) lookupPackage(path, name string) *Pak {
@ -535,12 +541,7 @@ func pkgName(filename string) string {
// addFile adds a file to the index if possible and returns the file set file // addFile adds a file to the index if possible and returns the file set file
// and the file's AST if it was successfully parsed as a Go file. If addFile // and the file's AST if it was successfully parsed as a Go file. If addFile
// failed (that is, if the file was not added), it returns file == nil. // failed (that is, if the file was not added), it returns file == nil.
func (x *Indexer) addFile(filename string, goFile bool) (file *token.File, ast *ast.File) { func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) {
// open file
f, err := x.c.fs.Open(filename)
if err != nil {
return
}
defer f.Close() defer f.Close()
// The file set's base offset and x.sources size must be in lock-step; // The file set's base offset and x.sources size must be in lock-step;
@ -641,17 +642,17 @@ func isWhitelisted(filename string) bool {
return whitelisted[key] return whitelisted[key]
} }
func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) { func (x *Indexer) visitFile(dirname string, fi os.FileInfo, fulltextIndex bool) {
if f.IsDir() { if fi.IsDir() {
return return
} }
filename := pathpkg.Join(dirname, f.Name()) filename := pathpkg.Join(dirname, fi.Name())
goFile := false goFile := false
switch { switch {
case isGoFile(f): case isGoFile(fi):
if !includeTestFiles && (!isPkgFile(f) || strings.HasPrefix(filename, "test/")) { if !includeTestFiles && (!isPkgFile(fi) || strings.HasPrefix(filename, "test/")) {
return return
} }
if !includeMainPackages && pkgName(filename) == "main" { if !includeMainPackages && pkgName(filename) == "main" {
@ -659,11 +660,25 @@ func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) {
} }
goFile = true goFile = true
case !fulltextIndex || !isWhitelisted(f.Name()): case !fulltextIndex || !isWhitelisted(fi.Name()):
return return
} }
file, fast := x.addFile(filename, goFile) x.fsOpenGate <- true
defer func() { <-x.fsOpenGate }()
// open file
f, err := x.c.fs.Open(filename)
if err != nil {
return
}
x.mu.Lock()
defer x.mu.Unlock()
x.throttle.Throttle()
file, fast := x.addFile(f, filename, goFile)
if file == nil { if file == nil {
return // addFile failed return // addFile failed
} }
@ -672,7 +687,7 @@ func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) {
// we've got a Go file to index // we've got a Go file to index
x.current = file x.current = file
pak := x.lookupPackage(dirname, fast.Name.Name) pak := x.lookupPackage(dirname, fast.Name.Name)
x.file = &File{f.Name(), pak} x.file = &File{fi.Name(), pak}
ast.Walk(x, fast) ast.Walk(x, fast)
} }
@ -701,33 +716,59 @@ type Index struct {
func canonical(w string) string { return strings.ToLower(w) } func canonical(w string) string { return strings.ToLower(w) }
// Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems
// consuming file descriptors, where some systems have low 256 or 512 limits.
// Go should have a built-in way to cap fd usage under the ulimit.
const (
maxOpenFiles = 200
maxOpenDirs = 50
)
// NewIndex creates a new index for the .go files // NewIndex creates a new index for the .go files
// in the directories given by dirnames. // in the directories given by dirnames.
// // The throttle parameter specifies a value between 0.0 and 1.0 that controls
// artificial sleeping. If 0.0, the indexer always sleeps. If 1.0, the indexer
// never sleeps.
func NewIndex(c *Corpus, dirnames <-chan string, fulltextIndex bool, throttle float64) *Index { func NewIndex(c *Corpus, dirnames <-chan string, fulltextIndex bool, throttle float64) *Index {
var x Indexer
th := util.NewThrottle(throttle, 100*time.Millisecond) // run at least 0.1s at a time
// initialize Indexer // initialize Indexer
// (use some reasonably sized maps to start) // (use some reasonably sized maps to start)
x.c = c x := &Indexer{
x.fset = token.NewFileSet() c: c,
x.packages = make(map[string]*Pak, 256) fset: token.NewFileSet(),
x.words = make(map[string]*IndexResult, 8192) fsOpenGate: make(chan bool, maxOpenFiles),
packages: make(map[string]*Pak, 256),
words: make(map[string]*IndexResult, 8192),
throttle: util.NewThrottle(throttle, 100*time.Millisecond), // run at least 0.1s at a time
}
// index all files in the directories given by dirnames // index all files in the directories given by dirnames
var wg sync.WaitGroup // outstanding ReadDir + visitFile
dirGate := make(chan bool, maxOpenDirs)
for dirname := range dirnames { for dirname := range dirnames {
list, err := c.fs.ReadDir(dirname) if c.IndexDirectory != nil && !c.IndexDirectory(dirname) {
if err != nil { continue
continue // ignore this directory
} }
for _, f := range list { dirGate <- true
if !f.IsDir() { wg.Add(1)
x.visitFile(dirname, f, fulltextIndex) go func(dirname string) {
defer func() { <-dirGate }()
defer wg.Done()
list, err := c.fs.ReadDir(dirname)
if err != nil {
log.Printf("ReadDir(%q): %v; skipping directory", dirname, err)
return // ignore this directory
} }
th.Throttle() for _, fi := range list {
} wg.Add(1)
go func(fi os.FileInfo) {
defer wg.Done()
x.visitFile(dirname, fi, fulltextIndex)
}(fi)
}
}(dirname)
} }
wg.Wait()
if !fulltextIndex { if !fulltextIndex {
// the file set, the current file, and the sources are // the file set, the current file, and the sources are
@ -751,7 +792,7 @@ func NewIndex(c *Corpus, dirnames <-chan string, fulltextIndex bool, throttle fl
Others: others, Others: others,
} }
wlist = append(wlist, &wordPair{canonical(w), w}) wlist = append(wlist, &wordPair{canonical(w), w})
th.Throttle() x.throttle.Throttle()
} }
x.stats.Words = len(words) x.stats.Words = len(words)
@ -1094,7 +1135,13 @@ func (c *Corpus) UpdateIndex() {
log.Printf("updating index...") log.Printf("updating index...")
} }
start := time.Now() start := time.Now()
index := NewIndex(c, c.fsDirnames(), c.MaxResults > 0, c.IndexThrottle) throttle := c.IndexThrottle
if throttle <= 0 {
throttle = 0.9
} else if throttle > 1.0 {
throttle = 1.0
}
index := NewIndex(c, c.fsDirnames(), c.MaxResults > 0, throttle)
stop := time.Now() stop := time.Now()
c.searchIndex.Set(index) c.searchIndex.Set(index)
if c.Verbose { if c.Verbose {
@ -1105,10 +1152,14 @@ func (c *Corpus) UpdateIndex() {
} }
memstats := new(runtime.MemStats) memstats := new(runtime.MemStats)
runtime.ReadMemStats(memstats) runtime.ReadMemStats(memstats)
log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) if c.Verbose {
log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
}
runtime.GC() runtime.GC()
runtime.ReadMemStats(memstats) runtime.ReadMemStats(memstats)
log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) if c.Verbose {
log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
}
} }
// RunIndexer runs forever, indexing. // RunIndexer runs forever, indexing.

56
godoc/index_test.go Normal file
View File

@ -0,0 +1,56 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package godoc
import (
"reflect"
"strings"
"testing"
"code.google.com/p/go.tools/godoc/vfs/mapfs"
)
func TestIndex(t *testing.T) {
c := NewCorpus(mapfs.New(map[string]string{
"src/pkg/foo/foo.go": `// Package foo is an example.
package foo
// Foo is stuff.
type Foo struct{}
func New() *Foo {
return new(Foo)
}
`,
"src/pkg/bar/bar.go": `// Package bar is another example to test races.
package bar
`,
"src/pkg/skip/skip.go": `// Package skip should be skipped.
package skip
func Skip() {}
`,
}))
c.IndexEnabled = true
c.IndexDirectory = func(dir string) bool {
return !strings.Contains(dir, "skip")
}
if err := c.Init(); err != nil {
t.Fatal(err)
}
c.UpdateIndex()
ix, _ := c.CurrentIndex()
if ix == nil {
t.Fatal("no index")
}
t.Logf("Got: %#v", ix)
wantStats := Statistics{Bytes: 179, Files: 2, Lines: 11, Words: 5, Spots: 7}
if !reflect.DeepEqual(ix.Stats(), wantStats) {
t.Errorf("Stats = %#v; want %#v", ix.Stats(), wantStats)
}
if _, ok := ix.words["Skip"]; ok {
t.Errorf("the word Skip was found; expected it to be skipped")
}
}

View File

@ -9,12 +9,17 @@ package mapfs
import ( import (
"io" "io"
"os" "os"
pathpkg "path"
"sort"
"strings" "strings"
"time" "time"
"code.google.com/p/go.tools/godoc/vfs" "code.google.com/p/go.tools/godoc/vfs"
) )
// New returns a new FileSystem from the provided map.
// Map keys should be forward slash-separated pathnames
// and not contain a leading slash.
func New(m map[string]string) vfs.FileSystem { func New(m map[string]string) vfs.FileSystem {
return mapFS(m) return mapFS(m)
} }
@ -27,10 +32,7 @@ func (fs mapFS) String() string { return "mapfs" }
func (fs mapFS) Close() error { return nil } func (fs mapFS) Close() error { return nil }
func filename(p string) string { func filename(p string) string {
if len(p) > 0 && p[0] == '/' { return strings.TrimPrefix(p, "/")
p = p[1:]
}
return p
} }
func (fs mapFS) Open(p string) (vfs.ReadSeekCloser, error) { func (fs mapFS) Open(p string) (vfs.ReadSeekCloser, error) {
@ -41,22 +43,85 @@ func (fs mapFS) Open(p string) (vfs.ReadSeekCloser, error) {
return nopCloser{strings.NewReader(b)}, nil return nopCloser{strings.NewReader(b)}, nil
} }
func fileInfo(name, contents string) os.FileInfo {
return mapFI{name: pathpkg.Base(name), size: len(contents)}
}
func dirInfo(name string) os.FileInfo {
return mapFI{name: pathpkg.Base(name), dir: true}
}
func (fs mapFS) Lstat(p string) (os.FileInfo, error) { func (fs mapFS) Lstat(p string) (os.FileInfo, error) {
b, ok := fs[filename(p)] b, ok := fs[filename(p)]
if !ok { if ok {
return nil, os.ErrNotExist return fileInfo(p, b), nil
} }
return mapFI{name: p, size: int64(len(b))}, nil ents, _ := fs.ReadDir(p)
if len(ents) > 0 {
return dirInfo(p), nil
}
return nil, os.ErrNotExist
} }
func (fs mapFS) Stat(p string) (os.FileInfo, error) { func (fs mapFS) Stat(p string) (os.FileInfo, error) {
return fs.Lstat(p) return fs.Lstat(p)
} }
// slashdir returns path.Dir(p), but special-cases paths not beginning
// with a slash to be in the root.
func slashdir(p string) string {
d := pathpkg.Dir(p)
if d == "." {
return "/"
}
if strings.HasPrefix(p, "/") {
return d
}
return "/" + d
}
func (fs mapFS) ReadDir(p string) ([]os.FileInfo, error) { func (fs mapFS) ReadDir(p string) ([]os.FileInfo, error) {
var list []os.FileInfo p = pathpkg.Clean(p)
var ents []string
fim := make(map[string]os.FileInfo) // base -> fi
for fn, b := range fs { for fn, b := range fs {
list = append(list, mapFI{name: fn, size: int64(len(b))}) dir := slashdir(fn)
isFile := true
var lastBase string
for {
if dir == p {
base := lastBase
if isFile {
base = pathpkg.Base(fn)
}
if fim[base] == nil {
var fi os.FileInfo
if isFile {
fi = fileInfo(fn, b)
} else {
fi = dirInfo(base)
}
ents = append(ents, base)
fim[base] = fi
}
}
if dir == "/" {
break
} else {
isFile = false
lastBase = pathpkg.Base(dir)
dir = pathpkg.Dir(dir)
}
}
}
if len(ents) == 0 {
return nil, os.ErrNotExist
}
sort.Strings(ents)
var list []os.FileInfo
for _, dir := range ents {
list = append(list, fim[dir])
} }
return list, nil return list, nil
} }
@ -64,15 +129,21 @@ func (fs mapFS) ReadDir(p string) ([]os.FileInfo, error) {
// mapFI is the map-based implementation of FileInfo. // mapFI is the map-based implementation of FileInfo.
type mapFI struct { type mapFI struct {
name string name string
size int64 size int
dir bool
} }
func (fi mapFI) IsDir() bool { return false } func (fi mapFI) IsDir() bool { return fi.dir }
func (fi mapFI) ModTime() time.Time { return time.Time{} } func (fi mapFI) ModTime() time.Time { return time.Time{} }
func (fi mapFI) Mode() os.FileMode { return 0444 } func (fi mapFI) Mode() os.FileMode {
func (fi mapFI) Name() string { return fi.name } if fi.IsDir() {
func (fi mapFI) Size() int64 { return fi.size } return 0755 | os.ModeDir
func (fi mapFI) Sys() interface{} { return nil } }
return 0444
}
func (fi mapFI) Name() string { return pathpkg.Base(fi.name) }
func (fi mapFI) Size() int64 { return int64(fi.size) }
func (fi mapFI) Sys() interface{} { return nil }
type nopCloser struct { type nopCloser struct {
io.ReadSeeker io.ReadSeeker

View File

@ -0,0 +1,111 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mapfs
import (
"io/ioutil"
"os"
"reflect"
"testing"
)
func TestOpenRoot(t *testing.T) {
fs := New(map[string]string{
"foo/bar/three.txt": "a",
"foo/bar.txt": "b",
"top.txt": "c",
"other-top.txt": "d",
})
tests := []struct {
path string
want string
}{
{"/foo/bar/three.txt", "a"},
{"foo/bar/three.txt", "a"},
{"foo/bar.txt", "b"},
{"top.txt", "c"},
{"/top.txt", "c"},
{"other-top.txt", "d"},
{"/other-top.txt", "d"},
}
for _, tt := range tests {
rsc, err := fs.Open(tt.path)
if err != nil {
t.Errorf("Open(%q) = %v", tt.path, err)
continue
}
slurp, err := ioutil.ReadAll(rsc)
if err != nil {
t.Error(err)
}
if string(slurp) != tt.want {
t.Errorf("Read(%q) = %q; want %q", tt.path, tt.want, slurp)
}
rsc.Close()
}
_, err := fs.Open("/xxxx")
if !os.IsNotExist(err) {
t.Errorf("ReadDir /xxxx = %v; want os.IsNotExist error", err)
}
}
func TestReaddir(t *testing.T) {
fs := New(map[string]string{
"foo/bar/three.txt": "333",
"foo/bar.txt": "22",
"top.txt": "top.txt file",
"other-top.txt": "other-top.txt file",
})
tests := []struct {
dir string
want []os.FileInfo
}{
{
dir: "/",
want: []os.FileInfo{
mapFI{name: "foo", dir: true},
mapFI{name: "other-top.txt", size: len("other-top.txt file")},
mapFI{name: "top.txt", size: len("top.txt file")},
},
},
{
dir: "/foo",
want: []os.FileInfo{
mapFI{name: "bar", dir: true},
mapFI{name: "bar.txt", size: 2},
},
},
{
dir: "/foo/",
want: []os.FileInfo{
mapFI{name: "bar", dir: true},
mapFI{name: "bar.txt", size: 2},
},
},
{
dir: "/foo/bar",
want: []os.FileInfo{
mapFI{name: "three.txt", size: 3},
},
},
}
for _, tt := range tests {
fis, err := fs.ReadDir(tt.dir)
if err != nil {
t.Errorf("ReadDir(%q) = %v", tt.dir, err)
continue
}
if !reflect.DeepEqual(fis, tt.want) {
t.Errorf("ReadDir(%q) = %#v; want %#v", tt.dir, fis, tt.want)
continue
}
}
_, err := fs.ReadDir("/xxxx")
if !os.IsNotExist(err) {
t.Errorf("ReadDir /xxxx = %v; want os.IsNotExist error", err)
}
}