diff --git a/godoc/corpus.go b/godoc/corpus.go index 106c6b62..ed268947 100644 --- a/godoc/corpus.go +++ b/godoc/corpus.go @@ -32,6 +32,11 @@ type Corpus struct { // order. IndexFiles string + // IndexThrottle specifies the indexing throttle value + // between 0.0 and 1.0. At 0.0, the indexer always sleeps. + // At 1.0, the indexer never sleeps. Because 0.0 is useless + // and redundant with setting IndexEnabled to false, the + // zero value for IndexThrottle means 0.9. IndexThrottle float64 // MaxResults optionally specifies the maximum results for indexing. @@ -50,6 +55,13 @@ type Corpus struct { // package listing. SummarizePackage func(pkg string) (summary string, showList, ok bool) + // IndexDirectory optionally specifies a function to determine + // whether the provided directory should be indexed. The dir + // will be of the form "/src/cmd/6a", "/doc/play", + // "/src/pkg/io", etc. + // If nil, all directories are indexed if indexing is enabled. + IndexDirectory func(dir string) bool + testDir string // TODO(bradfitz,adg): migrate old godoc flag? looks unused. // Send a value on this channel to trigger a metadata refresh. diff --git a/godoc/index.go b/godoc/index.go index 5611489f..da97ce0a 100644 --- a/godoc/index.go +++ b/godoc/index.go @@ -56,10 +56,12 @@ import ( "runtime" "sort" "strings" + "sync" "time" "unicode" "code.google.com/p/go.tools/godoc/util" + "code.google.com/p/go.tools/godoc/vfs" ) // ---------------------------------------------------------------------------- @@ -371,8 +373,11 @@ type Statistics struct { // interface for walking file trees, and the ast.Visitor interface for // walking Go ASTs. type Indexer struct { - c *Corpus - fset *token.FileSet // file set for all indexed files + c *Corpus + fset *token.FileSet // file set for all indexed files + fsOpenGate chan bool // send pre fs.Open; receive on close + + mu sync.Mutex // guards all the following sources bytes.Buffer // concatenated sources packages map[string]*Pak // map of canonicalized *Paks words map[string]*IndexResult // RunLists of Spots @@ -381,6 +386,7 @@ type Indexer struct { file *File // AST for current file decl ast.Decl // AST for current decl stats Statistics + throttle *util.Throttle } func (x *Indexer) lookupPackage(path, name string) *Pak { @@ -535,12 +541,7 @@ func pkgName(filename string) string { // addFile adds a file to the index if possible and returns the file set file // and the file's AST if it was successfully parsed as a Go file. If addFile // failed (that is, if the file was not added), it returns file == nil. -func (x *Indexer) addFile(filename string, goFile bool) (file *token.File, ast *ast.File) { - // open file - f, err := x.c.fs.Open(filename) - if err != nil { - return - } +func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) { defer f.Close() // The file set's base offset and x.sources size must be in lock-step; @@ -641,17 +642,17 @@ func isWhitelisted(filename string) bool { return whitelisted[key] } -func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) { - if f.IsDir() { +func (x *Indexer) visitFile(dirname string, fi os.FileInfo, fulltextIndex bool) { + if fi.IsDir() { return } - filename := pathpkg.Join(dirname, f.Name()) + filename := pathpkg.Join(dirname, fi.Name()) goFile := false switch { - case isGoFile(f): - if !includeTestFiles && (!isPkgFile(f) || strings.HasPrefix(filename, "test/")) { + case isGoFile(fi): + if !includeTestFiles && (!isPkgFile(fi) || strings.HasPrefix(filename, "test/")) { return } if !includeMainPackages && pkgName(filename) == "main" { @@ -659,11 +660,25 @@ func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) { } goFile = true - case !fulltextIndex || !isWhitelisted(f.Name()): + case !fulltextIndex || !isWhitelisted(fi.Name()): return } - file, fast := x.addFile(filename, goFile) + x.fsOpenGate <- true + defer func() { <-x.fsOpenGate }() + + // open file + f, err := x.c.fs.Open(filename) + if err != nil { + return + } + + x.mu.Lock() + defer x.mu.Unlock() + + x.throttle.Throttle() + + file, fast := x.addFile(f, filename, goFile) if file == nil { return // addFile failed } @@ -672,7 +687,7 @@ func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) { // we've got a Go file to index x.current = file pak := x.lookupPackage(dirname, fast.Name.Name) - x.file = &File{f.Name(), pak} + x.file = &File{fi.Name(), pak} ast.Walk(x, fast) } @@ -701,33 +716,59 @@ type Index struct { func canonical(w string) string { return strings.ToLower(w) } +// Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems +// consuming file descriptors, where some systems have low 256 or 512 limits. +// Go should have a built-in way to cap fd usage under the ulimit. +const ( + maxOpenFiles = 200 + maxOpenDirs = 50 +) + // NewIndex creates a new index for the .go files // in the directories given by dirnames. -// +// The throttle parameter specifies a value between 0.0 and 1.0 that controls +// artificial sleeping. If 0.0, the indexer always sleeps. If 1.0, the indexer +// never sleeps. func NewIndex(c *Corpus, dirnames <-chan string, fulltextIndex bool, throttle float64) *Index { - var x Indexer - th := util.NewThrottle(throttle, 100*time.Millisecond) // run at least 0.1s at a time - // initialize Indexer // (use some reasonably sized maps to start) - x.c = c - x.fset = token.NewFileSet() - x.packages = make(map[string]*Pak, 256) - x.words = make(map[string]*IndexResult, 8192) + x := &Indexer{ + c: c, + fset: token.NewFileSet(), + fsOpenGate: make(chan bool, maxOpenFiles), + packages: make(map[string]*Pak, 256), + words: make(map[string]*IndexResult, 8192), + throttle: util.NewThrottle(throttle, 100*time.Millisecond), // run at least 0.1s at a time + } // index all files in the directories given by dirnames + var wg sync.WaitGroup // outstanding ReadDir + visitFile + dirGate := make(chan bool, maxOpenDirs) for dirname := range dirnames { - list, err := c.fs.ReadDir(dirname) - if err != nil { - continue // ignore this directory + if c.IndexDirectory != nil && !c.IndexDirectory(dirname) { + continue } - for _, f := range list { - if !f.IsDir() { - x.visitFile(dirname, f, fulltextIndex) + dirGate <- true + wg.Add(1) + go func(dirname string) { + defer func() { <-dirGate }() + defer wg.Done() + + list, err := c.fs.ReadDir(dirname) + if err != nil { + log.Printf("ReadDir(%q): %v; skipping directory", dirname, err) + return // ignore this directory } - th.Throttle() - } + for _, fi := range list { + wg.Add(1) + go func(fi os.FileInfo) { + defer wg.Done() + x.visitFile(dirname, fi, fulltextIndex) + }(fi) + } + }(dirname) } + wg.Wait() if !fulltextIndex { // the file set, the current file, and the sources are @@ -751,7 +792,7 @@ func NewIndex(c *Corpus, dirnames <-chan string, fulltextIndex bool, throttle fl Others: others, } wlist = append(wlist, &wordPair{canonical(w), w}) - th.Throttle() + x.throttle.Throttle() } x.stats.Words = len(words) @@ -1094,7 +1135,13 @@ func (c *Corpus) UpdateIndex() { log.Printf("updating index...") } start := time.Now() - index := NewIndex(c, c.fsDirnames(), c.MaxResults > 0, c.IndexThrottle) + throttle := c.IndexThrottle + if throttle <= 0 { + throttle = 0.9 + } else if throttle > 1.0 { + throttle = 1.0 + } + index := NewIndex(c, c.fsDirnames(), c.MaxResults > 0, throttle) stop := time.Now() c.searchIndex.Set(index) if c.Verbose { @@ -1105,10 +1152,14 @@ func (c *Corpus) UpdateIndex() { } memstats := new(runtime.MemStats) runtime.ReadMemStats(memstats) - log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) + if c.Verbose { + log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) + } runtime.GC() runtime.ReadMemStats(memstats) - log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) + if c.Verbose { + log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) + } } // RunIndexer runs forever, indexing. diff --git a/godoc/index_test.go b/godoc/index_test.go new file mode 100644 index 00000000..001d2a77 --- /dev/null +++ b/godoc/index_test.go @@ -0,0 +1,56 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package godoc + +import ( + "reflect" + "strings" + "testing" + + "code.google.com/p/go.tools/godoc/vfs/mapfs" +) + +func TestIndex(t *testing.T) { + c := NewCorpus(mapfs.New(map[string]string{ + "src/pkg/foo/foo.go": `// Package foo is an example. +package foo + +// Foo is stuff. +type Foo struct{} + +func New() *Foo { + return new(Foo) +} +`, + "src/pkg/bar/bar.go": `// Package bar is another example to test races. +package bar +`, + "src/pkg/skip/skip.go": `// Package skip should be skipped. +package skip +func Skip() {} +`, + })) + c.IndexEnabled = true + c.IndexDirectory = func(dir string) bool { + return !strings.Contains(dir, "skip") + } + + if err := c.Init(); err != nil { + t.Fatal(err) + } + c.UpdateIndex() + ix, _ := c.CurrentIndex() + if ix == nil { + t.Fatal("no index") + } + t.Logf("Got: %#v", ix) + wantStats := Statistics{Bytes: 179, Files: 2, Lines: 11, Words: 5, Spots: 7} + if !reflect.DeepEqual(ix.Stats(), wantStats) { + t.Errorf("Stats = %#v; want %#v", ix.Stats(), wantStats) + } + if _, ok := ix.words["Skip"]; ok { + t.Errorf("the word Skip was found; expected it to be skipped") + } +} diff --git a/godoc/vfs/mapfs/mapfs.go b/godoc/vfs/mapfs/mapfs.go index 5dd20fdd..550c4baf 100644 --- a/godoc/vfs/mapfs/mapfs.go +++ b/godoc/vfs/mapfs/mapfs.go @@ -9,12 +9,17 @@ package mapfs import ( "io" "os" + pathpkg "path" + "sort" "strings" "time" "code.google.com/p/go.tools/godoc/vfs" ) +// New returns a new FileSystem from the provided map. +// Map keys should be forward slash-separated pathnames +// and not contain a leading slash. func New(m map[string]string) vfs.FileSystem { return mapFS(m) } @@ -27,10 +32,7 @@ func (fs mapFS) String() string { return "mapfs" } func (fs mapFS) Close() error { return nil } func filename(p string) string { - if len(p) > 0 && p[0] == '/' { - p = p[1:] - } - return p + return strings.TrimPrefix(p, "/") } func (fs mapFS) Open(p string) (vfs.ReadSeekCloser, error) { @@ -41,22 +43,85 @@ func (fs mapFS) Open(p string) (vfs.ReadSeekCloser, error) { return nopCloser{strings.NewReader(b)}, nil } +func fileInfo(name, contents string) os.FileInfo { + return mapFI{name: pathpkg.Base(name), size: len(contents)} +} + +func dirInfo(name string) os.FileInfo { + return mapFI{name: pathpkg.Base(name), dir: true} +} + func (fs mapFS) Lstat(p string) (os.FileInfo, error) { b, ok := fs[filename(p)] - if !ok { - return nil, os.ErrNotExist + if ok { + return fileInfo(p, b), nil } - return mapFI{name: p, size: int64(len(b))}, nil + ents, _ := fs.ReadDir(p) + if len(ents) > 0 { + return dirInfo(p), nil + } + return nil, os.ErrNotExist } func (fs mapFS) Stat(p string) (os.FileInfo, error) { return fs.Lstat(p) } +// slashdir returns path.Dir(p), but special-cases paths not beginning +// with a slash to be in the root. +func slashdir(p string) string { + d := pathpkg.Dir(p) + if d == "." { + return "/" + } + if strings.HasPrefix(p, "/") { + return d + } + return "/" + d +} + func (fs mapFS) ReadDir(p string) ([]os.FileInfo, error) { - var list []os.FileInfo + p = pathpkg.Clean(p) + var ents []string + fim := make(map[string]os.FileInfo) // base -> fi for fn, b := range fs { - list = append(list, mapFI{name: fn, size: int64(len(b))}) + dir := slashdir(fn) + isFile := true + var lastBase string + for { + if dir == p { + base := lastBase + if isFile { + base = pathpkg.Base(fn) + } + if fim[base] == nil { + var fi os.FileInfo + if isFile { + fi = fileInfo(fn, b) + } else { + fi = dirInfo(base) + } + ents = append(ents, base) + fim[base] = fi + } + } + if dir == "/" { + break + } else { + isFile = false + lastBase = pathpkg.Base(dir) + dir = pathpkg.Dir(dir) + } + } + } + if len(ents) == 0 { + return nil, os.ErrNotExist + } + + sort.Strings(ents) + var list []os.FileInfo + for _, dir := range ents { + list = append(list, fim[dir]) } return list, nil } @@ -64,15 +129,21 @@ func (fs mapFS) ReadDir(p string) ([]os.FileInfo, error) { // mapFI is the map-based implementation of FileInfo. type mapFI struct { name string - size int64 + size int + dir bool } -func (fi mapFI) IsDir() bool { return false } +func (fi mapFI) IsDir() bool { return fi.dir } func (fi mapFI) ModTime() time.Time { return time.Time{} } -func (fi mapFI) Mode() os.FileMode { return 0444 } -func (fi mapFI) Name() string { return fi.name } -func (fi mapFI) Size() int64 { return fi.size } -func (fi mapFI) Sys() interface{} { return nil } +func (fi mapFI) Mode() os.FileMode { + if fi.IsDir() { + return 0755 | os.ModeDir + } + return 0444 +} +func (fi mapFI) Name() string { return pathpkg.Base(fi.name) } +func (fi mapFI) Size() int64 { return int64(fi.size) } +func (fi mapFI) Sys() interface{} { return nil } type nopCloser struct { io.ReadSeeker diff --git a/godoc/vfs/mapfs/mapfs_test.go b/godoc/vfs/mapfs/mapfs_test.go new file mode 100644 index 00000000..6b7db290 --- /dev/null +++ b/godoc/vfs/mapfs/mapfs_test.go @@ -0,0 +1,111 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mapfs + +import ( + "io/ioutil" + "os" + "reflect" + "testing" +) + +func TestOpenRoot(t *testing.T) { + fs := New(map[string]string{ + "foo/bar/three.txt": "a", + "foo/bar.txt": "b", + "top.txt": "c", + "other-top.txt": "d", + }) + tests := []struct { + path string + want string + }{ + {"/foo/bar/three.txt", "a"}, + {"foo/bar/three.txt", "a"}, + {"foo/bar.txt", "b"}, + {"top.txt", "c"}, + {"/top.txt", "c"}, + {"other-top.txt", "d"}, + {"/other-top.txt", "d"}, + } + for _, tt := range tests { + rsc, err := fs.Open(tt.path) + if err != nil { + t.Errorf("Open(%q) = %v", tt.path, err) + continue + } + slurp, err := ioutil.ReadAll(rsc) + if err != nil { + t.Error(err) + } + if string(slurp) != tt.want { + t.Errorf("Read(%q) = %q; want %q", tt.path, tt.want, slurp) + } + rsc.Close() + } + + _, err := fs.Open("/xxxx") + if !os.IsNotExist(err) { + t.Errorf("ReadDir /xxxx = %v; want os.IsNotExist error", err) + } +} + +func TestReaddir(t *testing.T) { + fs := New(map[string]string{ + "foo/bar/three.txt": "333", + "foo/bar.txt": "22", + "top.txt": "top.txt file", + "other-top.txt": "other-top.txt file", + }) + tests := []struct { + dir string + want []os.FileInfo + }{ + { + dir: "/", + want: []os.FileInfo{ + mapFI{name: "foo", dir: true}, + mapFI{name: "other-top.txt", size: len("other-top.txt file")}, + mapFI{name: "top.txt", size: len("top.txt file")}, + }, + }, + { + dir: "/foo", + want: []os.FileInfo{ + mapFI{name: "bar", dir: true}, + mapFI{name: "bar.txt", size: 2}, + }, + }, + { + dir: "/foo/", + want: []os.FileInfo{ + mapFI{name: "bar", dir: true}, + mapFI{name: "bar.txt", size: 2}, + }, + }, + { + dir: "/foo/bar", + want: []os.FileInfo{ + mapFI{name: "three.txt", size: 3}, + }, + }, + } + for _, tt := range tests { + fis, err := fs.ReadDir(tt.dir) + if err != nil { + t.Errorf("ReadDir(%q) = %v", tt.dir, err) + continue + } + if !reflect.DeepEqual(fis, tt.want) { + t.Errorf("ReadDir(%q) = %#v; want %#v", tt.dir, fis, tt.want) + continue + } + } + + _, err := fs.ReadDir("/xxxx") + if !os.IsNotExist(err) { + t.Errorf("ReadDir /xxxx = %v; want os.IsNotExist error", err) + } +}