1591 lines
		
	
	
		
			42 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			1591 lines
		
	
	
		
			42 KiB
		
	
	
	
		
			Go
		
	
	
	
| // Copyright 2009 The Go Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| // This file contains the infrastructure to create an
 | |
| // identifier and full-text index for a set of Go files.
 | |
| //
 | |
| // Algorithm for identifier index:
 | |
| // - traverse all .go files of the file tree specified by root
 | |
| // - for each identifier (word) encountered, collect all occurrences (spots)
 | |
| //   into a list; this produces a list of spots for each word
 | |
| // - reduce the lists: from a list of spots to a list of FileRuns,
 | |
| //   and from a list of FileRuns into a list of PakRuns
 | |
| // - make a HitList from the PakRuns
 | |
| //
 | |
| // Details:
 | |
| // - keep two lists per word: one containing package-level declarations
 | |
| //   that have snippets, and one containing all other spots
 | |
| // - keep the snippets in a separate table indexed by snippet index
 | |
| //   and store the snippet index in place of the line number in a SpotInfo
 | |
| //   (the line number for spots with snippets is stored in the snippet)
 | |
| // - at the end, create lists of alternative spellings for a given
 | |
| //   word
 | |
| //
 | |
| // Algorithm for full text index:
 | |
| // - concatenate all source code in a byte buffer (in memory)
 | |
| // - add the files to a file set in lockstep as they are added to the byte
 | |
| //   buffer such that a byte buffer offset corresponds to the Pos value for
 | |
| //   that file location
 | |
| // - create a suffix array from the concatenated sources
 | |
| //
 | |
| // String lookup in full text index:
 | |
| // - use the suffix array to lookup a string's offsets - the offsets
 | |
| //   correspond to the Pos values relative to the file set
 | |
| // - translate the Pos values back into file and line information and
 | |
| //   sort the result
 | |
| 
 | |
| package godoc
 | |
| 
 | |
| import (
 | |
| 	"bufio"
 | |
| 	"bytes"
 | |
| 	"encoding/gob"
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"go/ast"
 | |
| 	"go/doc"
 | |
| 	"go/parser"
 | |
| 	"go/token"
 | |
| 	"index/suffixarray"
 | |
| 	"io"
 | |
| 	"log"
 | |
| 	"os"
 | |
| 	pathpkg "path"
 | |
| 	"path/filepath"
 | |
| 	"regexp"
 | |
| 	"runtime"
 | |
| 	"sort"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 	"sync"
 | |
| 	"time"
 | |
| 	"unicode"
 | |
| 
 | |
| 	"golang.org/x/tools/godoc/util"
 | |
| 	"golang.org/x/tools/godoc/vfs"
 | |
| )
 | |
| 
 | |
// ----------------------------------------------------------------------------
// InterfaceSlice is a helper type for sorting interface
// slices according to some slice-specific sort criteria.

// A comparer reports whether x sorts before y.
type comparer func(x, y interface{}) bool

// interfaceSlice adapts a []interface{} plus a comparer to sort.Interface.
type interfaceSlice struct {
	slice []interface{}
	less  comparer
}

// ----------------------------------------------------------------------------
// RunList

// A RunList is a list of entries that can be sorted according to some
// criteria. A RunList may be compressed by grouping "runs" of entries
// which are equal (according to the sort criteria) into a new RunList of
// runs. For instance, a RunList containing pairs (x, y) may be compressed
// into a RunList containing pair runs (x, {y}) where each run consists of
// a list of y's with the same x.
type RunList []interface{}

// sort orders h in place according to less.
func (h RunList) sort(less comparer) {
	sort.Sort(&interfaceSlice{slice: h, less: less})
}

func (p *interfaceSlice) Len() int           { return len(p.slice) }
func (p *interfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) }
func (p *interfaceSlice) Swap(i, j int)      { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] }

// reduce compresses entries which are equal according to less into
// "runs": h is sorted, and each maximal run of equal entries is
// replaced by the value newRun produces for that sub-list.
func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList {
	if len(h) == 0 {
		return nil
	}

	// Sort so that equal entries become adjacent and form runs.
	h.sort(less)

	// Scan for run boundaries: whenever less reports a change, the
	// half-open interval [start, j) is one complete run.
	var out RunList
	start, cur := 0, h[0]
	for j, y := range h {
		if less(cur, y) {
			out = append(out, newRun(h[start:j]))
			start, cur = j, h[j] // begin the next run
		}
	}
	// The final run has no following boundary; flush it explicitly.
	if start < len(h) {
		out = append(out, newRun(h[start:]))
	}

	return out
}
 | |
| 
 | |
| // ----------------------------------------------------------------------------
 | |
| // KindRun
 | |
| 
 | |
| // Debugging support. Disable to see multiple entries per line.
 | |
| const removeDuplicates = true
 | |
| 
 | |
| // A KindRun is a run of SpotInfos of the same kind in a given file.
 | |
| // The kind (3 bits) is stored in each SpotInfo element; to find the
 | |
| // kind of a KindRun, look at any of its elements.
 | |
| type KindRun []SpotInfo
 | |
| 
 | |
| // KindRuns are sorted by line number or index. Since the isIndex bit
 | |
| // is always the same for all infos in one list we can compare lori's.
 | |
| func (k KindRun) Len() int           { return len(k) }
 | |
| func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() }
 | |
| func (k KindRun) Swap(i, j int)      { k[i], k[j] = k[j], k[i] }
 | |
| 
 | |
| // FileRun contents are sorted by Kind for the reduction into KindRuns.
 | |
| func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }
 | |
| 
 | |
| // newKindRun allocates a new KindRun from the SpotInfo run h.
 | |
| func newKindRun(h RunList) interface{} {
 | |
| 	run := make(KindRun, len(h))
 | |
| 	for i, x := range h {
 | |
| 		run[i] = x.(SpotInfo)
 | |
| 	}
 | |
| 
 | |
| 	// Spots were sorted by file and kind to create this run.
 | |
| 	// Within this run, sort them by line number or index.
 | |
| 	sort.Sort(run)
 | |
| 
 | |
| 	if removeDuplicates {
 | |
| 		// Since both the lori and kind field must be
 | |
| 		// same for duplicates, and since the isIndex
 | |
| 		// bit is always the same for all infos in one
 | |
| 		// list we can simply compare the entire info.
 | |
| 		k := 0
 | |
| 		prev := SpotInfo(1<<32 - 1) // an unlikely value
 | |
| 		for _, x := range run {
 | |
| 			if x != prev {
 | |
| 				run[k] = x
 | |
| 				k++
 | |
| 				prev = x
 | |
| 			}
 | |
| 		}
 | |
| 		run = run[0:k]
 | |
| 	}
 | |
| 
 | |
| 	return run
 | |
| }
 | |
| 
 | |
// ----------------------------------------------------------------------------
// FileRun

// A Pak describes a Go package.
type Pak struct {
	Path string // path of directory containing the package
	Name string // package name as declared by package clause
}

// Paks are sorted by name (primary key) and by import path (secondary key).
func (p *Pak) less(q *Pak) bool {
	if p.Name != q.Name {
		return p.Name < q.Name
	}
	return p.Path < q.Path
}

// A File describes a Go file.
type File struct {
	Name string // directory-local file name
	Pak  *Pak   // the package to which the file belongs
}

// Path returns the file path of f.
func (f *File) Path() string {
	return pathpkg.Join(f.Pak.Path, f.Name)
}
 | |
| 
 | |
| // A Spot describes a single occurrence of a word.
 | |
| type Spot struct {
 | |
| 	File *File
 | |
| 	Info SpotInfo
 | |
| }
 | |
| 
 | |
| // A FileRun is a list of KindRuns belonging to the same file.
 | |
| type FileRun struct {
 | |
| 	File   *File
 | |
| 	Groups []KindRun
 | |
| }
 | |
| 
 | |
| // Spots are sorted by file path for the reduction into FileRuns.
 | |
| func lessSpot(x, y interface{}) bool {
 | |
| 	fx := x.(Spot).File
 | |
| 	fy := y.(Spot).File
 | |
| 	// same as "return fx.Path() < fy.Path()" but w/o computing the file path first
 | |
| 	px := fx.Pak.Path
 | |
| 	py := fy.Pak.Path
 | |
| 	return px < py || px == py && fx.Name < fy.Name
 | |
| }
 | |
| 
 | |
| // newFileRun allocates a new FileRun from the Spot run h.
 | |
| func newFileRun(h RunList) interface{} {
 | |
| 	file := h[0].(Spot).File
 | |
| 
 | |
| 	// reduce the list of Spots into a list of KindRuns
 | |
| 	h1 := make(RunList, len(h))
 | |
| 	for i, x := range h {
 | |
| 		h1[i] = x.(Spot).Info
 | |
| 	}
 | |
| 	h2 := h1.reduce(lessKind, newKindRun)
 | |
| 
 | |
| 	// create the FileRun
 | |
| 	groups := make([]KindRun, len(h2))
 | |
| 	for i, x := range h2 {
 | |
| 		groups[i] = x.(KindRun)
 | |
| 	}
 | |
| 	return &FileRun{file, groups}
 | |
| }
 | |
| 
 | |
| // ----------------------------------------------------------------------------
 | |
| // PakRun
 | |
| 
 | |
| // A PakRun describes a run of *FileRuns of a package.
 | |
| type PakRun struct {
 | |
| 	Pak   *Pak
 | |
| 	Files []*FileRun
 | |
| }
 | |
| 
 | |
| // Sorting support for files within a PakRun.
 | |
| func (p *PakRun) Len() int           { return len(p.Files) }
 | |
| func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
 | |
| func (p *PakRun) Swap(i, j int)      { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }
 | |
| 
 | |
| // FileRuns are sorted by package for the reduction into PakRuns.
 | |
| func lessFileRun(x, y interface{}) bool {
 | |
| 	return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak)
 | |
| }
 | |
| 
 | |
| // newPakRun allocates a new PakRun from the *FileRun run h.
 | |
| func newPakRun(h RunList) interface{} {
 | |
| 	pak := h[0].(*FileRun).File.Pak
 | |
| 	files := make([]*FileRun, len(h))
 | |
| 	for i, x := range h {
 | |
| 		files[i] = x.(*FileRun)
 | |
| 	}
 | |
| 	run := &PakRun{pak, files}
 | |
| 	sort.Sort(run) // files were sorted by package; sort them by file now
 | |
| 	return run
 | |
| }
 | |
| 
 | |
| // ----------------------------------------------------------------------------
 | |
| // HitList
 | |
| 
 | |
| // A HitList describes a list of PakRuns.
 | |
| type HitList []*PakRun
 | |
| 
 | |
| // PakRuns are sorted by package.
 | |
| func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) }
 | |
| 
 | |
| func reduce(h0 RunList) HitList {
 | |
| 	// reduce a list of Spots into a list of FileRuns
 | |
| 	h1 := h0.reduce(lessSpot, newFileRun)
 | |
| 	// reduce a list of FileRuns into a list of PakRuns
 | |
| 	h2 := h1.reduce(lessFileRun, newPakRun)
 | |
| 	// sort the list of PakRuns by package
 | |
| 	h2.sort(lessPakRun)
 | |
| 	// create a HitList
 | |
| 	h := make(HitList, len(h2))
 | |
| 	for i, p := range h2 {
 | |
| 		h[i] = p.(*PakRun)
 | |
| 	}
 | |
| 	return h
 | |
| }
 | |
| 
 | |
| // filter returns a new HitList created by filtering
 | |
| // all PakRuns from h that have a matching pakname.
 | |
| func (h HitList) filter(pakname string) HitList {
 | |
| 	var hh HitList
 | |
| 	for _, p := range h {
 | |
| 		if p.Pak.Name == pakname {
 | |
| 			hh = append(hh, p)
 | |
| 		}
 | |
| 	}
 | |
| 	return hh
 | |
| }
 | |
| 
 | |
| // ----------------------------------------------------------------------------
 | |
| // AltWords
 | |
| 
 | |
| type wordPair struct {
 | |
| 	canon string // canonical word spelling (all lowercase)
 | |
| 	alt   string // alternative spelling
 | |
| }
 | |
| 
 | |
| // An AltWords describes a list of alternative spellings for a
 | |
| // canonical (all lowercase) spelling of a word.
 | |
| type AltWords struct {
 | |
| 	Canon string   // canonical word spelling (all lowercase)
 | |
| 	Alts  []string // alternative spelling for the same word
 | |
| }
 | |
| 
 | |
| // wordPairs are sorted by their canonical spelling.
 | |
| func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon }
 | |
| 
 | |
| // newAltWords allocates a new AltWords from the *wordPair run h.
 | |
| func newAltWords(h RunList) interface{} {
 | |
| 	canon := h[0].(*wordPair).canon
 | |
| 	alts := make([]string, len(h))
 | |
| 	for i, x := range h {
 | |
| 		alts[i] = x.(*wordPair).alt
 | |
| 	}
 | |
| 	return &AltWords{canon, alts}
 | |
| }
 | |
| 
 | |
| func (a *AltWords) filter(s string) *AltWords {
 | |
| 	var alts []string
 | |
| 	for _, w := range a.Alts {
 | |
| 		if w != s {
 | |
| 			alts = append(alts, w)
 | |
| 		}
 | |
| 	}
 | |
| 	if len(alts) > 0 {
 | |
| 		return &AltWords{a.Canon, alts}
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
// Ident stores information about external identifiers in order to create
// links to package documentation.
type Ident struct {
	Path    string // e.g. "net/http"
	Package string // e.g. "http"
	Name    string // e.g. "NewRequest"
	Doc     string // e.g. "NewRequest returns a new Request..."
}

// byImportCount sorts the given slice of Idents by the import
// counts of the packages to which they belong.
type byImportCount struct {
	Idents      []Ident
	ImportCount map[string]int
}

func (ic byImportCount) Len() int { return len(ic.Idents) }

// Less orders by descending import count, breaking ties by
// ascending package path.
func (ic byImportCount) Less(i, j int) bool {
	ci := ic.ImportCount[ic.Idents[i].Path]
	cj := ic.ImportCount[ic.Idents[j].Path]
	if ci != cj {
		return ci > cj
	}
	return ic.Idents[i].Path < ic.Idents[j].Path
}

func (ic byImportCount) Swap(i, j int) {
	ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i]
}

// String renders the identifiers and their import counts,
// one per line, for debugging.
func (ic byImportCount) String() string {
	var buf bytes.Buffer
	buf.WriteByte('[')
	for _, v := range ic.Idents {
		fmt.Fprintf(&buf, "\n\t%s, %s (%d)", v.Path, v.Name, ic.ImportCount[v.Path])
	}
	buf.WriteString("\n]")
	return buf.String()
}

// filter creates a new Ident list where the results match the given
// package name.
func (ic byImportCount) filter(pakname string) []Ident {
	if ic.Idents == nil {
		return nil
	}
	var matched []Ident
	for _, id := range ic.Idents {
		if id.Package == pakname {
			matched = append(matched, id)
		}
	}
	return matched
}

// top returns the top n identifiers.
func (ic byImportCount) top(n int) []Ident {
	if len(ic.Idents) <= n {
		return ic.Idents
	}
	return ic.Idents[:n]
}
 | |
| 
 | |
// ----------------------------------------------------------------------------
// Indexer

// An IndexResult holds the two occurrence lists kept per word.
type IndexResult struct {
	Decls  RunList // package-level declarations (with snippets)
	Others RunList // all other occurrences
}

// Statistics provides statistics information for an index.
type Statistics struct {
	Bytes int // total size of indexed source files
	Files int // number of indexed source files
	Lines int // number of lines (all files)
	Words int // number of different identifiers
	Spots int // number of identifier occurrences
}

// An Indexer maintains the data structures and provides the machinery
// for indexing .go files under a file tree. It implements the path.Visitor
// interface for walking file trees, and the ast.Visitor interface for
// walking Go ASTs.
type Indexer struct {
	c          *Corpus
	fset       *token.FileSet // file set for all indexed files
	fsOpenGate chan bool      // send pre fs.Open; receive on close

	mu            sync.Mutex              // guards all the following
	sources       bytes.Buffer            // concatenated sources
	strings       map[string]string       // interned strings (see intern)
	packages      map[Pak]*Pak            // interned *Paks
	words         map[string]*IndexResult // RunLists of Spots
	snippets      []*Snippet              // indices are stored in SpotInfos
	current       *token.File             // last file added to file set
	file          *File                   // the file currently being indexed
	decl          ast.Decl                // AST for current decl; nil when no snippet is wanted
	stats         Statistics
	throttle      *util.Throttle
	importCount   map[string]int                 // package path ("net/http") => count
	packagePath   map[string]map[string]bool     // "template" => "text/template" => true
	exports       map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
	curPkgExports map[string]SpotKind            // declarations seen in the file currently being indexed
	idents        map[SpotKind]map[string][]Ident // kind => name => list of Idents
}
 | |
| 
 | |
| func (x *Indexer) intern(s string) string {
 | |
| 	if s, ok := x.strings[s]; ok {
 | |
| 		return s
 | |
| 	}
 | |
| 	x.strings[s] = s
 | |
| 	return s
 | |
| }
 | |
| 
 | |
| func (x *Indexer) lookupPackage(path, name string) *Pak {
 | |
| 	// In the source directory tree, more than one package may
 | |
| 	// live in the same directory. For the packages map, construct
 | |
| 	// a key that includes both the directory path and the package
 | |
| 	// name.
 | |
| 	key := Pak{Path: x.intern(path), Name: x.intern(name)}
 | |
| 	pak := x.packages[key]
 | |
| 	if pak == nil {
 | |
| 		pak = &key
 | |
| 		x.packages[key] = pak
 | |
| 	}
 | |
| 	return pak
 | |
| }
 | |
| 
 | |
| func (x *Indexer) addSnippet(s *Snippet) int {
 | |
| 	index := len(x.snippets)
 | |
| 	x.snippets = append(x.snippets, s)
 | |
| 	return index
 | |
| }
 | |
| 
 | |
// visitIdent records one occurrence (spot) of the identifier id with
// the given kind. Declarations (when x.decl is set) get a snippet and
// go on the word's Decls list; uses and snippet-less declarations go
// on the Others list. A nil id is ignored.
func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
	if id == nil {
		return
	}
	name := x.intern(id.Name)

	// Remember this name's declaration kind for the current
	// package (merged into x.exports by indexGoFile).
	switch kind {
	case TypeDecl, FuncDecl, ConstDecl, VarDecl:
		x.curPkgExports[name] = kind
	}

	// Get (or create) the per-word occurrence lists.
	lists, found := x.words[name]
	if !found {
		lists = new(IndexResult)
		x.words[name] = lists
	}

	if kind == Use || x.decl == nil {
		if x.c.IndexGoCode {
			// not a declaration or no snippet required
			info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
			lists.Others = append(lists.Others, Spot{x.file, info})
		}
	} else {
		// a declaration with snippet; the SpotInfo stores the
		// snippet index instead of a line number
		index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
		info := makeSpotInfo(kind, index, true)
		lists.Decls = append(lists.Decls, Spot{x.file, info})
	}

	x.stats.Spots++
}
 | |
| 
 | |
// visitFieldList indexes the names declared in flist with the given
// kind and walks each field's type expression for identifier uses.
// Field tags are not indexed.
func (x *Indexer) visitFieldList(kind SpotKind, flist *ast.FieldList) {
	for _, f := range flist.List {
		x.decl = nil // no snippets for fields
		for _, name := range f.Names {
			x.visitIdent(kind, name)
		}
		ast.Walk(x, f.Type)
		// ignore tag - not indexed at the moment
	}
}
 | |
| 
 | |
// visitSpec indexes a single spec of a general declaration: import
// specs record the local name (if any) and bump the import count for
// the imported path; value specs index their names with the given
// kind; type specs index the type name. Types and values are walked
// for further identifier uses.
func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) {
	switch n := spec.(type) {
	case *ast.ImportSpec:
		x.visitIdent(ImportDecl, n.Name)
		if n.Path != nil {
			// Count how often each package path is imported.
			if imp, err := strconv.Unquote(n.Path.Value); err == nil {
				x.importCount[x.intern(imp)]++
			}
		}

	case *ast.ValueSpec:
		for _, n := range n.Names {
			x.visitIdent(kind, n)
		}
		ast.Walk(x, n.Type)
		for _, v := range n.Values {
			ast.Walk(x, v)
		}

	case *ast.TypeSpec:
		x.visitIdent(TypeDecl, n.Name)
		ast.Walk(x, n.Type)
	}
}
 | |
| 
 | |
| func (x *Indexer) visitGenDecl(decl *ast.GenDecl) {
 | |
| 	kind := VarDecl
 | |
| 	if decl.Tok == token.CONST {
 | |
| 		kind = ConstDecl
 | |
| 	}
 | |
| 	x.decl = decl
 | |
| 	for _, s := range decl.Specs {
 | |
| 		x.visitSpec(kind, s)
 | |
| 	}
 | |
| }
 | |
| 
 | |
// Visit implements ast.Visitor. It dispatches on the node type to
// index declarations and identifier uses. It returns x (continue the
// walk) for node types it does not handle itself, and nil for the
// handled cases, whose children it has already visited explicitly.
func (x *Indexer) Visit(node ast.Node) ast.Visitor {
	switch n := node.(type) {
	case nil:
		// nothing to do

	case *ast.Ident:
		// A bare identifier reached here is a use, not a declaration.
		x.visitIdent(Use, n)

	case *ast.FieldList:
		x.visitFieldList(VarDecl, n)

	case *ast.InterfaceType:
		x.visitFieldList(MethodDecl, n.Methods)

	case *ast.DeclStmt:
		// local declarations should only be *ast.GenDecls;
		// ignore incorrect ASTs
		if decl, ok := n.Decl.(*ast.GenDecl); ok {
			x.decl = nil // no snippets for local declarations
			x.visitGenDecl(decl)
		}

	case *ast.GenDecl:
		x.decl = n
		x.visitGenDecl(n)

	case *ast.FuncDecl:
		kind := FuncDecl
		if n.Recv != nil {
			kind = MethodDecl
			ast.Walk(x, n.Recv)
		}
		x.decl = n
		x.visitIdent(kind, n.Name)
		ast.Walk(x, n.Type)
		if n.Body != nil {
			ast.Walk(x, n.Body)
		}

	case *ast.File:
		x.decl = nil
		x.visitIdent(PackageClause, n.Name)
		for _, d := range n.Decls {
			ast.Walk(x, d)
		}

	default:
		return x
	}

	return nil
}
 | |
| 
 | |
// addFile adds a file to the index if possible and returns the file set file
// and the file's AST if it was successfully parsed as a Go file. If addFile
// failed (that is, if the file was not added), it returns file == nil.
func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) {
	defer f.Close()

	// The file set's base offset and x.sources size must be in lock-step;
	// this permits the direct mapping of suffix array lookup results to
	// corresponding Pos values.
	//
	// When a file is added to the file set, its offset base increases by
	// the size of the file + 1; and the initial base offset is 1. Add an
	// extra byte to the sources here.
	x.sources.WriteByte(0)

	// If the sources length doesn't match the file set base at this point
	// the file set implementation changed or we have another error.
	base := x.fset.Base()
	if x.sources.Len() != base {
		panic("internal error: file base incorrect")
	}

	// append file contents (src) to x.sources
	if _, err := x.sources.ReadFrom(f); err == nil {
		src := x.sources.Bytes()[base:]

		if goFile {
			// parse the file and in the process add it to the file set
			if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
				file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file
				return
			}
			// file has parse errors, and the AST may be incorrect -
			// set lines information explicitly and index as ordinary
			// text file (cannot fall through to the text case below
			// because the file has already been added to the file set
			// by the parser)
			file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file
			file.SetLinesForContent(src)
			ast = nil
			return
		}

		if util.IsText(src) {
			// only add the file to the file set (for the full text index)
			file = x.fset.AddFile(filename, x.fset.Base(), len(src))
			file.SetLinesForContent(src)
			return
		}
	}

	// discard possibly added data
	x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added
	return
}
 | |
| 
 | |
// Design note: Using an explicit white list of permitted files for indexing
// makes sure that the important files are included and massively reduces the
// number of files to index. The advantage over a blacklist is that unexpected
// (non-blacklisted) files won't suddenly explode the index.

// Files are whitelisted if they have a file name or extension
// present as key in whitelisted.
var whitelisted = map[string]bool{
	// extensions
	".bash": true,
	".c":    true,
	".cc":   true,
	".cpp":  true,
	".cxx":  true,
	".css":  true,
	".go":   true,
	".goc":  true,
	".h":    true,
	".hh":   true,
	".hpp":  true,
	".hxx":  true,
	".html": true,
	".js":   true,
	".out":  true,
	".py":   true,
	".s":    true,
	".sh":   true,
	".txt":  true,
	".xml":  true,
	// full file names (files without extension)
	"AUTHORS":      true,
	"CONTRIBUTORS": true,
	"LICENSE":      true,
	"Makefile":     true,
	"PATENTS":      true,
	"README":       true,
}

// isWhitelisted returns true if a file is on the list
// of "permitted" files for indexing. The filename must
// be the directory-local name of the file.
func isWhitelisted(filename string) bool {
	if ext := pathpkg.Ext(filename); ext != "" {
		return whitelisted[ext]
	}
	// file has no extension - use entire filename
	return whitelisted[filename]
}
 | |
| 
 | |
// indexDocs extracts package documentation from astFile (a file in
// directory dirname) and records an Ident for the package itself and
// for each documented top-level declaration. Files in package main
// are skipped.
func (x *Indexer) indexDocs(dirname string, filename string, astFile *ast.File) {
	pkgName := x.intern(astFile.Name.Name)
	if pkgName == "main" {
		return
	}
	// Derive the import path from the directory name by stripping
	// the "/src/" (and legacy "pkg/") prefix.
	pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
	astPkg := ast.Package{
		Name: pkgName,
		Files: map[string]*ast.File{
			filename: astFile,
		},
	}
	var m doc.Mode
	docPkg := doc.New(&astPkg, dirname, m)
	// addIdent records one identifier of the given kind together with
	// a synopsis of its documentation.
	addIdent := func(sk SpotKind, name string, docstr string) {
		if x.idents[sk] == nil {
			x.idents[sk] = make(map[string][]Ident)
		}
		name = x.intern(name)
		x.idents[sk][name] = append(x.idents[sk][name], Ident{
			Path:    pkgPath,
			Package: pkgName,
			Name:    name,
			Doc:     doc.Synopsis(docstr),
		})
	}

	if x.idents[PackageClause] == nil {
		x.idents[PackageClause] = make(map[string][]Ident)
	}
	// List of words under which the package identifier will be stored.
	// This includes the package name and the components of the directory
	// in which it resides.
	words := strings.Split(pathpkg.Dir(pkgPath), "/")
	if words[0] == "." {
		words = []string{}
	}
	name := x.intern(docPkg.Name)
	synopsis := doc.Synopsis(docPkg.Doc)
	words = append(words, name)
	pkgIdent := Ident{
		Path:    pkgPath,
		Package: pkgName,
		Name:    name,
		Doc:     synopsis,
	}
	for _, word := range words {
		word = x.intern(word)
		found := false
		pkgs := x.idents[PackageClause][word]
		for i, p := range pkgs {
			if p.Path == pkgPath {
				// Package already recorded under this word; just
				// refresh its synopsis if this file has package docs.
				if docPkg.Doc != "" {
					p.Doc = synopsis
					pkgs[i] = p
				}
				found = true
				break
			}
		}
		if !found {
			x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent)
		}
	}

	// Record all documented top-level declarations.
	for _, c := range docPkg.Consts {
		for _, name := range c.Names {
			addIdent(ConstDecl, name, c.Doc)
		}
	}
	for _, t := range docPkg.Types {
		addIdent(TypeDecl, t.Name, t.Doc)
		for _, c := range t.Consts {
			for _, name := range c.Names {
				addIdent(ConstDecl, name, c.Doc)
			}
		}
		for _, v := range t.Vars {
			for _, name := range v.Names {
				addIdent(VarDecl, name, v.Doc)
			}
		}
		for _, f := range t.Funcs {
			addIdent(FuncDecl, f.Name, f.Doc)
		}
		for _, f := range t.Methods {
			addIdent(MethodDecl, f.Name, f.Doc)
			// Change the name of methods to be "<typename>.<methodname>".
			// They will still be indexed as <methodname>.
			idents := x.idents[MethodDecl][f.Name]
			idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name)
		}
	}
	for _, v := range docPkg.Vars {
		for _, name := range v.Names {
			addIdent(VarDecl, name, v.Doc)
		}
	}
	for _, f := range docPkg.Funcs {
		addIdent(FuncDecl, f.Name, f.Doc)
	}
}
 | |
| 
 | |
// indexGoFile indexes one parsed Go file: it walks the AST for
// identifier spots (if IndexGoCode is enabled), extracts documentation
// idents (if IndexDocs is enabled and the file is not a test file),
// and records the package path and the symbols collected while walking.
func (x *Indexer) indexGoFile(dirname string, filename string, file *token.File, astFile *ast.File) {
	pkgName := astFile.Name.Name

	if x.c.IndexGoCode {
		x.current = file
		pak := x.lookupPackage(dirname, pkgName)
		x.file = &File{filename, pak}
		ast.Walk(x, astFile)
	}

	if x.c.IndexDocs {
		// Test files are already filtered out in visitFile if IndexGoCode and
		// IndexFullText are false. Otherwise, check here.
		isTestFile := (x.c.IndexGoCode || x.c.IndexFullText) &&
			(strings.HasSuffix(filename, "_test.go") || strings.HasPrefix(dirname, "/test/"))
		if !isTestFile {
			x.indexDocs(dirname, filename, astFile)
		}
	}

	// Record that package pkgName lives under pkgPath.
	ppKey := x.intern(pkgName)
	if _, ok := x.packagePath[ppKey]; !ok {
		x.packagePath[ppKey] = make(map[string]bool)
	}
	pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
	x.packagePath[ppKey][pkgPath] = true

	// Merge in exported symbols found walking this file into
	// the map for that package.
	if len(x.curPkgExports) > 0 {
		dest, ok := x.exports[pkgPath]
		if !ok {
			dest = make(map[string]SpotKind)
			x.exports[pkgPath] = dest
		}
		for k, v := range x.curPkgExports {
			dest[k] = v
		}
	}
}
 | |
| 
 | |
// visitFile indexes a single file if indexing is enabled and the file
// is eligible under the configured index modes. It limits concurrent
// fs.Open calls via x.fsOpenGate and serializes all index mutation
// under x.mu.
func (x *Indexer) visitFile(dirname string, fi os.FileInfo) {
	if fi.IsDir() || !x.c.IndexEnabled {
		return
	}

	filename := pathpkg.Join(dirname, fi.Name())
	goFile := isGoFile(fi)

	// Eligibility check against the most permissive enabled mode:
	// full text accepts all whitelisted files, Go-code indexing only
	// Go files, docs indexing only non-test Go files.
	switch {
	case x.c.IndexFullText:
		if !isWhitelisted(fi.Name()) {
			return
		}
	case x.c.IndexGoCode:
		if !goFile {
			return
		}
	case x.c.IndexDocs:
		if !goFile ||
			strings.HasSuffix(fi.Name(), "_test.go") ||
			strings.HasPrefix(dirname, "/test/") {
			return
		}
	default:
		// No indexing turned on.
		return
	}

	x.fsOpenGate <- true
	defer func() { <-x.fsOpenGate }()

	// open file
	f, err := x.c.fs.Open(filename)
	if err != nil {
		return
	}

	x.mu.Lock()
	defer x.mu.Unlock()

	x.throttle.Throttle()

	// Reset per-file export collection; addFile closes f.
	x.curPkgExports = make(map[string]SpotKind)
	file, fast := x.addFile(f, filename, goFile)
	if file == nil {
		return // addFile failed
	}

	if fast != nil {
		x.indexGoFile(dirname, fi.Name(), file, fast)
	}

	// update statistics
	x.stats.Bytes += file.Size()
	x.stats.Files++
	x.stats.Lines += file.LineCount()
}
 | |
| 
 | |
// indexOptions contains information that affects the contents of an index.
type indexOptions struct {
	// Docs provides documentation search results.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	Docs bool

	// GoCode provides Go source code search results.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	GoCode bool

	// FullText provides search results from all files.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	FullText bool

	// MaxResults optionally specifies the maximum results for indexing.
	// The default is 1000.
	MaxResults int
}
 | |
| 
 | |
| // ----------------------------------------------------------------------------
 | |
| // Index
 | |
| 
 | |
// A LookupResult holds the hits for one word: the package-level
// declarations (which carry snippets) and all other occurrences.
type LookupResult struct {
	Decls  HitList // package-level declarations (with snippets)
	Others HitList // all other occurrences
}
 | |
| 
 | |
// An Index holds the search index built by NewIndex (or deserialized by
// ReadFrom): per-word hit lists, alternative spellings, snippets, package
// metadata, and — when full-text indexing is enabled — the file set and
// suffix array used for regexp search.
type Index struct {
	fset        *token.FileSet           // file set used during indexing; nil if no textindex
	suffixes    *suffixarray.Index       // suffixes for concatenated sources; nil if no textindex
	words       map[string]*LookupResult // maps words to hit lists
	alts        map[string]*AltWords     // maps canonical(words) to lists of alternative spellings
	snippets    []*Snippet               // all snippets, indexed by snippet index
	stats       Statistics               // statistics collected while building the index
	importCount map[string]int                 // package path ("net/http") => count
	packagePath map[string]map[string]bool     // "template" => "text/template" => true
	exports     map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
	idents      map[SpotKind]map[string][]Ident // kind => name => identifiers with that name
	opts        indexOptions // the options this index was built with
}
 | |
| 
 | |
// canonical returns the canonical spelling of w used to group
// alternative spellings of the same word: its lowercase form.
func canonical(w string) string {
	return strings.ToLower(w)
}
 | |
| 
 | |
// Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems
// consuming file descriptors, where some systems have low 256 or 512 limits.
// Go should have a built-in way to cap fd usage under the ulimit.
const (
	maxOpenFiles = 200 // bound on concurrently open files during indexing (Indexer.fsOpenGate)
	maxOpenDirs  = 50  // bound on concurrent ReadDir calls (NewIndex's dirGate)
)
 | |
| 
 | |
| func (c *Corpus) throttle() float64 {
 | |
| 	if c.IndexThrottle <= 0 {
 | |
| 		return 0.9
 | |
| 	}
 | |
| 	if c.IndexThrottle > 1.0 {
 | |
| 		return 1.0
 | |
| 	}
 | |
| 	return c.IndexThrottle
 | |
| }
 | |
| 
 | |
// NewIndex creates a new index for the .go files provided by the corpus.
// It walks all directories reported by c.fsDirnames concurrently (bounded
// by maxOpenDirs/maxOpenFiles), then reduces the collected spots into the
// final lookup structures.
func (c *Corpus) NewIndex() *Index {
	// initialize Indexer
	// (use some reasonably sized maps to start)
	x := &Indexer{
		c:           c,
		fset:        token.NewFileSet(),
		fsOpenGate:  make(chan bool, maxOpenFiles),
		strings:     make(map[string]string),
		packages:    make(map[Pak]*Pak, 256),
		words:       make(map[string]*IndexResult, 8192),
		throttle:    util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time
		importCount: make(map[string]int),
		packagePath: make(map[string]map[string]bool),
		exports:     make(map[string]map[string]SpotKind),
		idents:      make(map[SpotKind]map[string][]Ident, 4),
	}

	// index all files in the directories given by dirnames
	// (one goroutine per directory, one per file; dirGate bounds
	// the number of directories being read at once)
	var wg sync.WaitGroup // outstanding ReadDir + visitFile
	dirGate := make(chan bool, maxOpenDirs)
	for dirname := range c.fsDirnames() {
		if c.IndexDirectory != nil && !c.IndexDirectory(dirname) {
			continue
		}
		dirGate <- true
		wg.Add(1)
		go func(dirname string) {
			defer func() { <-dirGate }()
			defer wg.Done()

			list, err := c.fs.ReadDir(dirname)
			if err != nil {
				log.Printf("ReadDir(%q): %v; skipping directory", dirname, err)
				return // ignore this directory
			}
			for _, fi := range list {
				// wg.Add happens before the goroutine starts,
				// so wg.Wait below cannot return early.
				wg.Add(1)
				go func(fi os.FileInfo) {
					defer wg.Done()
					x.visitFile(dirname, fi)
				}(fi)
			}
		}(dirname)
	}
	wg.Wait()

	if !c.IndexFullText {
		// the file set, the current file, and the sources are
		// not needed after indexing if no text index is built -
		// help GC and clear them
		x.fset = nil
		x.sources.Reset()
		x.current = nil // contains reference to fset!
	}

	// for each word, reduce the RunLists into a LookupResult;
	// also collect the word with its canonical spelling in a
	// word list for later computation of alternative spellings
	words := make(map[string]*LookupResult)
	var wlist RunList
	for w, h := range x.words {
		decls := reduce(h.Decls)
		others := reduce(h.Others)
		words[w] = &LookupResult{
			Decls:  decls,
			Others: others,
		}
		wlist = append(wlist, &wordPair{canonical(w), w})
		x.throttle.Throttle()
	}
	x.stats.Words = len(words)

	// reduce the word list {canonical(w), w} into
	// a list of AltWords runs {canonical(w), {w}}
	alist := wlist.reduce(lessWordPair, newAltWords)

	// convert alist into a map of alternative spellings
	alts := make(map[string]*AltWords)
	for i := 0; i < len(alist); i++ {
		a := alist[i].(*AltWords)
		alts[a.Canon] = a
	}

	// create text index
	var suffixes *suffixarray.Index
	if c.IndexFullText {
		suffixes = suffixarray.New(x.sources.Bytes())
	}

	// sort idents by the number of imports of their respective packages
	for _, idMap := range x.idents {
		for _, ir := range idMap {
			sort.Sort(byImportCount{ir, x.importCount})
		}
	}

	return &Index{
		fset:        x.fset,
		suffixes:    suffixes,
		words:       words,
		alts:        alts,
		snippets:    x.snippets,
		stats:       x.stats,
		importCount: x.importCount,
		packagePath: x.packagePath,
		exports:     x.exports,
		idents:      x.idents,
		opts: indexOptions{
			Docs:       x.c.IndexDocs,
			GoCode:     x.c.IndexGoCode,
			FullText:   x.c.IndexFullText,
			MaxResults: x.c.MaxResults,
		},
	}
}
 | |
| 
 | |
// ErrFileIndexVersion is returned by Index.ReadFrom when the serialized
// index was written with an incompatible fileIndexVersion.
var ErrFileIndexVersion = errors.New("file index version out of date")

// fileIndexVersion is the version number stored in serialized indexes;
// bump it whenever the gob layout of fileIndex changes.
const fileIndexVersion = 3
 | |
| 
 | |
// fileIndex is the subset of Index that's gob-encoded for use by
// Index.Write and Index.Read.
// Fulltext records whether the gob blob is followed by the serialized
// file set and suffix array (see Index.WriteTo / Index.ReadFrom).
type fileIndex struct {
	Version     int
	Words       map[string]*LookupResult
	Alts        map[string]*AltWords
	Snippets    []*Snippet
	Fulltext    bool
	Stats       Statistics
	ImportCount map[string]int
	PackagePath map[string]map[string]bool
	Exports     map[string]map[string]SpotKind
	Idents      map[SpotKind]map[string][]Ident
	Opts        indexOptions
}
 | |
| 
 | |
| func (x *fileIndex) Write(w io.Writer) error {
 | |
| 	return gob.NewEncoder(w).Encode(x)
 | |
| }
 | |
| 
 | |
| func (x *fileIndex) Read(r io.Reader) error {
 | |
| 	return gob.NewDecoder(r).Decode(x)
 | |
| }
 | |
| 
 | |
| // WriteTo writes the index x to w.
 | |
| func (x *Index) WriteTo(w io.Writer) (n int64, err error) {
 | |
| 	w = countingWriter{&n, w}
 | |
| 	fulltext := false
 | |
| 	if x.suffixes != nil {
 | |
| 		fulltext = true
 | |
| 	}
 | |
| 	fx := fileIndex{
 | |
| 		Version:     fileIndexVersion,
 | |
| 		Words:       x.words,
 | |
| 		Alts:        x.alts,
 | |
| 		Snippets:    x.snippets,
 | |
| 		Fulltext:    fulltext,
 | |
| 		Stats:       x.stats,
 | |
| 		ImportCount: x.importCount,
 | |
| 		PackagePath: x.packagePath,
 | |
| 		Exports:     x.exports,
 | |
| 		Idents:      x.idents,
 | |
| 		Opts:        x.opts,
 | |
| 	}
 | |
| 	if err := fx.Write(w); err != nil {
 | |
| 		return 0, err
 | |
| 	}
 | |
| 	if fulltext {
 | |
| 		encode := func(x interface{}) error {
 | |
| 			return gob.NewEncoder(w).Encode(x)
 | |
| 		}
 | |
| 		if err := x.fset.Write(encode); err != nil {
 | |
| 			return 0, err
 | |
| 		}
 | |
| 		if err := x.suffixes.Write(w); err != nil {
 | |
| 			return 0, err
 | |
| 		}
 | |
| 	}
 | |
| 	return n, nil
 | |
| }
 | |
| 
 | |
| // ReadFrom reads the index from r into x; x must not be nil.
 | |
| // If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader.
 | |
| // If the index is from an old version, the error is ErrFileIndexVersion.
 | |
| func (x *Index) ReadFrom(r io.Reader) (n int64, err error) {
 | |
| 	// We use the ability to read bytes as a plausible surrogate for buffering.
 | |
| 	if _, ok := r.(io.ByteReader); !ok {
 | |
| 		r = bufio.NewReader(r)
 | |
| 	}
 | |
| 	r = countingReader{&n, r.(byteReader)}
 | |
| 	var fx fileIndex
 | |
| 	if err := fx.Read(r); err != nil {
 | |
| 		return n, err
 | |
| 	}
 | |
| 	if fx.Version != fileIndexVersion {
 | |
| 		return 0, ErrFileIndexVersion
 | |
| 	}
 | |
| 	x.words = fx.Words
 | |
| 	x.alts = fx.Alts
 | |
| 	x.snippets = fx.Snippets
 | |
| 	x.stats = fx.Stats
 | |
| 	x.importCount = fx.ImportCount
 | |
| 	x.packagePath = fx.PackagePath
 | |
| 	x.exports = fx.Exports
 | |
| 	x.idents = fx.Idents
 | |
| 	x.opts = fx.Opts
 | |
| 	if fx.Fulltext {
 | |
| 		x.fset = token.NewFileSet()
 | |
| 		decode := func(x interface{}) error {
 | |
| 			return gob.NewDecoder(r).Decode(x)
 | |
| 		}
 | |
| 		if err := x.fset.Read(decode); err != nil {
 | |
| 			return n, err
 | |
| 		}
 | |
| 		x.suffixes = new(suffixarray.Index)
 | |
| 		if err := x.suffixes.Read(r); err != nil {
 | |
| 			return n, err
 | |
| 		}
 | |
| 	}
 | |
| 	return n, nil
 | |
| }
 | |
| 
 | |
// Stats returns the statistics collected while building the index.
func (x *Index) Stats() Statistics {
	return x.stats
}
 | |
| 
 | |
// ImportCount returns a map from import paths to how many times they were seen.
// The returned map is the index's internal map; callers must not mutate it.
func (x *Index) ImportCount() map[string]int {
	return x.importCount
}
 | |
| 
 | |
// PackagePath returns a map from short package name to a set
// of full package path names that use that short package name.
// The returned map is the index's internal map; callers must not mutate it.
func (x *Index) PackagePath() map[string]map[string]bool {
	return x.packagePath
}
 | |
| 
 | |
// Exports returns a map from full package path to exported
// symbol name to its type.
// The returned map is the index's internal map; callers must not mutate it.
func (x *Index) Exports() map[string]map[string]SpotKind {
	return x.exports
}
 | |
| 
 | |
// Idents returns a map from identifier type to exported
// symbol name to the list of identifiers matching that name.
// The returned map is the index's internal map; callers must not mutate it.
func (x *Index) Idents() map[SpotKind]map[string][]Ident {
	return x.idents
}
 | |
| 
 | |
| func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) {
 | |
| 	match = x.words[w]
 | |
| 	alt = x.alts[canonical(w)]
 | |
| 	// remove current spelling from alternatives
 | |
| 	// (if there is no match, the alternatives do
 | |
| 	// not contain the current spelling)
 | |
| 	if match != nil && alt != nil {
 | |
| 		alt = alt.filter(w)
 | |
| 	}
 | |
| 	return
 | |
| }
 | |
| 
 | |
// isIdentifier reports whether s is a Go identifier: a non-empty
// sequence of Unicode letters, digits, and underscores that does not
// start with a digit.
func isIdentifier(s string) bool {
	if s == "" {
		return false
	}
	for pos, r := range s {
		valid := unicode.IsLetter(r) || r == '_' || (pos > 0 && unicode.IsDigit(r))
		if !valid {
			return false
		}
	}
	return true
}
 | |
| 
 | |
// Lookup, for a given query which is either a single identifier or a
// qualified identifier ("pkg.Name"), returns a SearchResult containing
// packages, a LookupResult, a list of alternative spellings, and
// identifiers, if any. Any and all results may be nil.
// If the query syntax is wrong, an error is reported.
func (x *Index) Lookup(query string) (*SearchResult, error) {
	ss := strings.Split(query, ".")

	// check query syntax
	for _, s := range ss {
		if !isIdentifier(s) {
			return nil, errors.New("all query parts must be identifiers")
		}
	}
	rslt := &SearchResult{
		Query:  query,
		Idents: make(map[SpotKind][]Ident, 5),
	}
	// handle simple and qualified identifiers
	switch len(ss) {
	case 1:
		// Unqualified: look up the word itself and collect the
		// top-ranked identifiers of each kind with that name.
		ident := ss[0]
		rslt.Hit, rslt.Alt = x.lookupWord(ident)
		if rslt.Hit != nil {
			// found a match - filter packages with same name
			// for the list of packages called ident, if any
			rslt.Pak = rslt.Hit.Others.filter(ident)
		}
		for k, v := range x.idents {
			const rsltLimit = 50 // cap identifier results per kind
			ids := byImportCount{v[ident], x.importCount}
			rslt.Idents[k] = ids.top(rsltLimit)
		}

	case 2:
		// Qualified pkg.ident: restrict hits and identifiers to the
		// given package name.
		pakname, ident := ss[0], ss[1]
		rslt.Hit, rslt.Alt = x.lookupWord(ident)
		if rslt.Hit != nil {
			// found a match - filter by package name
			// (no paks - package names are not qualified)
			decls := rslt.Hit.Decls.filter(pakname)
			others := rslt.Hit.Others.filter(pakname)
			rslt.Hit = &LookupResult{decls, others}
		}
		for k, v := range x.idents {
			ids := byImportCount{v[ident], x.importCount}
			rslt.Idents[k] = ids.filter(pakname)
		}

	default:
		return nil, errors.New("query is not a (qualified) identifier")
	}

	return rslt, nil
}
 | |
| 
 | |
| func (x *Index) Snippet(i int) *Snippet {
 | |
| 	// handle illegal snippet indices gracefully
 | |
| 	if 0 <= i && i < len(x.snippets) {
 | |
| 		return x.snippets[i]
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
// positionList is a list of (filename, line) matches produced by
// LookupRegexp; it implements sort.Interface, ordering by filename so
// matches in the same file become adjacent.
type positionList []struct {
	filename string
	line     int
}

func (list positionList) Len() int           { return len(list) }
func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename }
func (list positionList) Swap(i, j int)      { list[i], list[j] = list[j], list[i] }
 | |
| 
 | |
// unique sorts list in place, removes duplicate entries, and returns
// the deduplicated prefix (which aliases list's backing array).
func unique(list []int) []int {
	sort.Ints(list)
	out := list[:0]
	for _, v := range list {
		if len(out) == 0 || out[len(out)-1] != v {
			out = append(out, v)
		}
	}
	return out
}
 | |
| 
 | |
// A FileLines value specifies a file and line numbers within that file.
// LookupRegexp returns one FileLines per file containing matches, with
// Lines sorted and deduplicated.
type FileLines struct {
	Filename string
	Lines    []int
}
 | |
| 
 | |
// LookupRegexp returns the number of matches and the matches where a regular
// expression r is found in the full text index. At most n matches are
// returned (thus found <= n). The result is grouped by file, sorted by
// filename, with duplicate line numbers removed. It returns zero results
// if no full-text index was built.
func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
	if x.suffixes == nil || n <= 0 {
		return
	}
	// n > 0

	var list positionList
	// FindAllIndex may return matches that span across file boundaries.
	// Such matches are unlikely, but after eliminating them we may end up
	// with fewer than n matches. If we don't have enough at the end, redo
	// the search with an increased value n1, but only if FindAllIndex
	// returned all the requested matches in the first place (if it
	// returned fewer than that there cannot be more).
	for n1 := n; found < n; n1 += n - found {
		found = 0
		matches := x.suffixes.FindAllIndex(r, n1)
		// compute files, exclude matches that span file boundaries,
		// and map offsets to file-local offsets
		list = make(positionList, len(matches))
		for _, m := range matches {
			// by construction, an offset corresponds to the Pos value
			// for the file set - use it to get the file and line
			p := token.Pos(m[0])
			if file := x.fset.File(p); file != nil {
				if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
					// match [m[0], m[1]) is within the file boundaries
					list[found].filename = file.Name()
					list[found].line = file.Line(p)
					found++
				}
			}
		}
		if found == n || len(matches) < n1 {
			// found all matches or there's no chance to find more
			break
		}
	}
	list = list[0:found]
	sort.Sort(list) // sort by filename

	// collect matches belonging to the same file
	var last string
	var lines []int
	addLines := func() {
		if len(lines) > 0 {
			// remove duplicate lines
			result = append(result, FileLines{last, unique(lines)})
			lines = nil
		}
	}
	for _, m := range list {
		if m.filename != last {
			addLines()
			last = m.filename
		}
		lines = append(lines, m.line)
	}
	addLines() // flush the final file's lines

	return
}
 | |
| 
 | |
// invalidateIndex should be called whenever any of the file systems
// under godoc's observation change so that the indexer is kicked on.
// It records the modification time and refreshes the corpus metadata.
func (c *Corpus) invalidateIndex() {
	c.fsModified.Set(nil)
	c.refreshMetadata()
}
 | |
| 
 | |
// indexUpToDate reports whether the search index is not older
// than any of the file systems under godoc's observation.
func (c *Corpus) indexUpToDate() bool {
	_, fsTime := c.fsModified.Get()
	_, siTime := c.searchIndex.Get()
	return !fsTime.After(siTime)
}
 | |
| 
 | |
// feedDirnames feeds the directory names of all directories
// under the corpus' file system tree to channel ch.
// It does not close ch; see fsDirnames.
func (c *Corpus) feedDirnames(ch chan<- string) {
	if dir, _ := c.fsTree.Get(); dir != nil {
		for d := range dir.(*Directory).iter(false) {
			ch <- d.Path
		}
	}
}
 | |
| 
 | |
// fsDirnames returns a channel sending all directory names
// of all the file systems under godoc's observation.
// The channel is closed once all names have been sent.
func (c *Corpus) fsDirnames() <-chan string {
	ch := make(chan string, 256) // buffered for fewer context switches
	go func() {
		c.feedDirnames(ch)
		close(ch)
	}()
	return ch
}
 | |
| 
 | |
| // CompatibleWith reports whether the Index x is compatible with the corpus
 | |
| // indexing options set in c.
 | |
| func (x *Index) CompatibleWith(c *Corpus) bool {
 | |
| 	return x.opts.Docs == c.IndexDocs &&
 | |
| 		x.opts.GoCode == c.IndexGoCode &&
 | |
| 		x.opts.FullText == c.IndexFullText &&
 | |
| 		x.opts.MaxResults == c.MaxResults
 | |
| }
 | |
| 
 | |
// readIndex loads the search index from the files matching the glob
// pattern filenames. Multiple matching files are concatenated in
// sorted filename order, so an index may be split across several
// sequentially named parts.
func (c *Corpus) readIndex(filenames string) error {
	matches, err := filepath.Glob(filenames)
	if err != nil {
		return err
	} else if matches == nil {
		return fmt.Errorf("no index files match %q", filenames)
	}
	sort.Strings(matches) // make sure files are in the right order
	files := make([]io.Reader, 0, len(matches))
	for _, filename := range matches {
		f, err := os.Open(filename)
		if err != nil {
			return err
		}
		// Deliberately deferred inside the loop: every part must stay
		// open until the MultiReader below has been fully consumed by
		// ReadIndexFrom; all parts are closed when this function returns.
		defer f.Close()
		files = append(files, f)
	}
	return c.ReadIndexFrom(io.MultiReader(files...))
}
 | |
| 
 | |
// ReadIndexFrom sets the current index from the serialized version found in r.
// The deserialized index must have been built with indexing options
// compatible with the corpus' current settings, otherwise an error
// is returned and the current index is left unchanged.
func (c *Corpus) ReadIndexFrom(r io.Reader) error {
	x := new(Index)
	if _, err := x.ReadFrom(r); err != nil {
		return err
	}
	if !x.CompatibleWith(c) {
		return fmt.Errorf("index file options are incompatible: %v", x.opts)
	}
	c.searchIndex.Set(x)
	return nil
}
 | |
| 
 | |
// UpdateIndex rebuilds the search index from the current corpus
// contents and installs it, logging timing and memory statistics
// when c.Verbose is set.
func (c *Corpus) UpdateIndex() {
	if c.Verbose {
		log.Printf("updating index...")
	}
	start := time.Now()
	index := c.NewIndex()
	stop := time.Now()
	c.searchIndex.Set(index)
	if c.Verbose {
		secs := stop.Sub(start).Seconds()
		stats := index.Stats()
		log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
			secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
	}
	// Indexing produces a lot of short-lived garbage; report heap usage
	// before and after an explicit GC when verbose.
	memstats := new(runtime.MemStats)
	runtime.ReadMemStats(memstats)
	if c.Verbose {
		log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
	}
	runtime.GC()
	runtime.ReadMemStats(memstats)
	if c.Verbose {
		log.Printf("after  GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
	}
}
 | |
| 
 | |
// RunIndexer runs forever, indexing. If c.IndexFiles is set, the index
// is loaded once from disk and never refreshed; otherwise the index is
// rebuilt periodically (every c.IndexInterval, default 5 minutes; a
// negative interval means index exactly once).
func (c *Corpus) RunIndexer() {
	// initialize the index from disk if possible
	if c.IndexFiles != "" {
		c.initFSTree()
		if err := c.readIndex(c.IndexFiles); err != nil {
			log.Printf("error reading index from file %s: %v", c.IndexFiles, err)
		}
		// A pre-built index is never refreshed; we are done.
		return
	}

	// Repeatedly update the package directory tree and index.
	// TODO(bgarcia): Use fsnotify to only update when notified of a filesystem change.
	for {
		c.initFSTree()
		c.UpdateIndex()
		if c.IndexInterval < 0 {
			// Negative interval: index exactly once.
			return
		}
		delay := 5 * time.Minute // by default, reindex every 5 minutes
		if c.IndexInterval > 0 {
			delay = c.IndexInterval
		}
		time.Sleep(delay)
	}
}
 | |
| 
 | |
| type countingWriter struct {
 | |
| 	n *int64
 | |
| 	w io.Writer
 | |
| }
 | |
| 
 | |
| func (c countingWriter) Write(p []byte) (n int, err error) {
 | |
| 	n, err = c.w.Write(p)
 | |
| 	*c.n += int64(n)
 | |
| 	return
 | |
| }
 | |
| 
 | |
| type byteReader interface {
 | |
| 	io.Reader
 | |
| 	io.ByteReader
 | |
| }
 | |
| 
 | |
| type countingReader struct {
 | |
| 	n *int64
 | |
| 	r byteReader
 | |
| }
 | |
| 
 | |
| func (c countingReader) Read(p []byte) (n int, err error) {
 | |
| 	n, err = c.r.Read(p)
 | |
| 	*c.n += int64(n)
 | |
| 	return
 | |
| }
 | |
| 
 | |
| func (c countingReader) ReadByte() (b byte, err error) {
 | |
| 	b, err = c.r.ReadByte()
 | |
| 	*c.n += 1
 | |
| 	return
 | |
| }
 |