From cb2dda6eabdf9160e66ca7293897f984154a7f8b Mon Sep 17 00:00:00 2001 From: Dominik Honnef Date: Wed, 10 Apr 2019 15:47:02 +0200 Subject: [PATCH] go/packages: deduplicate file parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When loading packages from source, many files are being parsed repeatedly, for example due to test variants. While the median number of times a file gets parsed is 2, it is significantly higher (up to 28 times) when parsing the standard library, because of test variant shenanigans. By caching file contents and their parsed representations we can cut down on processing time and garbage produced. When loading individual packages or 3rd party projects, the effect is rather small. However when loading the entire standard library, the effect is substantial. name old time/op new time/op delta Jaeger-8 2.95s ± 7% 2.84s ± 8% ~ (p=0.089 n=10+10) Std-8 4.96s ± 7% 4.23s ± 3% -14.62% (p=0.000 n=9+9) Strconv-8 892ms ±34% 877ms ±21% ~ (p=0.853 n=10+10) name old alloc/op new alloc/op delta Jaeger-8 1.22GB ± 0% 1.21GB ± 0% -0.84% (p=0.000 n=10+10) Std-8 2.57GB ± 0% 2.20GB ± 0% -14.61% (p=0.000 n=10+8) Strconv-8 201MB ± 1% 200MB ± 1% ~ (p=0.105 n=10+10) name old allocs/op new allocs/op delta Jaeger-8 12.7M ± 0% 12.4M ± 0% -1.82% (p=0.000 n=9+10) Std-8 26.4M ± 0% 17.3M ± 0% -34.62% (p=0.000 n=9+9) Strconv-8 1.94M ± 0% 1.91M ± 0% -1.50% (p=0.000 n=10+10) When loading std, peak RSS decreases from 1.96 GB to 1.57 GB. While we're here, we simplify our ParseFile implementation. The contract of ParseFile specifies that implementations must use src for parsing, and use filename only for display purposes. As such, we mustn't ever call it with a nil src, making the check for a nil src in our own implementation superfluous. Change-Id: I33daac20fc52ccdb3187a336633f712d01b71d86 Reviewed-on: https://go-review.googlesource.com/c/tools/+/171377 Run-TryBot: Ian Cottrell Run-TryBot: Michael Matloob Reviewed-by: Michael Matloob --- go/packages/packages.go | 77 +++++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 26 deletions(-) diff --git a/go/packages/packages.go b/go/packages/packages.go index 4639fcdd..eedd43bb 100644 --- a/go/packages/packages.go +++ b/go/packages/packages.go @@ -418,8 +418,10 @@ type loaderPackage struct { type loader struct { pkgs map[string]*loaderPackage Config - sizes types.Sizes - exportMu sync.Mutex // enforces mutual exclusion of exportdata operations + sizes types.Sizes + parseCache map[string]*parseValue + parseCacheMu sync.Mutex + exportMu sync.Mutex // enforces mutual exclusion of exportdata operations // TODO(matloob): Add an implied mode here and use that instead of mode. // Implied mode would contain all the fields we need the data for so we can @@ -428,8 +430,16 @@ type loader struct { // where we need certain modes right. } +type parseValue struct { + f *ast.File + err error + ready chan struct{} +} + func newLoader(cfg *Config) *loader { - ld := &loader{} + ld := &loader{ + parseCache: map[string]*parseValue{}, + } if cfg != nil { ld.Config = *cfg } @@ -457,12 +467,8 @@ func newLoader(cfg *Config) *loader { // because we load source if export data is missing. if ld.ParseFile == nil { ld.ParseFile = func(fset *token.FileSet, filename string, src []byte) (*ast.File, error) { - var isrc interface{} - if src != nil { - isrc = src - } const mode = parser.AllErrors | parser.ParseComments - return parser.ParseFile(fset, filename, isrc, mode) + return parser.ParseFile(fset, filename, src, mode) } } } @@ -864,6 +870,42 @@ func (f importerFunc) Import(path string) (*types.Package, error) { return f(pat // the number of parallel I/O calls per process. var ioLimit = make(chan bool, 20) +func (ld *loader) parseFile(filename string) (*ast.File, error) { + ld.parseCacheMu.Lock() + v, ok := ld.parseCache[filename] + if ok { + // cache hit + ld.parseCacheMu.Unlock() + <-v.ready + } else { + // cache miss + v = &parseValue{ready: make(chan struct{})} + ld.parseCache[filename] = v + ld.parseCacheMu.Unlock() + + var src []byte + for f, contents := range ld.Config.Overlay { + if sameFile(f, filename) { + src = contents + } + } + var err error + if src == nil { + ioLimit <- true // wait + src, err = ioutil.ReadFile(filename) + <-ioLimit // signal + } + if err != nil { + v.err = err + } else { + v.f, v.err = ld.ParseFile(ld.Fset, filename, src) + } + + close(v.ready) + } + return v.f, v.err +} + // parseFiles reads and parses the Go source files and returns the ASTs // of the ones that could be at least partially parsed, along with a // list of I/O and parse errors encountered. @@ -884,24 +926,7 @@ func (ld *loader) parseFiles(filenames []string) ([]*ast.File, []error) { } wg.Add(1) go func(i int, filename string) { - ioLimit <- true // wait - // ParseFile may return both an AST and an error. - var src []byte - for f, contents := range ld.Config.Overlay { - if sameFile(f, filename) { - src = contents - } - } - var err error - if src == nil { - src, err = ioutil.ReadFile(filename) - } - if err != nil { - parsed[i], errors[i] = nil, err - } else { - parsed[i], errors[i] = ld.ParseFile(ld.Fset, filename, src) - } - <-ioLimit // signal + parsed[i], errors[i] = ld.parseFile(filename) wg.Done() }(i, file) }