From 9f3b32b5c48ba2ae38aa67c3c1ff69f1dbbf54ca Mon Sep 17 00:00:00 2001
From: Alan Donovan
Date: Mon, 17 Sep 2018 09:25:48 -0400
Subject: [PATCH] go/analysis: a new API for analysis tools

This CL contains just the API, the validate function, and two example
analyses, findcall and pkglemma.

Change-Id: Ia1f2652647050b1e0e15dad8b9ae10cf1a5fbdbc
Synopsis: go-review.googlesource.com/c/tools/+/134935
Design: docs.google.com/document/d/1-azPLXaLgTCKeKDNg0HVMq2ovMlD-e7n1ZHzZVzOlJk
Reviewed-on: https://go-review.googlesource.com/135635
Reviewed-by: Ian Cottrell
---
 go/analysis/analysis.go                 | 215 ++++++++++++++++++++++++
 go/analysis/plugin/README               |   8 +
 go/analysis/plugin/findcall/findcall.go |  45 +++++
 go/analysis/plugin/pkglemma/pkglemma.go | 112 ++++++++++++
 go/analysis/validate.go                 | 101 +++++++++++
 5 files changed, 481 insertions(+)
 create mode 100644 go/analysis/analysis.go
 create mode 100644 go/analysis/plugin/README
 create mode 100644 go/analysis/plugin/findcall/findcall.go
 create mode 100644 go/analysis/plugin/pkglemma/pkglemma.go
 create mode 100644 go/analysis/validate.go

diff --git a/go/analysis/analysis.go b/go/analysis/analysis.go
new file mode 100644
index 00000000..e103fdeb
--- /dev/null
+++ b/go/analysis/analysis.go
@@ -0,0 +1,215 @@
+// The analysis package defines a uniform interface for static checkers
+// of Go source code. By implementing a common interface, checkers from
+// a variety of sources can be easily selected, incorporated, and reused
+// in a wide range of programs including command-line tools, text
+// editors and IDEs, build systems, test frameworks, code review tools,
+// and batch pipelines for large code bases. For the design, see
+// https://docs.google.com/document/d/1-azPLXaLgTCKeKDNg0HVMq2ovMlD-e7n1ZHzZVzOlJk
+//
+// Each analysis is invoked once per Go package, and is provided the
+// abstract syntax trees (ASTs) and type information for that package.
+//
+// The principal data types of this package are structs, not interfaces,
+// to permit later addition of optional fields as the API evolves.
+package analysis
+
+import (
+	"flag"
+	"fmt"
+	"go/ast"
+	"go/token"
+	"go/types"
+	"reflect"
+)
+
+// An Analysis describes an analysis function and its options.
+type Analysis struct {
+	// The Name of the analysis must be a valid Go identifier
+	// as it may appear in command-line flags, URLs, and so on.
+	Name string
+
+	// Doc is the documentation for the analysis.
+	Doc string
+
+	// Flags defines any flags accepted by the analysis.
+	// The manner in which these flags are exposed to the user
+	// depends on the driver which runs the analysis.
+	Flags flag.FlagSet
+
+	// Run applies the analysis to a package.
+	// It returns an error if the analysis failed.
+	Run func(*Unit) error
+
+	// RunDespiteErrors allows the driver to invoke
+	// the Run method of this analysis even on a
+	// package that contains parse or type errors.
+	RunDespiteErrors bool
+
+	// Requires is a set of analyses that must run successfully
+	// before this one on a given package. This analysis may inspect
+	// the outputs produced by each analysis in Requires.
+	// The graph over analyses implied by Requires edges must be acyclic.
+	//
+	// Requires establishes a "horizontal" dependency between
+	// analysis units (different analyses, same package).
+	Requires []*Analysis
+
+	// OutputType is the type of the optional Output value
+	// computed by this analysis and stored in Unit.Output.
+	// (The Output is provided as an Input to
+	// each analysis that Requires this one.)
+	OutputType reflect.Type
+
+	// LemmaTypes is the set of types of lemmas produced and
+	// consumed by this analysis. An analysis that uses lemmas
+	// may assume that its import dependencies have been
+	// similarly analyzed before it runs. Lemmas are pointers.
+	//
+	// LemmaTypes establishes a "vertical" dependency between
+	// analysis units (same analysis, different packages).
+	LemmaTypes []reflect.Type
+}
+
+func (a *Analysis) String() string { return a.Name }
+
+// A Unit provides information to the Run function that
+// applies a specific analysis to a single Go package.
+//
+// It forms the interface between the analysis logic and the driver
+// program, and has both input and output components.
+type Unit struct {
+	// -- inputs --
+
+	Analysis *Analysis // the identity of the current analysis
+
+	// syntax and type information
+	Fset   *token.FileSet // file position information
+	Syntax []*ast.File    // the abstract syntax tree of each file
+	Pkg    *types.Package // type information about the package
+	Info   *types.Info    // type information about the syntax trees
+
+	// Inputs provides the inputs to this analysis unit, which are
+	// the corresponding outputs of its prerequisite analyses.
+	// The map keys are the elements of Analysis.Requires,
+	// and the type of each corresponding value is the required
+	// analysis's OutputType.
+	Inputs map[*Analysis]interface{}
+
+	// ObjectLemma retrieves a lemma associated with obj.
+	// Given a value ptr of type *T, where *T satisfies Lemma,
+	// ObjectLemma copies the value to *ptr.
+	//
+	// ObjectLemma may panic if applied to a lemma type that
+	// the analysis did not declare among its LemmaTypes,
+	// or if called after analysis of the unit is complete.
+	//
+	// ObjectLemma is not concurrency-safe.
+	ObjectLemma func(obj types.Object, lemma Lemma) bool
+
+	// PackageLemma retrieves a lemma associated with package pkg,
+	// which must be this package or one of its dependencies.
+	// See comments for ObjectLemma.
+	PackageLemma func(pkg *types.Package, lemma Lemma) bool
+
+	// -- outputs --
+
+	// Findings is a list of findings about specific locations
+	// in the analyzed source code, such as potential mistakes.
+	// It is populated by the Run function.
+	Findings []*Finding
+
+	// SetObjectLemma associates a lemma of type *T with the obj,
+	// replacing any previous lemma of that type.
+	//
+	// SetObjectLemma panics if the lemma's type is not among
+	// Analysis.LemmaTypes, or if obj does not belong to the package
+	// being analyzed, or if it is called after analysis of the unit
+	// is complete.
+	//
+	// SetObjectLemma is not concurrency-safe.
+	SetObjectLemma func(obj types.Object, lemma Lemma)
+
+	// SetPackageLemma associates a lemma with the current package.
+	// See comments for SetObjectLemma.
+	SetPackageLemma func(lemma Lemma)
+
+	// Output is an immutable result computed by this analysis unit
+	// and set by the Run function.
+	// It will be made available as an input to any analysis that
+	// depends directly on this one; see Analysis.Requires.
+	// Its type must match Analysis.OutputType.
+	//
+	// Outputs are available as Inputs to later analyses of the
+	// same package. To pass analysis results between packages (and
+	// thus potentially between address spaces), use Lemmas, which
+	// are serializable.
+	Output interface{}
+
+	/* Further fields may be added in future. */
+	// For example, suggested or applied refactorings.
+}
+
+// Findingf is a helper function that creates a new Finding using the
+// specified position and formatted error message, appends it to
+// unit.Findings, and returns it.
+func (unit *Unit) Findingf(pos token.Pos, format string, args ...interface{}) *Finding {
+	msg := fmt.Sprintf(format, args...)
+	f := &Finding{Pos: pos, Message: msg}
+	unit.Findings = append(unit.Findings, f)
+	return f
+}
+
+func (unit *Unit) String() string {
+	return fmt.Sprintf("%s@%s", unit.Analysis.Name, unit.Pkg.Path())
+}
+
+// A Lemma is an intermediate fact produced during analysis.
+//
+// Each lemma is associated with a named declaration (a types.Object).
+// A single object may have multiple associated lemmas, but only one of
+// any particular lemma type.
+//
+// A Lemma represents a predicate such as "never returns", but does not
+// represent the subject of the predicate such as "function F".
+//
+// Lemmas may be produced in one analysis unit and consumed by another
+// analysis unit even if these are in different address spaces.
+// If package P imports Q, all lemmas about objects of Q produced during
+// analysis of that package will be available during later analysis of P.
+// Lemmas are analogous to type export data in a build system:
+// just as export data enables separate compilation of several units,
+// lemmas enable "separate analysis".
+//
+// Each unit of analysis starts with the set of lemmas produced by the
+// same analysis applied to the packages directly imported by the
+// current one. The analysis may add additional lemmas to the set, and
+// they may be exported in turn. An analysis's Run function may retrieve
+// lemmas by calling Unit.ObjectLemma or Unit.PackageLemma, and set them
+// using Unit.SetObjectLemma or Unit.SetPackageLemma.
+//
+// Each type of Lemma may be produced by at most one Analysis.
+// Lemmas are logically private to their Analysis; to pass values
+// between different analyses, use the Input/Output mechanism.
+//
+// A Lemma type must be a pointer. (Unit.ObjectLemma and
+// Unit.PackageLemma rely on it.)
+// Lemmas are encoded and decoded using encoding/gob.
+// A Lemma may implement the GobEncoder/GobDecoder interfaces
+// to customize its encoding; Lemma encoding should not fail.
+//
+// A Lemma should not be modified once passed to SetObjectLemma
+// or SetPackageLemma.
+type Lemma interface {
+	IsLemma() // dummy method to avoid type errors
+}
+
+// A Finding is a message associated with a source location.
+//
+// An Analysis may return a variety of findings; the optional Category,
+// which should be a constant, may be used to classify them.
+// It is primarily intended to make it easy to look up documentation.
+type Finding struct {
+	Pos      token.Pos
+	Category string // optional
+	Message  string
+}
diff --git a/go/analysis/plugin/README b/go/analysis/plugin/README
new file mode 100644
index 00000000..add86bdb
--- /dev/null
+++ b/go/analysis/plugin/README
@@ -0,0 +1,8 @@
+
+This directory does not contain a Go package,
+but acts as a container for various analyses
+that implement the golang.org/x/tools/go/analysis
+API and may be imported into an analysis tool.
+
+By convention, each package foo provides the analysis,
+and each command foo/cmd/foo provides a standalone driver.
diff --git a/go/analysis/plugin/findcall/findcall.go b/go/analysis/plugin/findcall/findcall.go
new file mode 100644
index 00000000..c7942551
--- /dev/null
+++ b/go/analysis/plugin/findcall/findcall.go
@@ -0,0 +1,45 @@
+// The findcall package is a trivial example and test of an analyzer of
+// Go source code. It reports a finding for every call to a function or
+// method of the name specified by its --name flag.
+package findcall
+
+import (
+	"go/ast"
+
+	"golang.org/x/tools/go/analysis"
+)
+
+var Analysis = &analysis.Analysis{
+	Name:             "findcall",
+	Doc:              "find calls to a particular function",
+	Run:              findcall,
+	RunDespiteErrors: true,
+}
+
+var name = "println" // --name flag
+
+func init() {
+	Analysis.Flags.StringVar(&name, "name", name, "name of the function to find")
+}
+
+func findcall(unit *analysis.Unit) error {
+	for _, f := range unit.Syntax {
+		ast.Inspect(f, func(n ast.Node) bool {
+			if call, ok := n.(*ast.CallExpr); ok {
+				var id *ast.Ident
+				switch fun := call.Fun.(type) {
+				case *ast.Ident:
+					id = fun
+				case *ast.SelectorExpr:
+					id = fun.Sel
+				}
+				if id != nil && !unit.Info.Types[id].IsType() && id.Name == name {
+					unit.Findingf(call.Lparen, "call of %s(...)", id.Name)
+				}
+			}
+			return true
+		})
+	}
+
+	return nil
+}
diff --git a/go/analysis/plugin/pkglemma/pkglemma.go b/go/analysis/plugin/pkglemma/pkglemma.go
new file mode 100644
index 00000000..94b4e17a
--- /dev/null
+++ b/go/analysis/plugin/pkglemma/pkglemma.go
@@ -0,0 +1,112 @@
+// The pkglemma package is a demonstration and test of the package lemma
+// mechanism.
+//
+// The output of the pkglemma analysis is a set of key/value pairs
+// gathered from the analyzed package and its imported dependencies.
+// Each key/value pair comes from a top-level constant declaration
+// whose name starts with "_". For example:
+//
+//	package p
+//
+//	const _greeting = "hello"
+//	const _audience = "world"
+//
+// the pkglemma analysis output for package p would be:
+//
+//	{"greeting": "hello", "audience": "world"}.
+//
+// In addition, the analysis reports a finding at each import
+// showing which key/value pairs it contributes.
+package pkglemma
+
+import (
+	"fmt"
+	"go/ast"
+	"go/token"
+	"go/types"
+	"reflect"
+	"sort"
+	"strings"
+
+	"golang.org/x/tools/go/analysis"
+)
+
+var Analysis = &analysis.Analysis{
+	Name:       "pkglemma",
+	Doc:        "gather name/value pairs from constant declarations",
+	Run:        run,
+	LemmaTypes: []reflect.Type{reflect.TypeOf(new(note))},
+	OutputType: reflect.TypeOf(map[string]string{}),
+}
+
+// A note is a package-level lemma that records
+// key/value pairs accumulated from constant
+// declarations in this package and its dependencies.
+type note struct {
+	M map[string]string
+}
+
+func (*note) IsLemma() {}
+
+func run(unit *analysis.Unit) error {
+	m := make(map[string]string)
+
+	// At each import, print the lemma from the imported
+	// package and accumulate its information into m.
+	doImport := func(spec *ast.ImportSpec) {
+		// An import without an explicit name has a nil spec.Name and
+		// no entry in Defs; its PkgName is recorded in Implicits.
+		obj, ok := unit.Info.Implicits[spec]
+		if !ok {
+			obj = unit.Info.Defs[spec.Name] // renaming import
+		}
+		pkgname, ok := obj.(*types.PkgName)
+		if !ok {
+			return // import did not resolve to a package
+		}
+		pkg := pkgname.Imported()
+		var lemma note
+		if unit.PackageLemma(pkg, &lemma) {
+			var lines []string
+			for k, v := range lemma.M {
+				m[k] = v
+				lines = append(lines, fmt.Sprintf("%s=%s", k, v))
+			}
+			sort.Strings(lines)
+			unit.Findingf(spec.Pos(), "%s", strings.Join(lines, " "))
+		}
+	}
+
+	// At each "const _name = value", add a fact into m.
+	doConst := func(spec *ast.ValueSpec) {
+		if len(spec.Names) == len(spec.Values) {
+			for i := range spec.Names {
+				name := spec.Names[i].Name
+				if strings.HasPrefix(name, "_") {
+					m[name[1:]] = unit.Info.Types[spec.Values[i]].Value.String()
+				}
+			}
+		}
+	}
+
+	for _, f := range unit.Syntax {
+		for _, decl := range f.Decls {
+			if decl, ok := decl.(*ast.GenDecl); ok {
+				for _, spec := range decl.Specs {
+					switch decl.Tok {
+					case token.IMPORT:
+						doImport(spec.(*ast.ImportSpec))
+					case token.CONST:
+						doConst(spec.(*ast.ValueSpec))
+					}
+				}
+			}
+		}
+	}
+
+	unit.Output = m
+
+	unit.SetPackageLemma(&note{m})
+
+	return nil
+}
diff --git a/go/analysis/validate.go b/go/analysis/validate.go
new file mode 100644
index 00000000..7fbb3b54
--- /dev/null
+++ b/go/analysis/validate.go
@@ -0,0 +1,101 @@
+package analysis
+
+import (
+	"fmt"
+	"reflect"
+	"unicode"
+)
+
+// Validate reports an error if any of the analyses are misconfigured.
+// Checks include:
+// - that the name is a valid identifier;
+// - that analysis names are unique;
+// - that each analysis is documented;
+// - that the Requires graph is acyclic;
+// - that analyses' lemma and output types are unique;
+// - that each lemma type is a pointer.
+func Validate(analyses []*Analysis) error {
+	names := make(map[string]bool)
+
+	// Map each lemma/output type to its sole generating analysis.
+	lemmaTypes := make(map[reflect.Type]*Analysis)
+	outputTypes := make(map[reflect.Type]*Analysis)
+
+	// Traverse the Requires graph, depth first.
+	color := make(map[*Analysis]uint8) // 0=white 1=grey 2=black
+	var visit func(a *Analysis) error
+	visit = func(a *Analysis) error {
+		if a == nil {
+			return fmt.Errorf("nil *Analysis")
+		}
+		if color[a] == 1 { // grey: a is on the current DFS path
+			return fmt.Errorf("cycle in Requires graph at analysis %q", a)
+		}
+		if color[a] == 0 { // white
+			color[a] = 1 // grey
+
+			// names
+			if !validIdent(a.Name) {
+				return fmt.Errorf("invalid analysis name %q", a)
+			}
+			if names[a.Name] {
+				return fmt.Errorf("duplicate analysis name %q", a)
+			}
+			names[a.Name] = true
+
+			if a.Doc == "" {
+				return fmt.Errorf("analysis %q is undocumented", a)
+			}
+
+			// lemma types
+			for _, t := range a.LemmaTypes {
+				if t == nil {
+					return fmt.Errorf("analysis %s has nil LemmaType", a)
+				}
+				if prev := lemmaTypes[t]; prev != nil {
+					return fmt.Errorf("lemma type %s registered by two analyses: %v, %v",
+						t, a, prev)
+				}
+				if t.Kind() != reflect.Ptr {
+					return fmt.Errorf("%s: lemma type %s is not a pointer", a, t)
+				}
+				lemmaTypes[t] = a
+			}
+
+			// output types
+			if a.OutputType != nil {
+				if prev := outputTypes[a.OutputType]; prev != nil {
+					return fmt.Errorf("output type %s registered by two analyses: %v, %v",
+						a.OutputType, a, prev)
+				}
+				outputTypes[a.OutputType] = a
+			}
+
+			// recursion
+			for i, req := range a.Requires {
+				if err := visit(req); err != nil {
+					return fmt.Errorf("%s.Requires[%d]: %v", a.Name, i, err)
+				}
+			}
+			color[a] = 2 // black
+		}
+
+		return nil
+	}
+	for _, a := range analyses {
+		if err := visit(a); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func validIdent(name string) bool {
+	for i, r := range name {
+		if !(r == '_' || unicode.IsLetter(r) || i > 0 && unicode.IsDigit(r)) {
+			return false
+		}
+	}
+	return name != ""
+}