From e5d4b58ff5020f74d6b9085bc1db45e898c4724f Mon Sep 17 00:00:00 2001 From: Alan Donovan Date: Sat, 3 Nov 2018 16:24:43 -0400 Subject: [PATCH] go/analysis/passes/cgocall: analyze raw cgo files (again) The cgocall analyzer was originally written in vet to analyze "raw" cgo files, which are type-checked on a best-effort basis. However, the go/analysis API presents analyzers with "cooked" cgo files, which are well-typed legal Go but obscure the relationship between a call C.f(...) and its arguments. Prior to this CL, cgocall attempted to "uncook" the file, which was as predictably fragile as it sounds, and it rapidly broke as the cgo recipe evolved. This change causes cgocall to parse, modify, type-check and analyze "raw" cgo files. The approach (based on dot-importing the "cooked" package) is rather too clever but should be more robust than the one it replaces. Fixes golang/go#28566 Change-Id: I3092a313c64d27153eaaa115fe8635abfed17023 Reviewed-on: https://go-review.googlesource.com/c/147317 Reviewed-by: Michael Matloob --- go/analysis/passes/cgocall/cgocall.go | 300 ++++++++++++++---- go/analysis/passes/cgocall/cgocall_test.go | 7 +- .../passes/cgocall/testdata/src/a/cgo.go | 12 + .../passes/cgocall/testdata/src/a/cgo3.go | 11 + .../passes/cgocall/testdata/src/a/cgo4.go | 15 - .../passes/cgocall/testdata/src/b/b.go | 11 +- .../testdata/src/{a/cgo2.go => c/c.go} | 2 +- 7 files changed, 265 insertions(+), 93 deletions(-) delete mode 100644 go/analysis/passes/cgocall/testdata/src/a/cgo4.go rename go/analysis/passes/cgocall/testdata/src/{a/cgo2.go => c/c.go} (97%) diff --git a/go/analysis/passes/cgocall/cgocall.go b/go/analysis/passes/cgocall/cgocall.go index 7eb24a4a..e72d64f9 100644 --- a/go/analysis/passes/cgocall/cgocall.go +++ b/go/analysis/passes/cgocall/cgocall.go @@ -9,18 +9,22 @@ package cgocall import ( "fmt" "go/ast" + "go/build" + "go/format" + "go/parser" "go/token" "go/types" "log" - "strings" + "os" + "strconv" "golang.org/x/tools/go/analysis" - "golang.org/x/tools/go/analysis/passes/inspect" "golang.org/x/tools/go/analysis/passes/internal/analysisutil" - "golang.org/x/tools/go/ast/inspector" ) -const Doc = `detect some violations of the cgo pointer passing rules +const debug = false + +const doc = `detect some violations of the cgo pointer passing rules Check for invalid cgo pointer passing. This looks for code that uses cgo to call C code passing values @@ -31,118 +35,253 @@ or slice to C, either directly, or via a pointer, array, or struct.` var Analyzer = &analysis.Analyzer{ Name: "cgocall", - Doc: Doc, - Requires: []*analysis.Analyzer{inspect.Analyzer}, + Doc: doc, RunDespiteErrors: true, Run: run, } func run(pass *analysis.Pass) (interface{}, error) { - inspect := pass.ResultOf[inspect.Analyzer].(*inspector.Inspector) - - nodeFilter := []ast.Node{ - (*ast.CallExpr)(nil), + if imports(pass.Pkg, "runtime/cgo") == nil { + return nil, nil // doesn't use cgo } - inspect.WithStack(nodeFilter, func(n ast.Node, push bool, stack []ast.Node) bool { - if !push { + + cgofiles, info, err := typeCheckCgoSourceFiles(pass.Fset, pass.Pkg, pass.Files, pass.TypesInfo) + if err != nil { + return nil, err + } + for _, f := range cgofiles { + checkCgo(pass.Fset, f, info, pass.Reportf) + } + return nil, nil +} + +func checkCgo(fset *token.FileSet, f *ast.File, info *types.Info, reportf func(token.Pos, string, ...interface{})) { + ast.Inspect(f, func(n ast.Node) bool { + call, ok := n.(*ast.CallExpr) + if !ok { return true } - call, name := findCall(pass.Fset, stack) - if call == nil { + + // Is this a C.f() call? + var name string + if sel, ok := analysisutil.Unparen(call.Fun).(*ast.SelectorExpr); ok { + if id, ok := sel.X.(*ast.Ident); ok && id.Name == "C" { + name = sel.Sel.Name + } + } + if name == "" { return true // not a call we need to check } - // A call to C.CBytes passes a pointer but is always safe. + // A call to C.Bytes passes a pointer but is always safe. if name == "CBytes" { return true } - if false { - fmt.Printf("%s: inner call to C.%s\n", pass.Fset.Position(n.Pos()), name) - fmt.Printf("%s: outer call to C.%s\n", pass.Fset.Position(call.Lparen), name) + if debug { + log.Printf("%s: call to C.%s", fset.Position(call.Lparen), name) } for _, arg := range call.Args { - if !typeOKForCgoCall(cgoBaseType(pass.TypesInfo, arg), make(map[types.Type]bool)) { - pass.Reportf(arg.Pos(), "possibly passing Go type with embedded pointer to C") + if !typeOKForCgoCall(cgoBaseType(info, arg), make(map[types.Type]bool)) { + reportf(arg.Pos(), "possibly passing Go type with embedded pointer to C") break } // Check for passing the address of a bad type. if conv, ok := arg.(*ast.CallExpr); ok && len(conv.Args) == 1 && - isUnsafePointer(pass.TypesInfo, conv.Fun) { + isUnsafePointer(info, conv.Fun) { arg = conv.Args[0] } if u, ok := arg.(*ast.UnaryExpr); ok && u.Op == token.AND { - if !typeOKForCgoCall(cgoBaseType(pass.TypesInfo, u.X), make(map[types.Type]bool)) { - pass.Reportf(arg.Pos(), "possibly passing Go type with embedded pointer to C") + if !typeOKForCgoCall(cgoBaseType(info, u.X), make(map[types.Type]bool)) { + reportf(arg.Pos(), "possibly passing Go type with embedded pointer to C") break } } } return true }) - return nil, nil } -// findCall returns the CallExpr that we need to check, which may not be -// the same as the one we're currently visiting, due to code generation. -// It also returns the name of the function, such as "f" for C.f(...). +// typeCheckCgoSourceFiles returns type-checked syntax trees for the raw +// cgo files of a package (those that import "C"). Such files are not +// Go, so there may be gaps in type information around C.f references. // -// This checker was initially written in vet to inpect unprocessed cgo -// source files using partial type information. However, Analyzers in -// the new analysis API are presented with the type-checked, processed -// Go ASTs resulting from cgo processing files, so we must choose -// between: +// This checker was initially written in vet to inpect raw cgo source +// files using partial type information. However, Analyzers in the new +// analysis API are presented with the type-checked, "cooked" Go ASTs +// resulting from cgo-processing files, so we must choose between +// working with the cooked file generated by cgo (which was tried but +// proved fragile) or locating the raw cgo file (e.g. from //line +// directives) and working with that, as we now do. // -// a) locating the cgo file (e.g. from //line directives) -// and working with that, or -// b) working with the file generated by cgo. +// Specifically, we must type-check the raw cgo source files (or at +// least the subtrees needed for this analyzer) in an environment that +// simulates the rest of the already type-checked package. // -// We cannot use (a) because it does not provide type information, which -// the analyzer needs, and it is infeasible for the analyzer to run the -// type checker on this file. Thus we choose (b), which is fragile, -// because the checker may need to change each time the cgo processor -// changes. +// For example, for each raw cgo source file in the original package, +// such as this one: // -// Consider a cgo source file containing this header: +// package p +// import "C" +// import "fmt" +// type T int +// const k = 3 +// var x, y = fmt.Println() +// func f() { ... } +// func g() { ... C.malloc(k) ... } +// func (T) f(int) string { ... } // -// /* void f(void *x, *y); */ -// import "C" +// we synthesize a new ast.File, shown below, that dot-imports the +// orginal "cooked" package using a special name ("·this·"), so that all +// references to package members resolve correctly. (References to +// unexported names cause an "unexported" error, which we ignore.) // -// The cgo tool expands a call such as: +// To avoid shadowing names imported from the cooked package, +// package-level declarations in the new source file are modified so +// that they do not declare any names. +// (The cgocall analysis is concerned with uses, not declarations.) +// Specifically, type declarations are discarded; +// all names in each var and const declaration are blanked out; +// each method is turned into a regular function by turning +// the receiver into the first parameter; +// and all functions are renamed to "_". // -// C.f(x, y) +// package p +// import . "·this·" // declares T, k, x, y, f, g, T.f +// import "C" +// import "fmt" +// const _ = 3 +// var _, _ = fmt.Println() +// func _() { ... } +// func _() { ... C.malloc(k) ... } +// func _(T, int) string { ... } // -// to this: +// In this way, the raw function bodies and const/var initializer +// expressions are preserved but refer to the "cooked" objects imported +// from "·this·", and none of the transformed package-level declarations +// actually declares anything. In the example above, the reference to k +// in the argument of the call to C.malloc resolves to "·this·".k, which +// has an accurate type. // -// 1 func(param0, param1 unsafe.Pointer) { -// 2 ... various checks on params ... -// 3 (_Cfunc_f)(param0, param1) -// 4 }(x, y) +// This approach could in principle be generalized to more complex +// analyses on raw cgo files. One could synthesize a "C" package so that +// C.f would resolve to "·this·"._C_func_f, for example. But we have +// limited ourselves here to preserving function bodies and initializer +// expressions since that is all that the cgocall analyzer needs. // -// We first locate the _Cfunc_f call on line 3, then -// walk up the stack of enclosing nodes until we find -// the call on line 4. -// -func findCall(fset *token.FileSet, stack []ast.Node) (*ast.CallExpr, string) { - last := len(stack) - 1 - call := stack[last].(*ast.CallExpr) - if id, ok := analysisutil.Unparen(call.Fun).(*ast.Ident); ok { - if name := strings.TrimPrefix(id.Name, "_Cfunc_"); name != id.Name { - // Find the outer call with the arguments (x, y) we want to check. - for i := last - 1; i >= 0; i-- { - if outer, ok := stack[i].(*ast.CallExpr); ok { - return outer, name +func typeCheckCgoSourceFiles(fset *token.FileSet, pkg *types.Package, files []*ast.File, info *types.Info) ([]*ast.File, *types.Info, error) { + const thispkg = "·this·" + + // Which files are cgo files? + var cgoFiles []*ast.File + importMap := map[string]*types.Package{thispkg: pkg} + for _, raw := range files { + // If f is a cgo-generated file, Position reports + // the original file, honoring //line directives. + filename := fset.Position(raw.Pos()).Filename + f, err := parser.ParseFile(fset, filename, nil, parser.Mode(0)) + if err != nil { + return nil, nil, fmt.Errorf("can't parse raw cgo file: %v", err) + } + found := false + for _, spec := range f.Imports { + if spec.Path.Value == `"C"` { + found = true + break + } + } + if !found { + continue // not a cgo file + } + + // Record the original import map. + for _, spec := range raw.Imports { + path, _ := strconv.Unquote(spec.Path.Value) + importMap[path] = imported(info, spec) + } + + // Add special dot-import declaration: + // import . "·this·" + var decls []ast.Decl + decls = append(decls, &ast.GenDecl{ + Tok: token.IMPORT, + Specs: []ast.Spec{ + &ast.ImportSpec{ + Name: &ast.Ident{Name: "."}, + Path: &ast.BasicLit{ + Kind: token.STRING, + Value: strconv.Quote(thispkg), + }, + }, + }, + }) + + // Transform declarations from the raw cgo file. + for _, decl := range f.Decls { + switch decl := decl.(type) { + case *ast.GenDecl: + switch decl.Tok { + case token.TYPE: + // Discard type declarations. + continue + case token.IMPORT: + // Keep imports. + case token.VAR, token.CONST: + // Blank the declared var/const names. + for _, spec := range decl.Specs { + spec := spec.(*ast.ValueSpec) + for i := range spec.Names { + spec.Names[i].Name = "_" + } + } + } + case *ast.FuncDecl: + // Blank the declared func name. + decl.Name.Name = "_" + + // Turn a method receiver: func (T) f(P) R {...} + // into regular parameter: func _(T, P) R {...} + if decl.Recv != nil { + var params []*ast.Field + params = append(params, decl.Recv.List...) + params = append(params, decl.Type.Params.List...) + decl.Type.Params.List = params + decl.Recv = nil } } - // This shouldn't happen. - // Perhaps the code generator has changed? - log.Printf("%s: can't find outer call for C.%s(...)", - fset.Position(call.Lparen), name) + decls = append(decls, decl) } + f.Decls = decls + if debug { + format.Node(os.Stderr, fset, f) // debugging + } + cgoFiles = append(cgoFiles, f) } - return nil, "" + if cgoFiles == nil { + return nil, nil, nil // nothing to do (can't happen?) + } + + // Type-check the synthetic files. + tc := &types.Config{ + FakeImportC: true, + Importer: importerFunc(func(path string) (*types.Package, error) { + return importMap[path], nil + }), + // TODO(adonovan): Sizes should probably be provided by analysis.Pass. + Sizes: types.SizesFor("gc", build.Default.GOARCH), + Error: func(error) {}, // ignore errors (e.g. unused import) + } + + // It's tempting to record the new types in the + // existing pass.TypesInfo, but we don't own it. + altInfo := &types.Info{ + Types: make(map[ast.Expr]types.TypeAndValue), + } + tc.Check(pkg.Path(), fset, cgoFiles, altInfo) + + return cgoFiles, altInfo, nil } // cgoBaseType tries to look through type conversions involving @@ -224,3 +363,28 @@ func isUnsafePointer(info *types.Info, e ast.Expr) bool { t := info.Types[e].Type return t != nil && t.Underlying() == types.Typ[types.UnsafePointer] } + +type importerFunc func(path string) (*types.Package, error) + +func (f importerFunc) Import(path string) (*types.Package, error) { return f(path) } + +// TODO(adonovan): make this a library function or method of Info. +func imported(info *types.Info, spec *ast.ImportSpec) *types.Package { + obj, ok := info.Implicits[spec] + if !ok { + obj = info.Defs[spec.Name] // renaming import + } + return obj.(*types.PkgName).Imported() +} + +// imports reports whether pkg has path among its direct imports. +// It returns the imported package if so, or nil if not. +// TODO(adonovan): move to analysisutil. +func imports(pkg *types.Package, path string) *types.Package { + for _, imp := range pkg.Imports() { + if imp.Path() == path { + return imp + } + } + return nil +} diff --git a/go/analysis/passes/cgocall/cgocall_test.go b/go/analysis/passes/cgocall/cgocall_test.go index 4dd8bf23..ba654261 100644 --- a/go/analysis/passes/cgocall/cgocall_test.go +++ b/go/analysis/passes/cgocall/cgocall_test.go @@ -2,11 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// TODO(adonovan): the findCall function is fragile and no longer works -// at tip. Fix the bug and re-enable the test. - -// +build !go1.12 - package cgocall_test import ( @@ -18,5 +13,5 @@ import ( func Test(t *testing.T) { testdata := analysistest.TestData() - analysistest.Run(t, testdata, cgocall.Analyzer, "a", "b") + analysistest.Run(t, testdata, cgocall.Analyzer, "a", "b", "c") } diff --git a/go/analysis/passes/cgocall/testdata/src/a/cgo.go b/go/analysis/passes/cgocall/testdata/src/a/cgo.go index 39ad897e..75464b29 100644 --- a/go/analysis/passes/cgocall/testdata/src/a/cgo.go +++ b/go/analysis/passes/cgocall/testdata/src/a/cgo.go @@ -36,6 +36,10 @@ func CgoTests() { C.f(*(*unsafe.Pointer)(unsafe.Pointer(&st))) // want "embedded pointer" C.f(unsafe.Pointer(&st)) // want "embedded pointer" + var st3 S + C.f(*(*unsafe.Pointer)(unsafe.Pointer(&st3))) // want "embedded pointer" + C.f(unsafe.Pointer(&st3)) // want "embedded pointer" + // The following cases are OK. var i int C.f(*(*unsafe.Pointer)(unsafe.Pointer(&i))) @@ -52,8 +56,16 @@ func CgoTests() { C.f(*(*unsafe.Pointer)(unsafe.Pointer(&st2))) C.f(unsafe.Pointer(&st2)) + var st4 S2 + C.f(*(*unsafe.Pointer)(unsafe.Pointer(&st4))) + C.f(unsafe.Pointer(&st4)) + type cgoStruct struct{ p *cgoStruct } C.f(unsafe.Pointer(&cgoStruct{})) C.CBytes([]byte("hello")) } + +type S struct{ slice []int } + +type S2 struct{ int int } diff --git a/go/analysis/passes/cgocall/testdata/src/a/cgo3.go b/go/analysis/passes/cgocall/testdata/src/a/cgo3.go index fd095166..4873db02 100644 --- a/go/analysis/passes/cgocall/testdata/src/a/cgo3.go +++ b/go/analysis/passes/cgocall/testdata/src/a/cgo3.go @@ -4,7 +4,18 @@ package a +// The purpose of this inherited test is unclear. + import "C" +const x = 1 + +var a, b = 1, 2 + func F() { } + +func FAD(int, string) bool { + C.malloc(3) + return true +} diff --git a/go/analysis/passes/cgocall/testdata/src/a/cgo4.go b/go/analysis/passes/cgocall/testdata/src/a/cgo4.go deleted file mode 100644 index fc1d1e5f..00000000 --- a/go/analysis/passes/cgocall/testdata/src/a/cgo4.go +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Test the cgo checker on a file that doesn't use cgo, but has an -// import named "C". - -package a - -import C "fmt" - -var _ = C.Println(*p(**p)) - -// Passing a pointer (via a slice), but C is fmt, not cgo. -var _ = C.Println([]int{3}) diff --git a/go/analysis/passes/cgocall/testdata/src/b/b.go b/go/analysis/passes/cgocall/testdata/src/b/b.go index 342b14be..ff2f2e19 100644 --- a/go/analysis/passes/cgocall/testdata/src/b/b.go +++ b/go/analysis/passes/cgocall/testdata/src/b/b.go @@ -9,7 +9,12 @@ package b import C "fmt" -var _ = C.Println(*p(**p)) +import "unsafe" -// Passing a pointer (via a slice), but C is fmt, not cgo. -var _ = C.Println([]int{3}) +func init() { + var f func() + C.Println(unsafe.Pointer(&f)) + + // Passing a pointer (via a slice), but C is fmt, not cgo. + C.Println([]int{3}) +} diff --git a/go/analysis/passes/cgocall/testdata/src/a/cgo2.go b/go/analysis/passes/cgocall/testdata/src/c/c.go similarity index 97% rename from go/analysis/passes/cgocall/testdata/src/a/cgo2.go rename to go/analysis/passes/cgocall/testdata/src/c/c.go index a624afb9..256e3dfb 100644 --- a/go/analysis/passes/cgocall/testdata/src/a/cgo2.go +++ b/go/analysis/passes/cgocall/testdata/src/c/c.go @@ -4,7 +4,7 @@ // Test the cgo checker on a file that doesn't use cgo. -package a +package c import "unsafe"