diff --git a/pointer/TODO b/pointer/TODO index d288c48f..eb0ddd73 100644 --- a/pointer/TODO +++ b/pointer/TODO @@ -34,6 +34,5 @@ API: Think about them sooner rather than later. MISC: -- os.Args should point to something; currently they don't. - Test on all platforms. Currently we assume these go/build tags: linux, amd64, !cgo. diff --git a/pointer/analysis.go b/pointer/analysis.go index 7d14e814..db04ef86 100644 --- a/pointer/analysis.go +++ b/pointer/analysis.go @@ -39,17 +39,18 @@ type object struct { // allocation. Zero for all other nodes. size uint32 - // The SSA operation that caused this object to be allocated. - // May be nil for (e.g.) intrinsic allocations. - val ssa.Value + // data describes this object; it has one of these types: + // + // ssa.Value for an object allocated by an SSA operation. + // types.Type for an rtype instance object or *rtype-tagged object. + // string for an intrinsic object, e.g. the array behind os.Args. + // nil for an object allocated by an intrinsic. + // (cgn provides the identity of the intrinsic.) + data interface{} // The call-graph node (=context) in which this object was allocated. // May be nil for global objects: Global, Const, some Functions. cgn *cgnode - - // If this is an rtype instance object, or a *rtype-tagged - // object, this is its type. - rtype types.Type } // nodeid denotes a node. diff --git a/pointer/doc.go b/pointer/doc.go index 760961cb..037b4a70 100644 --- a/pointer/doc.go +++ b/pointer/doc.go @@ -24,6 +24,36 @@ optimisatisions such as Hybrid- and Lazy- Cycle Detection from (Hardekopf & Lin, PLDI'07), +CLASSIFICATION + +Our algorithm is INCLUSION-BASED: the points-to sets for x and y will +be related by pts(y) ⊇ pts(x) if the program contains the statement +y = x. + +It is FLOW-INSENSITIVE: it ignores all control flow constructs and the +order of statements in a program. It is therefore a "MAY ALIAS" +analysis: its facts are of the form "P may/may not point to L", +not "P must point to L". 
+ +It is FIELD-SENSITIVE: it builds separate points-to sets for distinct +fields, such as x and y in struct { x, y *int }. + +It is mostly CONTEXT-INSENSITIVE: most functions are analyzed once, +so values can flow in at one call to the function and return out at +another. Only some smaller functions are analyzed with consideration +to their calling context. + +It has a CONTEXT-SENSITIVE HEAP: objects are named by both allocation +site and context, so the objects returned by two distinct calls to f: + func f() *T { return new(T) } +are distinguished up to the limits of the calling context. + +It is a WHOLE PROGRAM analysis: it requires SSA-form IR for the +complete Go program and summaries for native code. + +See the (Hind, PASTE'01) survey paper for an explanation of these terms. + + TERMINOLOGY We occasionally use C's x->f notation to distinguish the case where x diff --git a/pointer/gen.go b/pointer/gen.go index 8dd52187..b4c19e24 100644 --- a/pointer/gen.go +++ b/pointer/gen.go @@ -95,9 +95,9 @@ func (a *analysis) setValueNode(v ssa.Value, id nodeid, cgn *cgnode) { // a single object allocation. // // obj is the start node of the object, from a prior call to nextNode. -// Its size, flags and (optionally) data will be updated. +// Its size, flags and optional data will be updated. // -func (a *analysis) endObject(obj nodeid, cgn *cgnode, val ssa.Value) *object { +func (a *analysis) endObject(obj nodeid, cgn *cgnode, data interface{}) *object { // Ensure object is non-empty by padding; // the pad will be the object node. 
size := uint32(a.nextNode() - obj) @@ -108,12 +108,9 @@ func (a *analysis) endObject(obj nodeid, cgn *cgnode, val ssa.Value) *object { o := &object{ size: size, // excludes padding cgn: cgn, - val: val, + data: data, } objNode.obj = o - if val != nil && a.log != nil { - fmt.Fprintf(a.log, "\tobj[%s] = n%d\n", val, obj) - } return o } @@ -150,10 +147,10 @@ func (a *analysis) makeFunctionObject(fn *ssa.Function) nodeid { } // makeTagged creates a tagged object of type typ. -func (a *analysis) makeTagged(typ types.Type, cgn *cgnode, val ssa.Value) nodeid { +func (a *analysis) makeTagged(typ types.Type, cgn *cgnode, data interface{}) nodeid { obj := a.addOneNode(typ, "tagged.T", nil) // NB: type may be non-scalar! a.addNodes(typ, "tagged.v") - a.endObject(obj, cgn, val).flags |= otTagged + a.endObject(obj, cgn, data).flags |= otTagged return obj } @@ -168,10 +165,9 @@ func (a *analysis) makeRtype(T types.Type) nodeid { // ordinarily a large struct but here a single node will do. obj := a.nextNode() a.addOneNode(T, "reflect.rtype", nil) - a.endObject(obj, nil, nil).rtype = T + a.endObject(obj, nil, T) - id := a.makeTagged(a.reflectRtypePtr, nil, nil) - a.nodes[id].obj.rtype = T + id := a.makeTagged(a.reflectRtypePtr, nil, T) a.nodes[id+1].typ = T // trick (each *rtype tagged object is a singleton) a.addressOf(id+1, obj) @@ -809,6 +805,10 @@ func (a *analysis) objectNode(cgn *cgnode, v ssa.Value) nodeid { // For now, Captures have the same cardinality as globals. // TODO(adonovan): treat captures context-sensitively. 
} + + if a.log != nil { + fmt.Fprintf(a.log, "\tglobalobj[%s] = n%d\n", a.nodes[obj].obj, obj) + } a.globalobj[v] = obj } return obj @@ -874,6 +874,10 @@ func (a *analysis) objectNode(cgn *cgnode, v ssa.Value) nodeid { // - unsafe.Pointer->*T conversion acts like Alloc // - string->[]byte/[]rune conversion acts like MakeSlice } + + if a.log != nil { + fmt.Fprintf(a.log, "\tlocalobj[%s] = n%d\n", a.nodes[obj].obj, obj) + } a.localobj[v] = obj } return obj @@ -1222,5 +1226,13 @@ func (a *analysis) generate() *cgnode { a.genFunc(cgn) } + // The runtime magically allocates os.Args; so should we. + if os := a.prog.ImportedPackage("os"); os != nil { + // In effect: os.Args = new([1]string)[:] + obj := a.addNodes(types.NewArray(types.Typ[types.String], 1), "") + a.endObject(obj, nil, "") + a.addressOf(a.objectNode(nil, os.Var("Args")), obj) + } + return root } diff --git a/pointer/labels.go b/pointer/labels.go index 215d24c9..d805b9cd 100644 --- a/pointer/labels.go +++ b/pointer/labels.go @@ -25,6 +25,7 @@ import ( // - stack- and heap-allocated variables (including composite literals) // - channels, maps and arrays created by make() // - instrinsic or reflective operations that allocate (e.g. append, reflect.New) +// - intrinsic objects, e.g. the initial array behind os.Args. // - and their subelements, e.g. "alloc.y[*].z" // // Labels are so varied that they defy good generalizations; @@ -32,16 +33,25 @@ import ( // Many objects have types that are inexpressible in Go: // maps, channels, functions, tagged objects. // +// At most one of Value() or ReflectType() may return non-nil. +// type Label struct { obj *object // the addressable memory location containing this label subelement *fieldInfo // subelement path within obj, e.g. ".a.b[*].c" } -// Value returns the ssa.Value that allocated this label's object, -// or nil if it was allocated by an intrinsic. -// +// Value returns the ssa.Value that allocated this label's object, if any. 
func (l Label) Value() ssa.Value { - return l.obj.val + val, _ := l.obj.data.(ssa.Value) + return val +} + +// ReflectType returns the type represented by this label if it is a +// reflect.rtype instance object or *reflect.rtype-tagged object. +// +func (l Label) ReflectType() types.Type { + rtype, _ := l.obj.data.(types.Type) + return rtype } // Context returns the analytic context in which this label's object was allocated, @@ -60,11 +70,11 @@ func (l Label) Path() string { // Pos returns the position of this label, if known, zero otherwise. func (l Label) Pos() token.Pos { - if v := l.Value(); v != nil { - return v.Pos() - } - if l.obj.rtype != nil { - if nt, ok := deref(l.obj.rtype).(*types.Named); ok { + switch data := l.obj.data.(type) { + case ssa.Value: + return data.Pos() + case types.Type: + if nt, ok := deref(data).(*types.Named); ok { return nt.Obj().Pos() } } @@ -84,6 +94,7 @@ func (l Label) Pos() token.Pos { // makeinterface (tagged object allocated by makeinterface) // (allocation in instrinsic) // sync.Mutex (a reflect.rtype instance) +// (an intrinsic object) // // Labels within compound objects have subelement paths: // x.y[*].z (a struct variable, x) @@ -92,18 +103,25 @@ func (l Label) Pos() token.Pos { // func (l Label) String() string { var s string - switch v := l.obj.val.(type) { + switch v := l.obj.data.(type) { + case types.Type: + return v.String() + + case string: + s = v // an intrinsic object (e.g. 
os.Args[*]) + case nil: - if l.obj.rtype != nil { - return l.obj.rtype.String() - } if l.obj.cgn != nil { // allocation by intrinsic or reflective operation - return fmt.Sprintf("", l.obj.cgn.Func()) + s = fmt.Sprintf("", l.obj.cgn.Func()) + } else { + s = "" // should be unreachable } - return "" // should be unreachable - case *ssa.Function, *ssa.Global: + case *ssa.Function: + s = v.String() + + case *ssa.Global: s = v.String() case *ssa.Const: @@ -132,7 +150,7 @@ func (l Label) String() string { s = "makeinterface:" + v.X.Type().String() default: - panic(fmt.Sprintf("unhandled Label.val type: %T", v)) + panic(fmt.Sprintf("unhandled object data type: %T", v)) } return s + l.subelement.path() diff --git a/pointer/reflect.go b/pointer/reflect.go index 5b72fe29..591bc613 100644 --- a/pointer/reflect.go +++ b/pointer/reflect.go @@ -751,7 +751,7 @@ func (c *rtypeElemConstraint) solve(a *analysis, _ *node, delta nodeset) { } changed := false for tObj := range delta { - T := a.nodes[tObj].obj.rtype + T := a.nodes[tObj].obj.data.(types.Type) if tHasElem, ok := T.Underlying().(hasElem); ok { if a.addLabel(c.result, a.makeRtype(tHasElem.Elem())) { changed = true @@ -797,7 +797,7 @@ func (c *rtypeInOutConstraint) ptr() nodeid { func (c *rtypeInOutConstraint) solve(a *analysis, _ *node, delta nodeset) { changed := false for tObj := range delta { - T := a.nodes[tObj].obj.rtype + T := a.nodes[tObj].obj.data.(types.Type) sig, ok := T.Underlying().(*types.Signature) if !ok { continue // not a func type @@ -861,7 +861,7 @@ func (c *rtypeKeyConstraint) ptr() nodeid { func (c *rtypeKeyConstraint) solve(a *analysis, _ *node, delta nodeset) { changed := false for tObj := range delta { - T := a.nodes[tObj].obj.rtype + T := a.nodes[tObj].obj.data.(types.Type) if tMap, ok := T.Underlying().(*types.Map); ok { if a.addLabel(c.result, a.makeRtype(tMap.Key())) { changed = true diff --git a/pointer/testdata/hello.go b/pointer/testdata/hello.go index 0cb08ff9..8e636888 100644 --- 
a/pointer/testdata/hello.go +++ b/pointer/testdata/hello.go @@ -2,7 +2,10 @@ package main -import "fmt" +import ( + "fmt" + "os" +) type S int @@ -14,6 +17,7 @@ func (s *S) String() string { } func main() { + print(os.Args) // @pointsto fmt.Println("Hello, World!", &theS) }