From 6643abb26c4eebe7b5608a0066b7c1d4e3841512 Mon Sep 17 00:00:00 2001 From: Alan Donovan Date: Thu, 22 Aug 2013 12:27:55 -0400 Subject: [PATCH] go.tools/pointer: inclusion-based pointer analysis for Go. Suggested reading order: - doc.go - api.go, analysis.go, callgraph.go, labels.go - print.go, util.go - gen.go - solve.go - pointer_test.go, testdata/* - intrinsics.go (none are implemented yet) R=dannyb, gri, crawshaw, 0xjnml CC=golang-dev https://golang.org/cl/10618043 --- pointer/TODO | 31 + pointer/analysis.go | 255 +++++++ pointer/api.go | 232 +++++++ pointer/callgraph.go | 114 +++ pointer/doc.go | 391 +++++++++++ pointer/gen.go | 1069 +++++++++++++++++++++++++++++ pointer/intrinsics.go | 276 ++++++++ pointer/labels.go | 67 ++ pointer/pointer_test.go | 567 +++++++++++++++ pointer/print.go | 35 + pointer/solve.go | 262 +++++++ pointer/testdata/a_test.go | 42 ++ pointer/testdata/another.go | 35 + pointer/testdata/arrayreflect.go | 108 +++ pointer/testdata/arrays.go | 97 +++ pointer/testdata/channels.go | 91 +++ pointer/testdata/chanreflect.go | 75 ++ pointer/testdata/chanreflect1.go | 35 + pointer/testdata/context.go | 48 ++ pointer/testdata/conv.go | 61 ++ pointer/testdata/finalizer.go | 70 ++ pointer/testdata/flow.go | 63 ++ pointer/testdata/fmtexcerpt.go | 42 ++ pointer/testdata/func.go | 165 +++++ pointer/testdata/funcreflect.go | 30 + pointer/testdata/hello.go | 21 + pointer/testdata/interfaces.go | 152 ++++ pointer/testdata/mapreflect.go | 52 ++ pointer/testdata/maps.go | 51 ++ pointer/testdata/panic.go | 36 + pointer/testdata/recur.go | 11 + pointer/testdata/reflect.go | 69 ++ pointer/testdata/structreflect.go | 25 + pointer/testdata/structs.go | 101 +++ pointer/util.go | 289 ++++++++ 35 files changed, 5068 insertions(+) create mode 100644 pointer/TODO create mode 100644 pointer/analysis.go create mode 100644 pointer/api.go create mode 100644 pointer/callgraph.go create mode 100644 pointer/doc.go create mode 100644 pointer/gen.go create mode 100644 pointer/intrinsics.go create mode 100644 pointer/labels.go create mode 100644 pointer/pointer_test.go create mode 100644 pointer/print.go create mode 100644 pointer/solve.go create mode 100644 pointer/testdata/a_test.go create mode 100644 pointer/testdata/another.go create mode 100644 pointer/testdata/arrayreflect.go create mode 100644 pointer/testdata/arrays.go create mode 100644 pointer/testdata/channels.go create mode 100644 pointer/testdata/chanreflect.go create mode 100644 pointer/testdata/chanreflect1.go create mode 100644 pointer/testdata/context.go create mode 100644 pointer/testdata/conv.go create mode 100644 pointer/testdata/finalizer.go create mode 100644 pointer/testdata/flow.go create mode 100644 pointer/testdata/fmtexcerpt.go create mode 100644 pointer/testdata/func.go create mode 100644 pointer/testdata/funcreflect.go create mode 100644 pointer/testdata/hello.go create mode 100644 pointer/testdata/interfaces.go create mode 100644 pointer/testdata/mapreflect.go create mode 100644 pointer/testdata/maps.go create mode 100644 pointer/testdata/panic.go create mode 100644 pointer/testdata/recur.go create mode 100644 pointer/testdata/reflect.go create mode 100644 pointer/testdata/structreflect.go create mode 100644 pointer/testdata/structs.go create mode 100644 pointer/util.go diff --git a/pointer/TODO b/pointer/TODO new file mode 100644 index 00000000..0c9daecf --- /dev/null +++ b/pointer/TODO @@ -0,0 +1,31 @@ +-*- text -*- + +Pointer analysis to-do list +=========================== + +CONSTRAINT GENERATION: +- reflection intrinsics +- unsafe.Pointer conversions. Three options: + 1) unsoundly (but type-safely) treat p=unsafe.Pointer(x) conversions as + allocations, losing aliases. This is what's currently implemented. + 2) unsoundly (but type-safely) treat p=unsafe.Pointer(x) and T(p) + conversions as interface boxing and unboxing operations. + This may preserve some aliasing relations at little cost. + 3) soundly track physical field offsets. (Summarise dannyb's email here.) + A downside is that we can't keep the identity field of struct + allocations that identifies the object. + +PRESOLVER OPTIMISATIONS +- use HVN, HRU, LE, PE, HCD, LCD. + +SOLVER: +- use BDDs and/or sparse bitvectors for ptsets +- use sparse bitvectors for graph edges +- use BDD-based relational composition for e.g. offset computations. +- experiment with different worklist algorithms: + priority queue (solver visit-time order) + red-black tree (node id order) + double-ended queue (insertion order) + fast doubly-linked list (See Zhanh et al PLDI'13) + (insertion order with fast membership test) + dannyb recommends sparse bitmap. diff --git a/pointer/analysis.go b/pointer/analysis.go new file mode 100644 index 00000000..e00cc9f6 --- /dev/null +++ b/pointer/analysis.go @@ -0,0 +1,255 @@ +package pointer + +// This file defines the entry points into the pointer analysis. + +import ( + "fmt" + "go/token" + "io" + "os" + + "code.google.com/p/go.tools/go/types" + "code.google.com/p/go.tools/ssa" +) + +// nodeid denotes a node. +// It is an index within analysis.nodes. +// We use small integers, not *node pointers, for many reasons: +// - they are smaller on 64-bit systems. +// - sets of them can be represented compactly in bitvectors or BDDs. +// - order matters; a field offset can be computed by simple addition. +type nodeid uint32 + +// node.flags bitmask values. +const ( + ntObject = 1 << iota // start of an object (addressable memory location) + ntInterface // conctype node of interface object (=> ntObject) + ntFunction // identity node of function object (=> ntObject) +) + +// A node is an equivalence class of memory locations. +// Nodes may be pointers, pointed-to locations, neither, or both. +type node struct { + // flags is a bitset of the node type (nt*) flags defined above. + flags uint32 + + // Number of following words belonging to the same "object" allocation. + // (Set by endObject.) Zero for all other nodes. + size uint32 + + // The type of the field denoted by this node. Non-aggregate, + // unless this is an iface.conctype node (i.e. the thing + // pointed to by an interface) in which case typ is that type. + typ types.Type + + // data holds additional attributes of this node, depending on + // its flags. + // + // If ntObject is set, data is the ssa.Value of the + // instruction that allocated this memory, or nil if it was + // implicit. + // + // Special cases: + // - If ntInterface is also set, data will be a *ssa.MakeInterface. + // - If ntFunction is also set, this node is the first word of a + // function block, and data is a *cgnode (not an ssa.Value) + // representing this function. + data interface{} + + // subelement indicates which directly embedded subelement of + // an object of aggregate type (struct, tuple, array) this is. + subelement *fieldInfo // e.g. ".a.b[*].c" + + // Points-to sets. + pts nodeset // points-to set of this node + prevPts nodeset // pts(n) in previous iteration (for difference propagation) + + // Graph edges + copyTo nodeset // simple copy constraint edges + + // Complex constraints attached to this node (x). + // - *loadConstraint y=*x + // - *offsetAddrConstraint y=&x.f or y=&x[0] + // - *storeConstraint *x=z + // - *typeAssertConstraint y=x.(T) + // - *invokeConstraint y=x.f(params...) + complex constraintset +} + +type constraint interface { + String() string + + // Called by solver to prepare a constraint, e.g. to + // - initialize a points-to set (addrConstraint). + // - attach it to a pointer node (complex constraints). + init(a *analysis) + + // solve is called for complex constraints when the pts for + // the node to which they are attached has changed. + solve(a *analysis, n *node, delta nodeset) +} + +// dst = &src +// pts(dst) ⊇ {src} +// A base constraint used to initialize the solver's pt sets +type addrConstraint struct { + dst nodeid + src nodeid +} + +// dst = src +// A simple constraint represented directly as a copyTo graph edge. +type copyConstraint struct { + dst nodeid + src nodeid +} + +// dst = src[offset] +// A complex constraint attached to src (the pointer) +type loadConstraint struct { + offset uint32 + dst nodeid + src nodeid +} + +// dst[offset] = src +// A complex constraint attached to dst (the pointer) +type storeConstraint struct { + offset uint32 + dst nodeid + src nodeid +} + +// dst = &src.f or dst = &src[0] +// A complex constraint attached to dst (the pointer) +type offsetAddrConstraint struct { + offset uint32 + dst nodeid + src nodeid +} + +// dst = src.(typ) +// A complex constraint attached to src (the interface). +type typeAssertConstraint struct { + typ types.Type + dst nodeid + src nodeid +} + +// src.method(params...) +// A complex constraint attached to iface. +type invokeConstraint struct { + method *types.Func // the abstract method + iface nodeid // the interface + params nodeid // the first parameter in the params/results block +} + +// An analysis instance holds the state of a single pointer analysis problem. +type analysis struct { + config *Config // the client's control/observer interface + prog *ssa.Program // the program being analyzed + log io.Writer // log stream; nil to disable + panicNode nodeid // sink for panic, source for recover + nodes []*node // indexed by nodeid + flattenMemo map[types.Type][]*fieldInfo // memoization of flatten() + constraints []constraint // set of constraints + callsites []*callsite // all callsites + genq []*cgnode // queue of functions to generate constraints for + intrinsics map[*ssa.Function]intrinsic // non-nil values are summaries for intrinsic fns + reflectValueObj types.Object // type symbol for reflect.Value (if present) + reflectRtypeObj types.Object // type symbol for reflect.rtype (if present) + reflectRtype *types.Pointer // *reflect.rtype + funcObj map[*ssa.Function]nodeid // default function object for each func + probes map[*ssa.CallCommon]nodeid // maps call to print() to argument variable + valNode map[ssa.Value]nodeid // node for each ssa.Value + work worklist // solver's worklist +} + +func (a *analysis) warnf(pos token.Pos, format string, args ...interface{}) { + if Warn := a.config.Warn; Warn != nil { + Warn(pos, format, args...) + } else { + fmt.Fprintf(os.Stderr, "%s: warning: ", a.prog.Fset.Position(pos)) + fmt.Fprintf(os.Stderr, format, args...) + fmt.Fprintln(os.Stderr) + } +} + +// Analyze runs the pointer analysis with the scope and options +// specified by config, and returns the (synthetic) root of the callgraph. +// +func Analyze(config *Config) CallGraphNode { + a := &analysis{ + config: config, + log: config.Log, + prog: config.prog(), + valNode: make(map[ssa.Value]nodeid), + flattenMemo: make(map[types.Type][]*fieldInfo), + intrinsics: make(map[*ssa.Function]intrinsic), + funcObj: make(map[*ssa.Function]nodeid), + probes: make(map[*ssa.CallCommon]nodeid), + work: makeMapWorklist(), + } + + if reflect := a.prog.PackagesByPath["reflect"]; reflect != nil { + a.reflectValueObj = reflect.Object.Scope().Lookup("Value") + a.reflectRtypeObj = reflect.Object.Scope().Lookup("rtype") + a.reflectRtype = types.NewPointer(a.reflectRtypeObj.Type()) + } + + if false { + a.log = os.Stderr // for debugging crashes; extremely verbose + } + + if a.log != nil { + fmt.Fprintln(a.log, "======== NEW ANALYSIS ========") + } + + root := a.generate() + + // ---------- Presolver ---------- + + // TODO(adonovan): opt: presolver optimisations. + + // ---------- Solver ---------- + + a.solve() + + if a.log != nil { + // Dump solution. + for i, n := range a.nodes { + if n.pts != nil { + fmt.Fprintf(a.log, "pts(n%d) = %s : %s\n", i, n.pts, n.typ) + } + } + } + + // Notify the client of the callsites if they're interested. + if CallSite := a.config.CallSite; CallSite != nil { + for _, site := range a.callsites { + CallSite(site) + } + } + + Call := a.config.Call + for _, site := range a.callsites { + for nid := range a.nodes[site.targets].pts { + cgn := a.nodes[nid].data.(*cgnode) + + // Notify the client of the call graph, if + // they're interested. + if Call != nil { + Call(site, site.caller, cgn) + } + + // Warn about calls to non-intrinsic external functions. + + if fn := cgn.fn; fn.Blocks == nil && a.findIntrinsic(fn) == nil { + a.warnf(site.Pos(), "unsound call to unknown intrinsic: %s", fn) + a.warnf(fn.Pos(), " (declared here)") + } + } + } + + return root +} diff --git a/pointer/api.go b/pointer/api.go new file mode 100644 index 00000000..be594d1f --- /dev/null +++ b/pointer/api.go @@ -0,0 +1,232 @@ +package pointer + +import ( + "fmt" + "go/token" + "io" + + "code.google.com/p/go.tools/go/types/typemap" + "code.google.com/p/go.tools/ssa" +) + +type Config struct { + // -------- Scope of the analysis -------- + + // Clients must provide the analysis with at least one package defining a main() function. + Mains []*ssa.Package // set of 'main' packages to analyze + root *ssa.Function // synthetic analysis root + + // -------- Optional callbacks invoked by the analysis -------- + + // Call is invoked for each discovered call-graph edge. The + // call-graph is a multigraph over CallGraphNodes with edges + // labelled by the CallSite that gives rise to the edge. + // Clients that wish to construct a call graph may provide + // CallGraph.AddEdge here. + // + // The callgraph may be context-sensitive, i.e. it may + // distinguish separate calls to the same function depending + // on the context. + // + Call func(site CallSite, caller, callee CallGraphNode) + + // CallSite is invoked for each call-site encountered in the + // program. + // + // The callgraph may be context-sensitive, i.e. it may + // distinguish separate calls to the same function depending + // on the context. + // + CallSite func(site CallSite) + + // Warn is invoked for each warning encountered by the analysis, + // e.g. unknown external function, unsound use of unsafe.Pointer. + // pos may be zero if the position is not known. + Warn func(pos token.Pos, format string, args ...interface{}) + + // Print is invoked during the analysis for each discovered + // call to the built-in print(x). + // + // Pointer p may be saved until the analysis is complete, at + // which point its methods provide access to the analysis + // (The result of callings its methods within the Print + // callback is undefined.) p is nil if x is non-pointerlike. + // + // TODO(adonovan): this was a stop-gap measure for identifing + // arbitrary expressions of interest in the tests. Now that + // ssa.ValueForExpr exists, we should use that instead. + // + Print func(site *ssa.CallCommon, p Pointer) + + // The client populates QueryValues with {v, nil} for each + // ssa.Value v of interest. The pointer analysis will + // populate the corresponding map value when it creates the + // pointer variable for v. Upon completion the client can + // inspect the map for the results. + // + // If a Value belongs to a function that the analysis treats + // context-sensitively, the corresponding slice may have + // multiple Pointers, one per distinct context. + // Use PointsToCombined to merge them. + // + // TODO(adonovan): separate the keys set (input) from the + // key/value associations (result) and perhaps return the + // latter from Analyze(). + // + QueryValues map[ssa.Value][]Pointer + + // -------- Other configuration options -------- + + // If Log is non-nil, a log messages are written to it. + // Logging is extremely verbose. + Log io.Writer +} + +func (c *Config) prog() *ssa.Program { + for _, main := range c.Mains { + return main.Prog + } + panic("empty scope") +} + +// A Pointer is an equivalence class of pointerlike values. +// +// TODO(adonovan): add a method +// Context() CallGraphNode +// for pointers corresponding to local variables, +// +type Pointer interface { + // PointsTo returns the points-to set of this pointer. + PointsTo() PointsToSet + + // MayAlias reports whether the receiver pointer may alias + // the argument pointer. + MayAlias(Pointer) bool + + String() string +} + +// A PointsToSet is a set of labels (locations or allocations). +// +type PointsToSet interface { + // PointsTo returns the set of labels that this points-to set + // contains. + Labels() []*Label + + // Intersects reports whether this points-to set and the + // argument points-to set contain common members. + Intersects(PointsToSet) bool + + // If this PointsToSet came from a Pointer of interface kind, + // ConcreteTypes returns the set of concrete types the + // interface may contain. + // + // The result is a mapping whose keys are the concrete types + // to which this interface may point. For each pointer-like + // key type, the corresponding map value is a set of pointer + // abstractions of that concrete type, represented as a + // []Pointer slice. Use PointsToCombined to merge them. + ConcreteTypes() *typemap.M +} + +// Union returns the set containing all the elements of each set in sets. +func Union(sets ...PointsToSet) PointsToSet { + var union ptset + for _, set := range sets { + set := set.(ptset) + union.a = set.a + union.pts.addAll(set.pts) + } + return union +} + +// PointsToCombined returns the combined points-to set of all the +// specified pointers. +func PointsToCombined(ptrs []Pointer) PointsToSet { + var ptsets []PointsToSet + for _, ptr := range ptrs { + ptsets = append(ptsets, ptr.PointsTo()) + } + return Union(ptsets...) +} + +// ---- PointsToSet public interface + +type ptset struct { + a *analysis // may be nil if pts is nil + pts nodeset +} + +func (s ptset) Labels() []*Label { + var labels []*Label + for l := range s.pts { + // Scan back to the previous object start. + for i := l; i >= 0; i-- { + n := s.a.nodes[i] + if n.flags&ntObject != 0 { + // TODO(adonovan): do bounds-check against n.size. + var v ssa.Value + if n.flags&ntFunction != 0 { + v = n.data.(*cgnode).fn + } else { + v = n.data.(ssa.Value) + // TODO(adonovan): what if v is nil? + } + labels = append(labels, &Label{ + Value: v, + subelement: s.a.nodes[l].subelement, + }) + break + } + } + } + return labels +} + +func (s ptset) ConcreteTypes() *typemap.M { + var tmap typemap.M // default hasher // TODO(adonovan): opt: memoize per analysis + for ifaceObjId := range s.pts { + if s.a.nodes[ifaceObjId].flags&ntInterface == 0 { + // ConcreteTypes called on non-interface PT set. + continue // shouldn't happen + } + v, tconc := s.a.interfaceValue(ifaceObjId) + prev, _ := tmap.At(tconc).([]Pointer) + tmap.Set(tconc, append(prev, ptr{s.a, v})) + } + return &tmap +} + +func (x ptset) Intersects(y_ PointsToSet) bool { + y := y_.(ptset) + for l := range x.pts { + if _, ok := y.pts[l]; ok { + return true + } + } + return false +} + +// ---- Pointer public interface + +// ptr adapts a node to the Pointer interface. +type ptr struct { + a *analysis + n nodeid // non-zero +} + +func (p ptr) String() string { + return fmt.Sprintf("n%d", p.n) +} + +func (p ptr) PointsTo() PointsToSet { + return ptset{p.a, p.a.nodes[p.n].pts} +} + +func (p ptr) MayAlias(q Pointer) bool { + return p.PointsTo().Intersects(q.PointsTo()) +} + +func (p ptr) ConcreteTypes() *typemap.M { + return p.PointsTo().ConcreteTypes() +} diff --git a/pointer/callgraph.go b/pointer/callgraph.go new file mode 100644 index 00000000..de8489e4 --- /dev/null +++ b/pointer/callgraph.go @@ -0,0 +1,114 @@ +package pointer + +import ( + "fmt" + "go/token" + + "code.google.com/p/go.tools/ssa" +) + +// TODO(adonovan): move the CallGraph, CallGraphNode, CallSite types +// into a separate package 'callgraph', and make them pure interfaces +// capable of supporting several implementations (context-sensitive +// and insensitive PTA, RTA, etc). + +// ---------- CallGraphNode ---------- + +// A CallGraphNode is a context-sensitive representation of a node in +// the callgraph. In other words, there may be multiple nodes +// representing a single *Function, depending on the contexts in which +// it is called. The identity of the node is therefore important. +// +type CallGraphNode interface { + Func() *ssa.Function // the function this node represents + String() string // diagnostic description of this callgraph node +} + +type cgnode struct { + fn *ssa.Function + obj nodeid // start of this contour's object block +} + +func (n *cgnode) Func() *ssa.Function { + return n.fn +} + +func (n *cgnode) String() string { + return fmt.Sprintf("cg%d:%s", n.obj, n.fn) +} + +// ---------- CallSite ---------- + +// A CallSite is a context-sensitive representation of a function call +// site in the program. +// +type CallSite interface { + Caller() CallGraphNode // the enclosing context of this call + Pos() token.Pos // source position; token.NoPos for synthetic calls + Description() string // UI description of call kind; see (*ssa.CallCommon).Description + String() string // diagnostic description of this callsite +} + +// A callsite represents a single function or method callsite within a +// function. callsites never represent calls to built-ins; they are +// handled as intrinsics. +// +type callsite struct { + caller *cgnode // the origin of the call + targets nodeid // pts(targets) contains identities of all called functions. + instr ssa.CallInstruction // optional call instruction; provides IsInvoke, position, etc. + pos token.Pos // position, if instr == nil, i.e. synthetic callsites. +} + +// Caller returns the node in the callgraph from which this call originated. +func (c *callsite) Caller() CallGraphNode { + return c.caller +} + +// Description returns a description of this kind of call, in the +// manner of ssa.CallCommon.Description(). +// +func (c *callsite) Description() string { + if c.instr != nil { + return c.instr.Common().Description() + } + return "synthetic function call" +} + +// Pos returns the source position of this callsite, or token.NoPos if implicit. +func (c *callsite) Pos() token.Pos { + if c.instr != nil { + return c.instr.Pos() + } + return c.pos +} + +func (c *callsite) String() string { + // TODO(adonovan): provide more info, e.g. target of static + // call, arguments, location. + return c.Description() +} + +// ---------- CallGraph ---------- + +// CallGraph is a forward directed graph of functions labelled by an +// arbitrary site within the caller. +// +// CallGraph.AddEdge may be used as the Context.Call callback for +// clients that wish to construct a call graph. +// +// TODO(adonovan): this is just a starting point. Add options to +// control whether we record no callsite, an arbitrary callsite, or +// all callsites for a given graph edge. Also, this could live in +// another package since it's just a client utility. +// +type CallGraph map[CallGraphNode]map[CallGraphNode]CallSite + +func (cg CallGraph) AddEdge(site CallSite, caller, callee CallGraphNode) { + callees := cg[caller] + if callees == nil { + callees = make(map[CallGraphNode]CallSite) + cg[caller] = callees + } + callees[callee] = site // save an arbitrary site +} diff --git a/pointer/doc.go b/pointer/doc.go new file mode 100644 index 00000000..6eabfbed --- /dev/null +++ b/pointer/doc.go @@ -0,0 +1,391 @@ +/* + +Package pointer implements Andersen's analysis, an inclusion-based +pointer analysis algorithm first described in (Andersen, 1994). + +The implementation is similar to that described in (Pearce et al, +PASTE'04). Unlike many algorithms which interleave constraint +generation and solving, constructing the callgraph as they go, this +implementation has a strict phase ordering: generation before solving. +Only simple (copy) constraints may be generated during solving. This +improves the traction of presolver optimisations, but imposes certain +restrictions, e.g. potential context sensitivity is limited since all +variants must be created a priori. + +We intend to add various presolving optimisations such as Pointer and +Location Equivalence from (Hardekopf & Lin, SAS '07) and solver +optimisatisions such as Hybrid- and Lazy- Cycle Detection from +(Hardekopf & Lin, PLDI'07), + + +TERMINOLOGY + +We occasionally use C's x->f notation to distinguish the case where x +is a struct pointer from x.f where is a struct value. + + +NODES + +Nodes are the key datastructure of the analysis, and have a dual role: +they represent both constraint variables (equivalence classes of +pointers) and members of points-to sets (things that can be pointed +at, i.e. "labels"). + +Nodes are naturally numbered. The numbering enables compact +representations of sets of nodes such as bitvectors or BDDs; and the +ordering enables a very cheap way to group related nodes together. +(For example, passing n parameters consists of generating n parallel +constraints from caller+i to callee+i for 0<=if requires a dynamic closure rule to create + simple edges for each struct discovered in pts(x). + + The nodes of a struct consist of a special 'identity' node (whose + type is that of the struct itself), followed by the nodes for all + the struct's fields, recursively flattened out. A pointer to the + struct is a pointer to its identity node. That node allows us to + distinguish a pointer to a struct from a pointer to its first field. + + Field offsets are currently the logical field offsets (plus one for + the identity node), so the sizes of the fields can be ignored by the + analysis. + + Sound treatment of unsafe.Pointer conversions (not yet implemented) + would require us to model memory layout using physical field offsets + to ascertain which object field(s) might be aliased by a given + FieldAddr of a different base pointer type. It would also require + us to dispense with the identity node. + + *ssa.Field y = x.f creates a simple edge to y from x's node at f's offset. + + *ssa.FieldAddr y = &x->f requires a dynamic closure rule to create + simple edges for each struct discovered in pts(x). + +Arrays + We model an array by an identity node (whose type is that of the + array itself) followed by a node representing all the elements of + the array; the analysis does not distinguish elements with different + indices. Effectively, an array is treated like struct{elem T}, a + load y=x[i] like y=x.elem, and a store x[i]=y like x.elem=y; the + index i is ignored. + + A pointer to an array is pointer to its identity node. (A slice is + also a pointer to an array's identity node.) The identity node + allows us to distinguish a pointer to an array from a pointer to one + of its elements, but it is rather costly because it introduces more + offset constraints into the system. Furthermore, sound treatment of + unsafe.Pointer would require us to dispense with this node. + + Arrays may be allocated by Alloc, by make([]T), by calls to append, + and via reflection. + +Tuples (T, ...) + Tuples are treated like structs with naturally numbered fields. + *ssa.Extract is analogous to *ssa.Field. + + However, tuples have no identity field since by construction, they + cannot be address-taken. + + +FUNCTION CALLS + + There are three kinds of function call: + (1) static "call"-mode calls of functions. + (2) dynamic "call"-mode calls of functions. + (3) dynamic "invoke"-mode calls of interface methods. + Cases 1 and 2 apply equally to methods and standalone functions. + + Static calls. + A static call consists three steps: + - finding the function object of the callee; + - creating copy edges from the actual parameter value nodes to the + params block in the function object (this includes the receiver + if the callee is a method); + - creating copy edges from the results block in the function + object to the value nodes for the result of the call. + + Context sensitivity + + Static calls (alone) may be treated context sensitively, + i.e. each callsite may cause a distinct re-analysis of the + callee, improving precision. Our current context-sensitivity + policy treats all intrinsics and getter/setter methods in this + manner since such functions are small and seem like an obvious + source of spurious confluences, though this has not yet been + evaluated. + + Dynamic function calls + + Dynamic calls work in a similar manner except that the creation of + copy edges occurs dynamically, in a similar fashion to a pair of + struct copies: + + *fn->params = callargs + callresult = *fn->results + + (Recall that the function object's params and results blocks are + contiguous.) + + Interface method invocation + + For invoke-mode calls, we create a params/results block for the + callsite and attach a dynamic closure rule to the interface. For + each new interface object that flows to the interface, we look up + the concrete method, find its function object, and connect its P/R + block to the callsite's P/R block. + + Recording call targets + + The analysis notifies its clients of each callsite it encounters, + passing a CallSite interface. Among other things, the CallSite + contains a synthetic constraint variable ("targets") whose + points-to solution includes the set of all function objects to + which the call may dispatch. + + It is via this mechanism that the callgraph is made available. + Clients may also elect to be notified of callgraph edges directly; + internally this just iterates all "targets" variables' pts(·)s. + + +SOLVER + +The solver is currently a very naive Andersen-style implementation, +although it does use difference propagation (Pearce et al, SQC'04). +There is much to do. + + +FURTHER READING. + +Andersen, L. O. 1994. Program analysis and specialization for the C +programming language. Ph.D. dissertation. DIKU, University of +Copenhagen. + +David J. Pearce, Paul H. J. Kelly, and Chris Hankin. 2004. Efficient +field-sensitive pointer analysis for C. In Proceedings of the 5th ACM +SIGPLAN-SIGSOFT workshop on Program analysis for software tools and +engineering (PASTE '04). ACM, New York, NY, USA, 37-42. +http://doi.acm.org/10.1145/996821.996835 + +David J. Pearce, Paul H. J. Kelly, and Chris Hankin. 2004. Online +Cycle Detection and Difference Propagation: Applications to Pointer +Analysis. Software Quality Control 12, 4 (December 2004), 311-337. +http://dx.doi.org/10.1023/B:SQJO.0000039791.93071.a2 + +David Grove and Craig Chambers. 2001. A framework for call graph +construction algorithms. ACM Trans. Program. Lang. Syst. 23, 6 +(November 2001), 685-746. +http://doi.acm.org/10.1145/506315.506316 + +Ben Hardekopf and Calvin Lin. 2007. The ant and the grasshopper: fast +and accurate pointer analysis for millions of lines of code. In +Proceedings of the 2007 ACM SIGPLAN conference on Programming language +design and implementation (PLDI '07). ACM, New York, NY, USA, 290-299. +http://doi.acm.org/10.1145/1250734.1250767 + +Ben Hardekopf and Calvin Lin. 2007. Exploiting pointer and location +equivalence to optimize pointer analysis. In Proceedings of the 14th +international conference on Static Analysis (SAS'07), Hanne Riis +Nielson and Gilberto Filé (Eds.). Springer-Verlag, Berlin, Heidelberg, +265-280. + +*/ +package pointer diff --git a/pointer/gen.go b/pointer/gen.go new file mode 100644 index 00000000..b13d47a0 --- /dev/null +++ b/pointer/gen.go @@ -0,0 +1,1069 @@ +package pointer + +// This file defines the constraint generation phase. + +import ( + "fmt" + "go/ast" + "go/token" + + "code.google.com/p/go.tools/go/types" + "code.google.com/p/go.tools/ssa" +) + +var ( + tEface = types.NewInterface(nil) + tInvalid = types.Typ[types.Invalid] + tUnsafePtr = types.Typ[types.UnsafePointer] +) + +// ---------- Node creation ---------- + +// nextNode returns the index of the next unused node. +func (a *analysis) nextNode() nodeid { + return nodeid(len(a.nodes)) +} + +// addNodes creates nodes for all scalar elements in type typ, and +// returns the id of the first one, or zero if the type was +// analytically uninteresting. +// +// comment explains the origin of the nodes, as a debugging aid. +// +func (a *analysis) addNodes(typ types.Type, comment string) nodeid { + id := a.nextNode() + for _, fi := range a.flatten(typ) { + a.addOneNode(fi.typ, comment, fi) + } + if id == a.nextNode() { + return 0 // type contained no pointers + } + return id +} + +// addOneNode creates a single node with type typ, and returns its id. +// +// typ should generally be scalar (except for interface.conctype nodes +// and struct/array identity nodes). Use addNodes for non-scalar types. +// +// comment explains the origin of the nodes, as a debugging aid. +// subelement indicates the subelement, e.g. ".a.b[*].c". +// +func (a *analysis) addOneNode(typ types.Type, comment string, subelement *fieldInfo) nodeid { + id := a.nextNode() + a.nodes = append(a.nodes, &node{typ: typ, subelement: subelement}) + if a.log != nil { + fmt.Fprintf(a.log, "\tcreate n%d %s for %s%s\n", + id, typ, comment, subelement.path()) + } + return id +} + +// setValueNode associates node id with the value v. +// TODO(adonovan): disambiguate v by its CallGraphNode, if it's a local. +func (a *analysis) setValueNode(v ssa.Value, id nodeid) { + a.valNode[v] = id + if a.log != nil { + fmt.Fprintf(a.log, "\tval[%s] = n%d (%T)\n", v.Name(), id, v) + } + + // Record the (v, id) relation if the client has queried v. + qv := a.config.QueryValues + if ptrs, ok := qv[v]; ok { + qv[v] = append(ptrs, ptr{a, id}) + } +} + +// endObject marks the end of a sequence of calls to addNodes denoting +// a single object allocation. +// +// obj is the start node of the object, from a prior call to nextNode. +// Its size, flags and (optionally) data will be updated. +// +func (a *analysis) endObject(obj nodeid, data ssa.Value) { + // Ensure object is non-empty by padding; + // the pad will be the object node. + size := uint32(a.nextNode() - obj) + if size == 0 { + a.addOneNode(tInvalid, "padding", nil) + } + objNode := a.nodes[obj] + objNode.size = size // excludes padding + objNode.flags = ntObject + + if data != nil { + objNode.data = data + if a.log != nil { + fmt.Fprintf(a.log, "\tobj[%s] = n%d\n", data, obj) + } + } +} + +// makeFunctionObject creates and returns a new function object for +// fn, and returns the id of its first node. It also enqueues fn for +// subsequent constraint generation. +// +func (a *analysis) makeFunctionObject(fn *ssa.Function) nodeid { + if a.log != nil { + fmt.Fprintf(a.log, "\t---- makeFunctionObject %s\n", fn) + } + + // obj is the function object (identity, params, results). + obj := a.nextNode() + sig := fn.Signature + a.addOneNode(sig, "func.cgnode", nil) // (scalar with Signature type) + if recv := sig.Recv(); recv != nil { + a.addNodes(recv.Type(), "func.recv") + } + a.addNodes(sig.Params(), "func.params") + a.addNodes(sig.Results(), "func.results") + a.endObject(obj, fn) + + if a.log != nil { + fmt.Fprintf(a.log, "\t----\n") + } + + cgn := &cgnode{fn: fn, obj: obj} + a.nodes[obj].flags |= ntFunction + a.nodes[obj].data = cgn + + // Queue it up for constraint processing. + a.genq = append(a.genq, cgn) + + return obj +} + +// makeFunction creates the shared function object (aka contour) for +// function fn and returns a 'func' value node that points to it. +// +func (a *analysis) makeFunction(fn *ssa.Function) nodeid { + obj := a.makeFunctionObject(fn) + a.funcObj[fn] = obj + + var comment string + if a.log != nil { + comment = fn.String() + } + id := a.addOneNode(fn.Type(), comment, nil) + a.addressOf(id, obj) + return id +} + +// makeGlobal creates the value node and object node for global g, +// and returns the value node. +// +// The value node represents the address of the global variable, and +// points to the object (and nothing else). +// +// The object consists of the global variable itself (conceptually, +// the BSS address). +// +func (a *analysis) makeGlobal(g *ssa.Global) nodeid { + var comment string + if a.log != nil { + fmt.Fprintf(a.log, "\t---- makeGlobal %s\n", g) + comment = g.FullName() + } + + // The nodes representing the object itself. + obj := a.nextNode() + a.addNodes(mustDeref(g.Type()), "global") + a.endObject(obj, g) + + if a.log != nil { + fmt.Fprintf(a.log, "\t----\n") + } + + // The node representing the address of the global. + id := a.addOneNode(g.Type(), comment, nil) + a.addressOf(id, obj) + + return id +} + +// makeConstant creates the value node and object node (if needed) for +// constant c, and returns the value node. +// An object node is created only for []byte or []rune constants. +// The value node points to the object node, iff present. +// +func (a *analysis) makeConstant(l *ssa.Const) nodeid { + id := a.addNodes(l.Type(), "const") + if !l.IsNil() { + // []byte or []rune? + if t, ok := l.Type().Underlying().(*types.Slice); ok { + // Treat []T like *[1]T, 'make []T' like new([1]T). + obj := a.nextNode() + a.addNodes(sliceToArray(t), "array in slice constant") + a.endObject(obj, l) + + a.addressOf(id, obj) + } + } + return id +} + +// valueNode returns the id of the value node for v, creating it (and +// the association) as needed. It may return zero for uninteresting +// values containing no pointers. +// +// Nodes for locals are created en masse during genFunc and are +// implicitly contextualized by the function currently being analyzed +// (i.e. parameter to genFunc). +// +func (a *analysis) valueNode(v ssa.Value) nodeid { + id, ok := a.valNode[v] + if !ok { + switch v := v.(type) { + case *ssa.Function: + id = a.makeFunction(v) + + case *ssa.Global: + id = a.makeGlobal(v) + + case *ssa.Const: + id = a.makeConstant(v) + + case *ssa.Capture: + // TODO(adonovan): treat captures context-sensitively. + id = a.addNodes(v.Type(), "capture") + + default: + // *ssa.Parameters and ssa.Instruction values + // are created by genFunc. + // *Builtins are not true values. + panic(v) + } + a.setValueNode(v, id) + } + return id +} + +// valueOffsetNode ascertains the node for tuple/struct value v, +// then returns the node for its subfield #index. +// +func (a *analysis) valueOffsetNode(v ssa.Value, index int) nodeid { + id := a.valueNode(v) + if id == 0 { + panic(fmt.Sprintf("cannot offset within n0: %s = %s", v.Name(), v)) + } + return id + nodeid(a.offsetOf(v.Type(), index)) +} + +// interfaceValue returns the (first node of) the value, and the +// concrete type, of the interface object (flags&ntInterface) starting +// at id. +// +func (a *analysis) interfaceValue(id nodeid) (nodeid, types.Type) { + n := a.nodes[id] + if n.flags&ntInterface == 0 { + panic(fmt.Sprintf("interfaceValue(n%d): not an interface object; typ=%s", id, n.typ)) + } + return id + 1, n.typ +} + +// funcParams returns the first node of the params block of the +// function whose object node (flags&ntFunction) is id. +// +func (a *analysis) funcParams(id nodeid) nodeid { + if a.nodes[id].flags&ntFunction == 0 { + panic(fmt.Sprintf("funcParams(n%d): not a function object block", id)) + } + return id + 1 +} + +// funcResults returns the first node of the results block of the +// function whose object node (flags&ntFunction) is id. +// +func (a *analysis) funcResults(id nodeid) nodeid { + n := a.nodes[id] + if n.flags&ntFunction == 0 { + panic(fmt.Sprintf("funcResults(n%d): not a function object block", id)) + } + sig := n.typ.(*types.Signature) + id += 1 + nodeid(a.sizeof(sig.Params())) + if sig.Recv() != nil { + id += nodeid(a.sizeof(sig.Recv().Type())) + } + return id +} + +// ---------- Constraint creation ---------- + +// copy creates a constraint of the form dst = src. +// sizeof is the width (in logical fields) of the copied type. +// +func (a *analysis) copy(dst, src nodeid, sizeof uint32) { + if src == dst || sizeof == 0 { + return // trivial + } + if src == 0 || dst == 0 { + panic(fmt.Sprintf("ill-typed copy dst=n%d src=n%d", dst, src)) + } + for i := uint32(0); i < sizeof; i++ { + a.addConstraint(©Constraint{dst, src}) + src++ + dst++ + } +} + +// addressOf creates a constraint of the form id = &obj. +func (a *analysis) addressOf(id, obj nodeid) { + if id == 0 { + panic("addressOf: zero id") + } + if obj == 0 { + panic("addressOf: zero obj") + } + a.addConstraint(&addrConstraint{id, obj}) +} + +// load creates a load constraint of the form dst = *src. +// sizeof is the width (in logical fields) of the loaded type. +// +func (a *analysis) load(dst, src nodeid, sizeof uint32) { + a.loadOffset(dst, src, 0, sizeof) +} + +// loadOffset creates a load constraint of the form dst = src[offset]. +// offset is the pointer offset in logical fields. +// sizeof is the width (in logical fields) of the loaded type. +// +func (a *analysis) loadOffset(dst, src nodeid, offset uint32, sizeof uint32) { + if dst == 0 { + return // load of non-pointerlike value + } + if src == 0 && dst == 0 { + return // non-pointerlike operation + } + if src == 0 || dst == 0 { + panic(fmt.Sprintf("ill-typed load dst=n%d src=n%d", dst, src)) + } + for i := uint32(0); i < sizeof; i++ { + a.addConstraint(&loadConstraint{offset, dst, src}) + offset++ + dst++ + } +} + +// store creates a store constraint of the form *dst = src. +// sizeof is the width (in logical fields) of the stored type. +// +func (a *analysis) store(dst, src nodeid, sizeof uint32) { + a.storeOffset(dst, src, 0, sizeof) +} + +// storeOffset creates a store constraint of the form dst[offset] = src. +// offset is the pointer offset in logical fields. +// sizeof is the width (in logical fields) of the stored type. +// +func (a *analysis) storeOffset(dst, src nodeid, offset uint32, sizeof uint32) { + if src == 0 { + return // store of non-pointerlike value + } + if src == 0 && dst == 0 { + return // non-pointerlike operation + } + if src == 0 || dst == 0 { + panic(fmt.Sprintf("ill-typed store dst=n%d src=n%d", dst, src)) + } + for i := uint32(0); i < sizeof; i++ { + a.addConstraint(&storeConstraint{offset, dst, src}) + offset++ + src++ + } +} + +// offsetAddr creates an offsetAddr constraint of the form dst = &src.#offset. +// offset is the field offset in logical fields. +// +func (a *analysis) offsetAddr(dst, src nodeid, offset uint32) { + if offset == 0 { + // Simplify dst = &src->f0 + // to dst = src + // (NB: this optimisation is defeated by the identity + // field prepended to struct and array objects.) + a.copy(dst, src, 1) + } else { + a.addConstraint(&offsetAddrConstraint{offset, dst, src}) + } +} + +// addConstraint adds c to the constraint set. +func (a *analysis) addConstraint(c constraint) { + a.constraints = append(a.constraints, c) + if a.log != nil { + fmt.Fprintf(a.log, "\t%s\n", c) + } +} + +// copyElems generates load/store constraints for *dst = *src, +// where src and dst are slices or *arrays. +// (If pts(·) of either is a known singleton, this is suboptimal.) +// +func (a *analysis) copyElems(typ types.Type, dst, src nodeid) { + tmp := a.addNodes(typ, "copy") + sz := a.sizeof(typ) + a.loadOffset(tmp, src, 1, sz) + a.storeOffset(dst, tmp, 1, sz) +} + +// ---------- Constraint generation ---------- + +// genConv generates constraints for the conversion operation conv. +func (a *analysis) genConv(conv *ssa.Convert) { + res := a.valueNode(conv) + if res == 0 { + return // result is non-pointerlike + } + + tSrc := conv.X.Type() + tDst := conv.Type() + + switch utSrc := tSrc.Underlying().(type) { + case *types.Slice: + // []byte/[]rune -> string? + return + + case *types.Pointer: + // *T -> unsafe.Pointer? + if tDst == tUnsafePtr { + // ignore for now + // a.copy(res, a.valueNode(conv.X), 1) + return + } + + case *types.Basic: + switch utDst := tDst.Underlying().(type) { + case *types.Pointer: + // unsafe.Pointer -> *T? (currently unsound) + if utSrc == tUnsafePtr { + a.warnf(conv.Pos(), + "unsound: %s contains an unsafe.Pointer conversion (to %s)", + conv.Parent(), tDst) + + // For now, we treat unsafe.Pointer->*T + // conversion like new(T) and create an + // unaliased object. In future we may handle + // unsafe conversions soundly; see TODO file. + obj := a.addNodes(mustDeref(tDst), "unsafe.Pointer conversion") + a.endObject(obj, conv) + a.addressOf(res, obj) + return + } + + case *types.Slice: + // string -> []byte/[]rune (or named aliases)? + if utSrc.Info()&types.IsString != 0 { + obj := a.addNodes(sliceToArray(tDst), "convert") + a.endObject(obj, conv) + a.addressOf(res, obj) + return + } + + case *types.Basic: + // TODO(adonovan): + // unsafe.Pointer -> uintptr? + // uintptr -> unsafe.Pointer + // + // The language doesn't adequately specify the + // behaviour of these operations, but almost + // all uses of these conversions (even in the + // spec) seem to imply a non-moving garbage + // collection strategy, or implicit "pinning" + // semantics for unsafe.Pointer conversions. + + // TODO(adonovan): we need more work before we can handle + // cryptopointers well. + if utSrc == tUnsafePtr || utDst == tUnsafePtr { + // Ignore for now. See TODO file for ideas. + return + } + + return // ignore all other basic type conversions + } + } + + panic(fmt.Sprintf("illegal *ssa.Convert %s -> %s: %s", tSrc, tDst, conv.Parent())) +} + +// genAppend generates constraints for a call to append. +func (a *analysis) genAppend(instr *ssa.Call) { + // Consider z = append(x, y). y is optional. + // This may allocate a new [1]T array; call its object w. + // We get the following constraints: + // z = x + // z = &w + // *z = *y + + x := a.valueNode(instr.Call.Args[0]) + + z := a.valueNode(instr) + a.copy(z, x, 1) // z = x + + if len(instr.Call.Args) == 1 { + return // no allocation for z = append(x) or _ = append(x). + } + + // TODO(adonovan): test append([]byte, ...string) []byte. + + y := a.valueNode(instr.Call.Args[1]) + tArray := sliceToArray(instr.Call.Args[0].Type()) + + var w nodeid + w = a.nextNode() + a.addNodes(tArray, "append") + a.endObject(w, instr) + + a.copyElems(tArray.Elem(), z, y) // *z = *y + a.addressOf(z, w) // z = &w +} + +// genBuiltinCall generates contraints for a call to a built-in. +func (a *analysis) genBuiltinCall(instr ssa.CallInstruction) { + call := instr.Common() + switch call.Value.(*ssa.Builtin).Object().Name() { + case "append": + // Safe cast: append cannot appear in a go or defer statement. + a.genAppend(instr.(*ssa.Call)) + + case "copy": + tElem := call.Args[0].Type().Underlying().(*types.Slice).Elem() + a.copyElems(tElem, a.valueNode(call.Args[0]), a.valueNode(call.Args[1])) + + case "panic": + a.copy(a.panicNode, a.valueNode(call.Args[0]), 1) + + case "recover": + if v := instr.Value(); v != nil { + a.copy(a.valueNode(v), a.panicNode, 1) + } + + case "print": + // Analytically print is a no-op, but it's a convenient hook + // for testing the pts of an expression, so we notify the client. + // Existing uses in Go core libraries are few and harmless. + if Print := a.config.Print; Print != nil { + // Due to context-sensitivity, we may encounter + // the same print() call in many contexts, so + // we merge them to a canonical node. + probe := a.probes[call] + t := call.Args[0].Type() + + // First time? Create the canonical probe node. + if probe == 0 { + probe = a.addNodes(t, "print") + a.probes[call] = probe + Print(call, ptr{a, probe}) // notify client + } + + a.copy(probe, a.valueNode(call.Args[0]), a.sizeof(t)) + } + + default: + // No-ops: close len cap real imag complex println delete. + } +} + +// shouldUseContext defines the context-sensitivity policy. It +// returns true if we should analyse all static calls to fn anew. +// +// Obviously this interface rather limits how much freedom we have to +// choose a policy. The current policy, rather arbitrarily, is true +// for intrinsics and accessor methods (actually: short, single-block, +// call-free functions). This is just a starting point. +// +func (a *analysis) shouldUseContext(fn *ssa.Function) bool { + if a.findIntrinsic(fn) != nil { + return true // treat intrinsics context-sensitively + } + if len(fn.Blocks) != 1 { + return false // too expensive + } + blk := fn.Blocks[0] + if len(blk.Instrs) > 10 { + return false // too expensive + } + if fn.Synthetic != "" && (fn.Pkg == nil || fn != fn.Pkg.Func("init")) { + return true // treat synthetic wrappers context-sensitively + } + for _, instr := range blk.Instrs { + switch instr := instr.(type) { + case ssa.CallInstruction: + // Disallow function calls (except to built-ins) + // because of the danger of unbounded recursion. + if _, ok := instr.Common().Value.(*ssa.Builtin); !ok { + return false + } + } + } + return true +} + +// genStaticCall generates constraints for a statically dispatched +// function call. It returns a node whose pts() will be the set of +// possible call targets (in this case, a singleton). +// +func (a *analysis) genStaticCall(call *ssa.CallCommon, result nodeid) nodeid { + // Ascertain the context (contour/CGNode) for a particular call. + var obj nodeid + fn := call.StaticCallee() + if a.shouldUseContext(fn) { + obj = a.makeFunctionObject(fn) // new contour for this call + } else { + a.valueNode(fn) // ensure shared contour was created + obj = a.funcObj[fn] // ordinary (shared) contour. + } + + sig := call.Signature() + targets := a.addOneNode(sig, "call.targets", nil) + a.addressOf(targets, obj) // (a singleton) + + // Copy receiver, if any. + params := a.funcParams(obj) + args := call.Args + if sig.Recv() != nil { + sz := a.sizeof(sig.Recv().Type()) + a.copy(params, a.valueNode(args[0]), sz) + params += nodeid(sz) + args = args[1:] + } + + // Copy actual parameters into formal params block. + // Must loop, since the actuals aren't contiguous. + for i, arg := range args { + sz := a.sizeof(sig.Params().At(i).Type()) + a.copy(params, a.valueNode(arg), sz) + params += nodeid(sz) + } + + // Copy formal results block to actual result. + if result != 0 { + a.copy(result, a.funcResults(obj), a.sizeof(sig.Results())) + } + + return targets +} + +// genDynamicCall generates constraints for a dynamic function call. +// It returns a node whose pts() will be the set of possible call targets. +// +func (a *analysis) genDynamicCall(call *ssa.CallCommon, result nodeid) nodeid { + fn := a.valueNode(call.Value) + sig := call.Signature() + + // We add dynamic closure rules that store the arguments into, + // and load the results from, the P/R block of each function + // discovered in pts(fn). + + var offset uint32 = 1 // P/R block starts at offset 1 + for i, arg := range call.Args { + sz := a.sizeof(sig.Params().At(i).Type()) + a.storeOffset(fn, a.valueNode(arg), offset, sz) + offset += sz + } + if result != 0 { + a.loadOffset(result, fn, offset, a.sizeof(sig.Results())) + } + return fn +} + +// genInvoke generates constraints for a dynamic method invocation. +// It returns a node whose pts() will be the set of possible call targets. +// +func (a *analysis) genInvoke(call *ssa.CallCommon, result nodeid) nodeid { + sig := call.Signature() + + // Allocate a contiguous targets/params/results block for this call. + block := a.nextNode() + targets := a.addOneNode(sig, "invoke.targets", nil) + p := a.addNodes(sig.Params(), "invoke.params") + r := a.addNodes(sig.Results(), "invoke.results") + + // Copy the actual parameters into the call's params block. + for i, n := 0, sig.Params().Len(); i < n; i++ { + sz := a.sizeof(sig.Params().At(i).Type()) + a.copy(p, a.valueNode(call.Args[i]), sz) + p += nodeid(sz) + } + // Copy the call's results block to the actual results. + if result != 0 { + a.copy(result, r, a.sizeof(sig.Results())) + } + + // We add a dynamic invoke constraint that will add + // edges from the caller's P/R block to the callee's + // P/R block for each discovered call target. + a.addConstraint(&invokeConstraint{call.Method, a.valueNode(call.Value), block}) + + return targets +} + +// genCall generates contraints for call instruction instr. +func (a *analysis) genCall(caller *cgnode, instr ssa.CallInstruction) { + call := instr.Common() + + // Intrinsic implementations of built-in functions. + if _, ok := call.Value.(*ssa.Builtin); ok { + a.genBuiltinCall(instr) + return + } + + var result nodeid + if v := instr.Value(); v != nil { + result = a.valueNode(v) + } + + // The node whose pts(·) will contain all targets of the call. + var targets nodeid + switch { + case call.StaticCallee() != nil: + targets = a.genStaticCall(call, result) + case call.IsInvoke(): + targets = a.genInvoke(call, result) + default: + targets = a.genDynamicCall(call, result) + } + + site := &callsite{ + caller: caller, + targets: targets, + instr: instr, + pos: instr.Pos(), + } + a.callsites = append(a.callsites, site) + if a.log != nil { + fmt.Fprintf(a.log, "\t%s to targets %s from %s\n", + site.Description(), site.targets, site.caller) + } +} + +// genInstr generates contraints for instruction instr in context cgn. +func (a *analysis) genInstr(cgn *cgnode, instr ssa.Instruction) { + if a.log != nil { + var prefix string + if val, ok := instr.(ssa.Value); ok { + prefix = val.Name() + " = " + } + fmt.Fprintf(a.log, "; %s%s\n", prefix, instr) + } + + switch instr := instr.(type) { + case *ssa.DebugRef: + // no-op. + + case *ssa.UnOp: + switch instr.Op { + case token.ARROW: // <-x + // We can ignore instr.CommaOk because the node we're + // altering is always at zero offset relative to instr. + a.load(a.valueNode(instr), a.valueNode(instr.X), a.sizeof(instr.Type())) + + case token.MUL: // *x + a.load(a.valueNode(instr), a.valueNode(instr.X), a.sizeof(instr.Type())) + + default: + // NOT, SUB, XOR: no-op. + } + + case *ssa.BinOp: + // All no-ops. + + case ssa.CallInstruction: // *ssa.Call, *ssa.Go, *ssa.Defer + a.genCall(cgn, instr) + + case *ssa.ChangeType: + a.copy(a.valueNode(instr), a.valueNode(instr.X), 1) + + case *ssa.Convert: + a.genConv(instr) + + case *ssa.Extract: + a.copy(a.valueNode(instr), + a.valueOffsetNode(instr.Tuple, instr.Index), + a.sizeof(instr.Type())) + + case *ssa.FieldAddr: + a.offsetAddr(a.valueNode(instr), a.valueNode(instr.X), + a.offsetOf(mustDeref(instr.X.Type()), instr.Field)) + + case *ssa.IndexAddr: + a.offsetAddr(a.valueNode(instr), a.valueNode(instr.X), 1) + + case *ssa.Field: + a.copy(a.valueNode(instr), + a.valueOffsetNode(instr.X, instr.Field), + a.sizeof(instr.Type())) + + case *ssa.Index: + a.copy(a.valueNode(instr), 1+a.valueNode(instr.X), a.sizeof(instr.Type())) + + case *ssa.Select: + recv := a.valueOffsetNode(instr, 2) // instr : (index, recvOk, recv0, ... recv_n-1) + for _, st := range instr.States { + elemSize := a.sizeof(st.Chan.Type().Underlying().(*types.Chan).Elem()) + switch st.Dir { + case ast.RECV: + a.load(recv, a.valueNode(st.Chan), elemSize) + recv++ + + case ast.SEND: + a.store(a.valueNode(st.Chan), a.valueNode(st.Send), elemSize) + } + } + + case *ssa.Ret: + results := a.funcResults(cgn.obj) + for _, r := range instr.Results { + sz := a.sizeof(r.Type()) + a.copy(results, a.valueNode(r), sz) + results += nodeid(sz) + } + + case *ssa.Send: + a.store(a.valueNode(instr.Chan), a.valueNode(instr.X), a.sizeof(instr.X.Type())) + + case *ssa.Store: + a.store(a.valueNode(instr.Addr), a.valueNode(instr.Val), a.sizeof(instr.Val.Type())) + + case *ssa.Alloc: + obj := a.nextNode() + a.addNodes(mustDeref(instr.Type()), "alloc") + a.endObject(obj, instr) + a.addressOf(a.valueNode(instr), obj) + + case *ssa.MakeSlice: + obj := a.nextNode() + a.addNodes(sliceToArray(instr.Type()), "makeslice") + a.endObject(obj, instr) + a.addressOf(a.valueNode(instr), obj) + + case *ssa.MakeChan: + obj := a.nextNode() + a.addNodes(instr.Type().Underlying().(*types.Chan).Elem(), "makechan") + a.endObject(obj, instr) + a.addressOf(a.valueNode(instr), obj) + + case *ssa.MakeMap: + obj := a.nextNode() + tmap := instr.Type().Underlying().(*types.Map) + a.addNodes(tmap.Key(), "makemap.key") + a.addNodes(tmap.Elem(), "makemap.value") + a.endObject(obj, instr) + a.addressOf(a.valueNode(instr), obj) + + case *ssa.MakeInterface: + tConc := instr.X.Type() + // Create nodes and constraints for all methods of the type. + // Ascertaining which will be needed is undecidable in general. + mset := tConc.MethodSet() + for i, n := 0, mset.Len(); i < n; i++ { + a.valueNode(a.prog.Method(mset.At(i))) + } + obj := a.addOneNode(tConc, "iface.conctype", nil) // NB: type may be non-scalar! + vnode := a.addNodes(tConc, "iface.value") + a.endObject(obj, instr) + a.nodes[obj].flags |= ntInterface + // Copy the value into it, if nontrivial. + if x := a.valueNode(instr.X); x != 0 { + a.copy(vnode, x, a.sizeof(tConc)) + } + a.addressOf(a.valueNode(instr), obj) + + case *ssa.ChangeInterface: + a.copy(a.valueNode(instr), a.valueNode(instr.X), 1) + + case *ssa.TypeAssert: + dst, src := a.valueNode(instr), a.valueNode(instr.X) + a.addConstraint(&typeAssertConstraint{instr.AssertedType, dst, src}) + + case *ssa.Slice: + a.copy(a.valueNode(instr), a.valueNode(instr.X), 1) + + case *ssa.If, *ssa.Jump: + // no-op. + + case *ssa.Phi: + sz := a.sizeof(instr.Type()) + for _, e := range instr.Edges { + a.copy(a.valueNode(instr), a.valueNode(e), sz) + } + + case *ssa.MakeClosure: + fn := instr.Fn.(*ssa.Function) + a.copy(a.valueNode(instr), a.valueNode(fn), 1) + // Free variables are treated like global variables. + for i, b := range instr.Bindings { + a.copy(a.valueNode(fn.FreeVars[i]), a.valueNode(b), a.sizeof(b.Type())) + } + + case *ssa.RunDefers: + // The analysis is flow insensitive, so we just "call" + // defers as we encounter them. + + case *ssa.Range: + // Do nothing. Next{Iter: *ssa.Range} handles this case. + + case *ssa.Next: + if !instr.IsString { // map + // Assumes that Next is always directly applied to a Range result. + theMap := instr.Iter.(*ssa.Range).X + tMap := theMap.Type().Underlying().(*types.Map) + ksize := a.sizeof(tMap.Key()) + vsize := a.sizeof(tMap.Elem()) + + // Load from the map's (k,v) into the tuple's (ok, k, v). + a.load(a.valueNode(instr)+1, a.valueNode(theMap), ksize+vsize) + } + + case *ssa.Lookup: + if tMap, ok := instr.X.Type().Underlying().(*types.Map); ok { + // CommaOk can be ignored: field 0 is a no-op. + ksize := a.sizeof(tMap.Key()) + vsize := a.sizeof(tMap.Elem()) + a.loadOffset(a.valueNode(instr), a.valueNode(instr.X), ksize, vsize) + } + + case *ssa.MapUpdate: + tmap := instr.Map.Type().Underlying().(*types.Map) + ksize := a.sizeof(tmap.Key()) + vsize := a.sizeof(tmap.Elem()) + m := a.valueNode(instr.Map) + a.store(m, a.valueNode(instr.Key), ksize) + a.storeOffset(m, a.valueNode(instr.Value), ksize, vsize) + + case *ssa.Panic: + a.copy(a.panicNode, a.valueNode(instr.X), 1) + + default: + panic(fmt.Sprintf("unimplemented: %T", instr)) + } +} + +// genRootCalls generates the synthetic root of the callgraph and the +// initial calls from it to the analysis scope, such as main, a test +// or a library. +// +func (a *analysis) genRootCalls() *cgnode { + r := ssa.NewFunction("", new(types.Signature), "root of callgraph") + r.Prog = a.prog // hack. + r.Enclosing = r // hack, so Function.String() doesn't crash + r.String() // (asserts that it doesn't crash) + root := &cgnode{fn: r} + + // For each main package, call main.init(), main.main(). + for _, mainPkg := range a.config.Mains { + main := mainPkg.Func("main") + if main == nil { + panic(fmt.Sprintf("%s has no main function", mainPkg)) + } + + targets := a.addOneNode(main.Signature, "root.targets", nil) + site := &callsite{ + caller: root, + targets: targets, + } + a.callsites = append(a.callsites, site) + for _, fn := range [2]*ssa.Function{mainPkg.Func("init"), main} { + if a.log != nil { + fmt.Fprintf(a.log, "\troot call to %s:\n", fn) + } + a.copy(targets, a.valueNode(fn), 1) + } + } + + return root +} + +// genFunc generates constraints for function fn. +func (a *analysis) genFunc(cgn *cgnode) { + fn := cgn.fn + if a.log != nil { + fmt.Fprintln(a.log) + fmt.Fprintln(a.log) + cgn.fn.DumpTo(a.log) + } + + if impl := a.findIntrinsic(fn); impl != nil { + impl(a, cgn) + return + } + + if fn.Blocks == nil { + // External function with no intrinsic treatment. + // We'll warn about calls to such functions at the end. + return + } + + // The value nodes for the params are in the func object block. + params := a.funcParams(cgn.obj) + for _, p := range fn.Params { + // TODO(adonovan): record the context (cgn) too. + a.setValueNode(p, params) + params += nodeid(a.sizeof(p.Type())) + } + + // Free variables are treated like global variables: + // the outer function sets them with MakeClosure; + // the inner function accesses them with Capture. + + // Create value nodes for all value instructions. + // (Clobbers any previous nodes from same fn in different context.) + if a.log != nil { + fmt.Fprintln(a.log, "; Creating instruction values") + } + for _, b := range fn.Blocks { + for _, instr := range b.Instrs { + switch instr := instr.(type) { + case *ssa.Range: + // do nothing: it has a funky type. + + case ssa.Value: + var comment string + if a.log != nil { + comment = instr.Name() + } + id := a.addNodes(instr.Type(), comment) + // TODO(adonovan): record the context (cgn) too. + a.setValueNode(instr, id) + } + } + } + + // Generate constraints for instructions. + for _, b := range fn.Blocks { + for _, instr := range b.Instrs { + a.genInstr(cgn, instr) + } + } + + // (Instruction Values will hang around in the environment.) +} + +// generate generates offline constraints for the entire program. +// It returns the synthetic root of the callgraph. +// +func (a *analysis) generate() *cgnode { + // Create a dummy node since we use the nodeid 0 for + // non-pointerlike variables. + a.addNodes(tInvalid, "(zero)") + + // Create the global node for panic values. + a.panicNode = a.addNodes(tEface, "panic") + + root := a.genRootCalls() + + // Generate constraints for entire program. + // (Actually just the RTA-reachable portion of the program. + // See Bacon & Sweeney, OOPSLA'96). + for len(a.genq) > 0 { + cgn := a.genq[0] + a.genq = a.genq[1:] + a.genFunc(cgn) + } + + // Create a dummy node to avoid out-of-range indexing in case + // the last allocated type was of zero length. + a.addNodes(tInvalid, "(max)") + + return root +} diff --git a/pointer/intrinsics.go b/pointer/intrinsics.go new file mode 100644 index 00000000..63230237 --- /dev/null +++ b/pointer/intrinsics.go @@ -0,0 +1,276 @@ +package pointer + +// This package defines the treatment of intrinsics, i.e. library +// functions requiring special analytical treatment. +// +// Most of these are C or assembly functions, but even some Go +// functions require may special treatment if the analysis completely +// replaces the implementation of an API such as reflection. + +// TODO(adonovan): support a means of writing analytic summaries in +// the target code, so that users can summarise the effects of their +// own C functions using a snippet of Go. + +import ( + "code.google.com/p/go.tools/ssa" +) + +// Instances of 'intrinsic' generate analysis constraints for calls to +// intrinsic functions. +type intrinsic func(a *analysis, cgn *cgnode) + +// Initialized in explicit init() to defeat (spurious) initialization +// cycle error. +var intrinsicsByName map[string]intrinsic + +func init() { + // Key strings are from Function.String(). + // That little dot ۰ is an Arabic zero numeral (U+06F0), + // categories [Nd]. + intrinsicsByName = map[string]intrinsic{ + // reflect.Value methods. + // "(reflect.Value).Addr": ext۰reflect۰Value۰Addr, + "(reflect.Value).Bool": ext۰NoEffect, + // "(reflect.Value).Bytes": ext۰reflect۰Value۰Bytes, + // "(reflect.Value).Call": ext۰reflect۰Value۰Call, + // "(reflect.Value).CallSlice": ext۰reflect۰Value۰CallSlice, + "(reflect.Value).CanAddr": ext۰NoEffect, + "(reflect.Value).CanInterface": ext۰NoEffect, + "(reflect.Value).CanSet": ext۰NoEffect, + "(reflect.Value).Cap": ext۰NoEffect, + "(reflect.Value).Close": ext۰NoEffect, + "(reflect.Value).Complex": ext۰NoEffect, + // "(reflect.Value).Convert": ext۰reflect۰Value۰Convert, + // "(reflect.Value).Elem": ext۰reflect۰Value۰Elem, + // "(reflect.Value).Field": ext۰reflect۰Value۰Field, + // "(reflect.Value).FieldByIndex": ext۰reflect۰Value۰FieldByIndex, + // "(reflect.Value).FieldByName": ext۰reflect۰Value۰FieldByName, + // "(reflect.Value).FieldByNameFunc": ext۰reflect۰Value۰FieldByNameFunc, + "(reflect.Value).Float": ext۰NoEffect, + // "(reflect.Value).Index": ext۰reflect۰Value۰Index, + "(reflect.Value).Int": ext۰NoEffect, + // "(reflect.Value).Interface": ext۰reflect۰Value۰Interface, + "(reflect.Value).InterfaceData": ext۰NoEffect, + "(reflect.Value).IsNil": ext۰NoEffect, + "(reflect.Value).IsValid": ext۰NoEffect, + "(reflect.Value).Kind": ext۰NoEffect, + "(reflect.Value).Len": ext۰NoEffect, + // "(reflect.Value).MapIndex": ext۰reflect۰Value۰MapIndex, + // "(reflect.Value).MapKeys": ext۰reflect۰Value۰MapKeys, + // "(reflect.Value).Method": ext۰reflect۰Value۰Method, + // "(reflect.Value).MethodByName": ext۰reflect۰Value۰MethodByName, + "(reflect.Value).NumField": ext۰NoEffect, + "(reflect.Value).NumMethod": ext۰NoEffect, + "(reflect.Value).OverflowComplex": ext۰NoEffect, + "(reflect.Value).OverflowFloat": ext۰NoEffect, + "(reflect.Value).OverflowInt": ext۰NoEffect, + "(reflect.Value).OverflowUint": ext۰NoEffect, + "(reflect.Value).Pointer": ext۰NoEffect, + // "(reflect.Value).Set": ext۰reflect۰Value۰Set, + "(reflect.Value).SetBool": ext۰NoEffect, + // "(reflect.Value).SetBytes": ext۰reflect۰Value۰SetBytes, + "(reflect.Value).SetComplex": ext۰NoEffect, + "(reflect.Value).SetFloat": ext۰NoEffect, + "(reflect.Value).SetInt": ext۰NoEffect, + "(reflect.Value).SetLen": ext۰NoEffect, + // "(reflect.Value).SetMapIndex": ext۰reflect۰Value۰SetMapIndex, + // "(reflect.Value).SetPointer": ext۰reflect۰Value۰SetPointer, + "(reflect.Value).SetString": ext۰NoEffect, + "(reflect.Value).SetUint": ext۰NoEffect, + // "(reflect.Value).Slice": ext۰reflect۰Value۰Slice, + "(reflect.Value).String": ext۰NoEffect, + "(reflect.Value).Type": ext۰NoEffect, + "(reflect.Value).Uint": ext۰NoEffect, + "(reflect.Value).UnsafeAddr": ext۰NoEffect, + + // Standalone reflect.* functions. + "reflect.Append": ext۰NotYetImplemented, + "reflect.AppendSlice": ext۰NotYetImplemented, + "reflect.Copy": ext۰NotYetImplemented, + // "reflect.ChanOf": ext۰reflect۰ChanOf, + "reflect.DeepEqual": ext۰NoEffect, + // "reflect.Indirect": ext۰reflect۰Indirect, + // "reflect.MakeChan": ext۰reflect۰MakeChan, + "reflect.MakeFunc": ext۰NotYetImplemented, + "reflect.MakeMap": ext۰NotYetImplemented, + "reflect.MakeSlice": ext۰NotYetImplemented, + "reflect.MapOf": ext۰NotYetImplemented, + // "reflect.New": ext۰reflect۰New, + // "reflect.NewAt": ext۰reflect۰NewAt, + "reflect.PtrTo": ext۰NotYetImplemented, + "reflect.Select": ext۰NotYetImplemented, + "reflect.SliceOf": ext۰NotYetImplemented, + // "reflect.TypeOf": ext۰reflect۰TypeOf, + // "reflect.ValueOf": ext۰reflect۰ValueOf, + // "reflect.Zero": ext۰reflect۰Zero, + "reflect.init": ext۰NoEffect, + + // *reflect.rtype methods + "(*reflect.rtype).Align": ext۰NoEffect, + "(*reflect.rtype).AssignableTo": ext۰NoEffect, + "(*reflect.rtype).Bits": ext۰NoEffect, + "(*reflect.rtype).ChanDir": ext۰NoEffect, + "(*reflect.rtype).ConvertibleTo": ext۰NoEffect, + // "(*reflect.rtype).Elem": ext۰reflect۰rtype۰Elem, + "(*reflect.rtype).Field": ext۰NotYetImplemented, + "(*reflect.rtype).FieldAlign": ext۰NoEffect, + // "(*reflect.rtype).FieldByIndex": ext۰reflect۰rtype۰FieldByIndex, + // "(*reflect.rtype).FieldByName": ext۰reflect۰rtype۰FieldByName, + // "(*reflect.rtype).FieldByNameFunc": ext۰reflect۰rtype۰FieldByNameFunc, + "(*reflect.rtype).Implements": ext۰NoEffect, + // "(*reflect.rtype).In": ext۰reflect۰rtype۰In, + "(*reflect.rtype).IsVariadic": ext۰NoEffect, + // "(*reflect.rtype).Key": ext۰reflect۰rtype۰Key, + "(*reflect.rtype).Kind": ext۰NoEffect, + "(*reflect.rtype).Len": ext۰NoEffect, + "(*reflect.rtype).Method": ext۰NotYetImplemented, + "(*reflect.rtype).MethodByName": ext۰NotYetImplemented, + "(*reflect.rtype).Name": ext۰NoEffect, + "(*reflect.rtype).NumField": ext۰NoEffect, + "(*reflect.rtype).NumIn": ext۰NoEffect, + "(*reflect.rtype).NumMethod": ext۰NoEffect, + "(*reflect.rtype).NumOut": ext۰NoEffect, + // "(*reflect.rtype).Out": ext۰reflect۰rtype۰Out, + "(*reflect.rtype).PkgPath": ext۰NoEffect, + "(*reflect.rtype).Size": ext۰NoEffect, + "(*reflect.rtype).String": ext۰NoEffect, + + // Other packages. + "bytes.Equal": ext۰NoEffect, + "bytes.IndexByte": ext۰NoEffect, + "math.Abs": ext۰NoEffect, + "math.Acos": ext۰NoEffect, + "math.Asin": ext۰NoEffect, + "math.Atan": ext۰NoEffect, + "math.Atan2": ext۰NoEffect, + "math.Ceil": ext۰NoEffect, + "math.Cos": ext۰NoEffect, + "math.Dim": ext۰NoEffect, + "math.Exp": ext۰NoEffect, + "math.Exp2": ext۰NoEffect, + "math.Expm1": ext۰NoEffect, + "math.Float32bits": ext۰NoEffect, + "math.Float32frombits": ext۰NoEffect, + "math.Float64bits": ext۰NoEffect, + "math.Float64frombits": ext۰NoEffect, + "math.Floor": ext۰NoEffect, + "math.Frexp": ext۰NoEffect, + "math.Hypot": ext۰NoEffect, + "math.Ldexp": ext۰NoEffect, + "math.Log": ext۰NoEffect, + "math.Log10": ext۰NoEffect, + "math.Log1p": ext۰NoEffect, + "math.Log2": ext۰NoEffect, + "math.Max": ext۰NoEffect, + "math.Min": ext۰NoEffect, + "math.Mod": ext۰NoEffect, + "math.Modf": ext۰NoEffect, + "math.Remainder": ext۰NoEffect, + "math.Sin": ext۰NoEffect, + "math.Sincos": ext۰NoEffect, + "math.Sqrt": ext۰NoEffect, + "math.Tan": ext۰NoEffect, + "math.Trunc": ext۰NoEffect, + "math/big.addMulVVW": ext۰NoEffect, + "math/big.addVV": ext۰NoEffect, + "math/big.addVW": ext۰NoEffect, + "math/big.bitLen": ext۰NoEffect, + "math/big.divWVW": ext۰NoEffect, + "math/big.divWW": ext۰NoEffect, + "math/big.mulAddVWW": ext۰NoEffect, + "math/big.mulWW": ext۰NoEffect, + "math/big.shlVU": ext۰NoEffect, + "math/big.shrVU": ext۰NoEffect, + "math/big.subVV": ext۰NoEffect, + "math/big.subVW": ext۰NoEffect, + "os.epipecheck": ext۰NoEffect, + "runtime.BlockProfile": ext۰NoEffect, + "runtime.Breakpoint": ext۰NoEffect, + "runtime.CPUProfile": ext۰NotYetImplemented, + "runtime.Caller": ext۰NoEffect, + "runtime.FuncForPC": ext۰NotYetImplemented, + "runtime.GC": ext۰NoEffect, + "runtime.GOMAXPROCS": ext۰NoEffect, + "runtime.Goexit": ext۰NoEffect, + "runtime.GoroutineProfile": ext۰NoEffect, + "runtime.Gosched": ext۰NoEffect, + "runtime.MemProfile": ext۰NoEffect, + "runtime.NumCPU": ext۰NoEffect, + "runtime.NumGoroutine": ext۰NoEffect, + "runtime.ReadMemStats": ext۰NoEffect, + "runtime.SetBlockProfileRate": ext۰NoEffect, + "runtime.SetCPUProfileRate": ext۰NoEffect, + "runtime.SetFinalizer": ext۰NotYetImplemented, + "runtime.Stack": ext۰NoEffect, + "runtime.ThreadCreateProfile": ext۰NoEffect, + "runtime.funcentry_go": ext۰NoEffect, + "runtime.funcline_go": ext۰NoEffect, + "runtime.funcname_go": ext۰NoEffect, + "runtime.getgoroot": ext۰NoEffect, + "runtime/pprof.runtime_cyclesPerSecond": ext۰NoEffect, + "strings.IndexByte": ext۰NoEffect, + "sync.runtime_Semacquire": ext۰NoEffect, + "sync.runtime_Semrelease": ext۰NoEffect, + "sync.runtime_Syncsemcheck": ext۰NoEffect, + "sync/atomic.AddInt32": ext۰NoEffect, + "sync/atomic.CompareAndSwapInt32": ext۰NoEffect, + "sync/atomic.LoadInt32": ext۰NoEffect, + "sync/atomic.LoadUint32": ext۰NoEffect, + "sync/atomic.StoreInt32": ext۰NoEffect, + "sync/atomic.StoreUint32": ext۰NoEffect, + "syscall.Close": ext۰NoEffect, + "syscall.Exit": ext۰NoEffect, + "syscall.Getpid": ext۰NoEffect, + "syscall.Getwd": ext۰NoEffect, + "syscall.Kill": ext۰NoEffect, + "syscall.RawSyscall": ext۰NoEffect, + "syscall.Syscall": ext۰NoEffect, + "syscall.Syscall6": ext۰NoEffect, + "time.Sleep": ext۰NoEffect, + "time.now": ext۰NoEffect, + "time.startTimer": ext۰NoEffect, + "time.stopTimer": ext۰NoEffect, + } +} + +// findIntrinsic returns the constraint generation function for an +// intrinsic function fn, or nil if the function should be handled normally. +// +func (a *analysis) findIntrinsic(fn *ssa.Function) intrinsic { + // Consult the *Function-keyed cache. + // A cached nil indicates a normal non-intrinsic function. + impl, ok := a.intrinsics[fn] + if !ok { + impl = intrinsicsByName[fn.String()] // may be nil + + // Ensure all "reflect" code is treated intrinsically. + if impl == nil && fn.Pkg != nil && a.reflectValueObj != nil && a.reflectValueObj.Pkg() == fn.Pkg.Object { + impl = ext۰NotYetImplemented + } + + a.intrinsics[fn] = impl + } + return impl +} + +// A trivial intrinsic suitable for any function that does not: +// 1) induce aliases between its arguments or any global variables; +// 2) call any functions; or +// 3) create any labels. +// +// We sometimes violate condition #3 if the function creates only +// non-function labels, as the control-flow graph is still sound. +// +func ext۰NoEffect(a *analysis, cgn *cgnode) {} + +func ext۰NotYetImplemented(a *analysis, cgn *cgnode) { + // TODO(adonovan): enable this warning when we've implemented + // enough that it's not unbearably annoying. + // a.warnf(fn.Pos(), "unsound: intrinsic treatment of %s not yet implemented", fn) +} + +// We'll model reflect.Value as an interface{} containing pointers. +// We must use pointers since some reflect.Values (those derived by +// Field, Elem, etc) are abstractions of lvalues, not rvalues, and +// mutations via Set are reflected in the underlying value. (We could +// represent it as a union of lvalue and rvalue but that's more work.) diff --git a/pointer/labels.go b/pointer/labels.go new file mode 100644 index 00000000..d24f9fd1 --- /dev/null +++ b/pointer/labels.go @@ -0,0 +1,67 @@ +package pointer + +import ( + "fmt" + "go/token" + "strings" + + "code.google.com/p/go.tools/ssa" +) + +// A Label is an abstract location or an instruction that allocates memory. +// A points-to set is (conceptually) a set of labels. +// +// (This is basically a pair of a Value that allocates an object and a +// subelement indicator within that object.) +// +// TODO(adonovan): local labels should include their context (CallGraphNode). +// +type Label struct { + Value ssa.Value + subelement *fieldInfo // e.g. ".a.b[*].c" +} + +func (l *Label) Pos() token.Pos { + if l.Value != nil { + return l.Value.Pos() + } + return token.NoPos +} + +func (l *Label) String() string { + var s string + switch v := l.Value.(type) { + case *ssa.Function, *ssa.Global: + s = v.String() + + case *ssa.Const: + s = v.Name() + + case *ssa.Alloc: + s = v.Comment + if s == "" { + s = "alloc" + } + + case *ssa.Call: + // Currently only calls to append can allocate objects. + if v.Call.Value.(*ssa.Builtin).Object().Name() != "append" { + panic("unhandled *ssa.Call label: " + v.Name()) + } + s = "append" + + case *ssa.MakeMap, *ssa.MakeChan, *ssa.MakeSlice, *ssa.Convert: + s = strings.ToLower(strings.TrimPrefix(fmt.Sprintf("%T", v), "*ssa.")) + + case *ssa.MakeInterface: + // MakeInterface is usually implicit in Go source (so + // Pos()==0), and interfaces objects may be allocated + // synthetically (so no *MakeInterface data). + s = "makeinterface:" + v.X.Type().String() + + default: + panic(fmt.Sprintf("unhandled Label.Value type: %T", v)) + } + + return s + l.subelement.path() +} diff --git a/pointer/pointer_test.go b/pointer/pointer_test.go new file mode 100644 index 00000000..b86abe65 --- /dev/null +++ b/pointer/pointer_test.go @@ -0,0 +1,567 @@ +package pointer_test + +// This test uses 'expectation' comments embedded within testdata/*.go +// files to specify the expected pointer analysis behaviour. +// See below for grammar. + +import ( + "bytes" + "fmt" + "go/ast" + "go/parser" + "go/token" + "io/ioutil" + "os" + "regexp" + "strconv" + "strings" + "testing" + + "code.google.com/p/go.tools/go/types" + "code.google.com/p/go.tools/go/types/typemap" + "code.google.com/p/go.tools/importer" + "code.google.com/p/go.tools/pointer" + "code.google.com/p/go.tools/ssa" +) + +var inputs = []string{ + // Currently debugging: + // "testdata/tmp.go", + + // Working: + "testdata/another.go", + "testdata/arrays.go", + "testdata/channels.go", + "testdata/context.go", + "testdata/conv.go", + "testdata/flow.go", + "testdata/fmtexcerpt.go", + "testdata/func.go", + "testdata/hello.go", + "testdata/interfaces.go", + "testdata/maps.go", + "testdata/panic.go", + "testdata/recur.go", + "testdata/structs.go", + "testdata/a_test.go", + + // TODO(adonovan): get these tests (of reflection) passing. + // (The tests are mostly sound since they were used for a + // previous implementation.) + // "testdata/funcreflect.go", + // "testdata/arrayreflect.go", + // "testdata/chanreflect.go", + // "testdata/finalizer.go", + // "testdata/reflect.go", + // "testdata/mapreflect.go", + // "testdata/structreflect.go", +} + +// Expectation grammar: +// +// @calls f -> g +// +// A 'calls' expectation asserts that edge (f, g) appears in the +// callgraph. f and g are notated as per Function.String(), which +// may contain spaces (e.g. promoted method in anon struct). +// +// @pointsto a | b | c +// +// A 'pointsto' expectation asserts that the points-to set of its +// operand contains exactly the set of labels {a,b,c} notated as per +// labelString. +// +// A 'pointsto' expectation must appear on the same line as a +// print(x) statement; the expectation's operand is x. +// +// If one of the strings is "...", the expectation asserts that the +// points-to set at least the other labels. +// +// We use '|' because label names may contain spaces, e.g. methods +// of anonymous structs. +// +// From a theoretical perspective, concrete types in interfaces are +// labels too, but they are represented differently and so have a +// different expectation, @concrete, below. +// +// @concrete t | u | v +// +// A 'concrete' expectation asserts that the set of possible dynamic +// types of its interface operand is exactly {t,u,v}, notated per +// go/types.Type.String(). In other words, it asserts that the type +// component of the interface may point to that set of concrete type +// literals. +// +// A 'concrete' expectation must appear on the same line as a +// print(x) statement; the expectation's operand is x. +// +// If one of the strings is "...", the expectation asserts that the +// interface's type may point to at least the other concrete types. +// +// We use '|' because type names may contain spaces. +// +// @warning "regexp" +// +// A 'warning' expectation asserts that the analysis issues a +// warning that matches the regular expression within the string +// literal. +// +// @line id +// +// A line directive associates the name "id" with the current +// file:line. The string form of labels will use this id instead of +// a file:line, making @pointsto expectations more robust against +// perturbations in the source file. +// (NB, anon functions still include line numbers.) +// +type expectation struct { + kind string // "pointsto" | "concrete" | "calls" | "warning" + filename string + linenum int // source line number, 1-based + args []string + types []types.Type // for concrete +} + +func (e *expectation) String() string { + return fmt.Sprintf("@%s[%s]", e.kind, strings.Join(e.args, " | ")) +} + +func (e *expectation) errorf(format string, args ...interface{}) { + fmt.Printf("%s:%d: ", e.filename, e.linenum) + fmt.Printf(format, args...) + fmt.Println() +} + +func (e *expectation) needsProbe() bool { + return e.kind == "pointsto" || e.kind == "concrete" +} + +// A record of a call to the built-in print() function. Used for testing. +type probe struct { + instr *ssa.CallCommon + arg0 pointer.Pointer // first argument to print +} + +// Find probe (call to print(x)) of same source +// file/line as expectation. +func findProbe(prog *ssa.Program, probes []probe, e *expectation) *probe { + for _, p := range probes { + pos := prog.Fset.Position(p.instr.Pos()) + if pos.Line == e.linenum && pos.Filename == e.filename { + // TODO(adonovan): send this to test log (display only on failure). + // fmt.Printf("%s:%d: info: found probe for %s: %s\n", + // e.filename, e.linenum, e, p.arg0) // debugging + return &p + } + } + return nil // e.g. analysis didn't reach this call +} + +func doOneInput(input, filename string) bool { + impctx := &importer.Config{Loader: importer.MakeGoBuildLoader(nil)} + imp := importer.New(impctx) + + // Parsing. + f, err := parser.ParseFile(imp.Fset, filename, input, parser.DeclarationErrors) + if err != nil { + // TODO(adonovan): err is a scanner error list; + // display all errors not just first? + fmt.Println(err.Error()) + return false + } + + // Type checking. + info := imp.CreateSourcePackage("main", []*ast.File{f}) + if info.Err != nil { + fmt.Println(info.Err.Error()) + return false + } + + // SSA creation + building. + prog := ssa.NewProgram(imp.Fset, ssa.SanityCheckFunctions) + for _, info := range imp.Packages { + prog.CreatePackage(info) + } + prog.BuildAll() + + mainpkg := prog.Package(info.Pkg) + ptrmain := mainpkg // main package for the pointer analysis + if mainpkg.Func("main") == nil { + // No main function; assume it's a test. + ptrmain = prog.CreateTestMainPackage([]*ssa.Package{mainpkg}) + fmt.Printf("%s: synthesized testmain package for test.\n", imp.Fset.Position(f.Package)) + } + + ok := true + + lineMapping := make(map[string]string) // maps "file:line" to @line tag + + // Parse expectations in this input. + var exps []*expectation + re := regexp.MustCompile("// *@([a-z]*) *(.*)$") + lines := strings.Split(input, "\n") + for linenum, line := range lines { + linenum++ // make it 1-based + if matches := re.FindAllStringSubmatch(line, -1); matches != nil { + match := matches[0] + kind, rest := match[1], match[2] + e := &expectation{kind: kind, filename: filename, linenum: linenum} + + if kind == "line" { + if rest == "" { + ok = false + e.errorf("@%s expectation requires identifier", kind) + } else { + lineMapping[fmt.Sprintf("%s:%d", filename, linenum)] = rest + } + continue + } + + if e.needsProbe() && !strings.Contains(line, "print(") { + ok = false + e.errorf("@%s expectation must follow call to print(x)", kind) + continue + } + + switch kind { + case "pointsto": + e.args = split(rest, "|") + + case "concrete": + for _, typstr := range split(rest, "|") { + var t types.Type = types.Typ[types.Invalid] // means "..." + if typstr != "..." { + texpr, err := parser.ParseExpr(typstr) + if err != nil { + ok = false + // Don't print err since its location is bad. + e.errorf("'%s' is not a valid type", typstr) + continue + } + t, _, err = types.EvalNode(imp.Fset, texpr, mainpkg.Object, mainpkg.Object.Scope()) + if err != nil { + ok = false + // TODO Don't print err since its location is bad. + e.errorf("'%s' is not a valid type: %s", typstr, err) + continue + } + } + e.types = append(e.types, t) + } + + case "calls": + e.args = split(rest, "->") + // TODO(adonovan): eagerly reject the + // expectation if fn doesn't denote + // existing function, rather than fail + // the expectation after analysis. + if len(e.args) != 2 { + ok = false + e.errorf("@calls expectation wants 'caller -> callee' arguments") + continue + } + + case "warning": + lit, err := strconv.Unquote(strings.TrimSpace(rest)) + if err != nil { + ok = false + e.errorf("couldn't parse @warning operand: %s", err.Error()) + continue + } + e.args = append(e.args, lit) + + default: + ok = false + e.errorf("unknown expectation kind: %s", e) + continue + } + exps = append(exps, e) + } + } + + var probes []probe + var warnings []string + var log bytes.Buffer + + callgraph := make(pointer.CallGraph) + + // Run the analysis. + config := &pointer.Config{ + Mains: []*ssa.Package{ptrmain}, + Log: &log, + Print: func(site *ssa.CallCommon, p pointer.Pointer) { + probes = append(probes, probe{site, p}) + }, + Call: callgraph.AddEdge, + Warn: func(pos token.Pos, format string, args ...interface{}) { + msg := fmt.Sprintf(format, args...) + fmt.Printf("%s: warning: %s\n", prog.Fset.Position(pos), msg) + warnings = append(warnings, msg) + }, + } + pointer.Analyze(config) + + // Print the log is there was an error or a panic. + complete := false + defer func() { + if !complete || !ok { + log.WriteTo(os.Stderr) + } + }() + + // Check the expectations. + for _, e := range exps { + var pr *probe + if e.needsProbe() { + if pr = findProbe(prog, probes, e); pr == nil { + ok = false + e.errorf("unreachable print() statement has expectation %s", e) + continue + } + if pr.arg0 == nil { + ok = false + e.errorf("expectation on non-pointerlike operand: %s", pr.instr.Args[0].Type()) + continue + } + } + + switch e.kind { + case "pointsto": + if !checkPointsToExpectation(e, pr, lineMapping, prog) { + ok = false + } + + case "concrete": + if !checkConcreteExpectation(e, pr) { + ok = false + } + + case "calls": + if !checkCallsExpectation(prog, e, callgraph) { + ok = false + } + + case "warning": + if !checkWarningExpectation(prog, e, warnings) { + ok = false + } + } + } + + complete = true + + // ok = false // debugging: uncomment to always see log + + return ok +} + +func labelString(l *pointer.Label, lineMapping map[string]string, prog *ssa.Program) string { + // Functions and Globals need no pos suffix. + switch l.Value.(type) { + case *ssa.Function, *ssa.Global: + return l.String() + } + + str := l.String() + if pos := l.Value.Pos(); pos != 0 { + // Append the position, using a @line tag instead of a line number, if defined. + posn := prog.Fset.Position(l.Value.Pos()) + s := fmt.Sprintf("%s:%d", posn.Filename, posn.Line) + if tag, ok := lineMapping[s]; ok { + return fmt.Sprintf("%s@%s:%d", str, tag, posn.Column) + } + str = fmt.Sprintf("%s@%s", str, posn) + } + return str +} + +func checkPointsToExpectation(e *expectation, pr *probe, lineMapping map[string]string, prog *ssa.Program) bool { + expected := make(map[string]struct{}) + surplus := make(map[string]struct{}) + exact := true + for _, g := range e.args { + if g == "..." { + exact = false + continue + } + expected[g] = struct{}{} + } + // Find the set of labels that the probe's + // argument (x in print(x)) may point to. + for _, label := range pr.arg0.PointsTo().Labels() { + name := labelString(label, lineMapping, prog) + if _, ok := expected[name]; ok { + delete(expected, name) + } else if exact { + surplus[name] = struct{}{} + } + } + // Report set difference: + ok := true + if len(expected) > 0 { + ok = false + e.errorf("value does not alias these expected labels: %s", join(expected)) + } + if len(surplus) > 0 { + ok = false + e.errorf("value may additionally alias these labels: %s", join(surplus)) + } + return ok +} + +// underlying returns the underlying type of typ. Copied from go/types. +func underlyingType(typ types.Type) types.Type { + if typ, ok := typ.(*types.Named); ok { + return typ.Underlying() // underlying types are never NamedTypes + } + if typ == nil { + panic("underlying(nil)") + } + return typ +} + +func checkConcreteExpectation(e *expectation, pr *probe) bool { + var expected typemap.M + var surplus typemap.M + exact := true + for _, g := range e.types { + if g == types.Typ[types.Invalid] { + exact = false + continue + } + expected.Set(g, struct{}{}) + } + + switch t := underlyingType(pr.instr.Args[0].Type()).(type) { + case *types.Interface: + // ok + default: + e.errorf("@concrete expectation requires an interface-typed operand, got %s", t) + return false + } + + // Find the set of concrete types that the probe's + // argument (x in print(x)) may contain. + for _, conc := range pr.arg0.PointsTo().ConcreteTypes().Keys() { + if expected.At(conc) != nil { + expected.Delete(conc) + } else if exact { + surplus.Set(conc, struct{}{}) + } + } + // Report set difference: + ok := true + if expected.Len() > 0 { + ok = false + e.errorf("interface cannot contain these concrete types: %s", expected.KeysString()) + } + if surplus.Len() > 0 { + ok = false + e.errorf("interface may additionally contain these concrete types: %s", surplus.KeysString()) + } + return ok + return false +} + +func checkCallsExpectation(prog *ssa.Program, e *expectation, callgraph pointer.CallGraph) bool { + // TODO(adonovan): this is inefficient and not robust against + // typos. Better to convert strings to *Functions during + // expectation parsing (somehow). + for caller, callees := range callgraph { + if caller.Func().String() == e.args[0] { + found := make(map[string]struct{}) + for callee := range callees { + s := callee.Func().String() + found[s] = struct{}{} + if s == e.args[1] { + return true // expectation satisfied + } + } + e.errorf("found no call from %s to %s, but only to %s", + e.args[0], e.args[1], join(found)) + return false + } + } + e.errorf("didn't find any calls from %s", e.args[0]) + return false +} + +func checkWarningExpectation(prog *ssa.Program, e *expectation, warnings []string) bool { + // TODO(adonovan): check the position part of the warning too? + re, err := regexp.Compile(e.args[0]) + if err != nil { + e.errorf("invalid regular expression in @warning expectation: %s", err.Error()) + return false + } + + if len(warnings) == 0 { + e.errorf("@warning %s expectation, but no warnings", strconv.Quote(e.args[0])) + return false + } + + for _, warning := range warnings { + if re.MatchString(warning) { + return true + } + } + + e.errorf("@warning %s expectation not satised; found these warnings though:", strconv.Quote(e.args[0])) + for _, warning := range warnings { + fmt.Println("\t", warning) + } + return false +} + +func TestInput(t *testing.T) { + ok := true + + wd, err := os.Getwd() + if err != nil { + t.Errorf("os.Getwd: %s", err.Error()) + return + } + + // 'go test' does a chdir so that relative paths in + // diagnostics no longer make sense relative to the invoking + // shell's cwd. We print a special marker so that Emacs can + // make sense of them. + fmt.Fprintf(os.Stderr, "Entering directory `%s'\n", wd) + + for _, filename := range inputs { + content, err := ioutil.ReadFile(filename) + if err != nil { + t.Errorf("couldn't read file '%s': %s", filename, err.Error()) + continue + } + + if !doOneInput(string(content), filename) { + ok = false + } + } + if !ok { + t.Fail() + } +} + +// join joins the elements of set with " | "s. +func join(set map[string]struct{}) string { + var buf bytes.Buffer + sep := "" + for name := range set { + buf.WriteString(sep) + sep = " | " + buf.WriteString(name) + } + return buf.String() +} + +// split returns the list of sep-delimited non-empty strings in s. +func split(s, sep string) (r []string) { + for _, elem := range strings.Split(s, sep) { + elem = strings.TrimSpace(elem) + if elem != "" { + r = append(r, elem) + } + } + return +} diff --git a/pointer/print.go b/pointer/print.go new file mode 100644 index 00000000..78d45ca1 --- /dev/null +++ b/pointer/print.go @@ -0,0 +1,35 @@ +package pointer + +import "fmt" + +func (c *addrConstraint) String() string { + return fmt.Sprintf("addr n%d <- {&n%d}", c.dst, c.src) +} + +func (c *copyConstraint) String() string { + return fmt.Sprintf("copy n%d <- n%d", c.dst, c.src) +} + +func (c *loadConstraint) String() string { + return fmt.Sprintf("load n%d <- n%d[%d]", c.dst, c.src, c.offset) +} + +func (c *storeConstraint) String() string { + return fmt.Sprintf("store n%d[%d] <- n%d", c.dst, c.offset, c.src) +} + +func (c *offsetAddrConstraint) String() string { + return fmt.Sprintf("offsetAddr n%d <- n%d.#%d", c.dst, c.src, c.offset) +} + +func (c *typeAssertConstraint) String() string { + return fmt.Sprintf("typeAssert n%d <- n%d.(%s)", c.dst, c.src, c.typ) +} + +func (c *invokeConstraint) String() string { + return fmt.Sprintf("invoke n%d.%s(n%d ...)", c.iface, c.method.Name(), c.params+1) +} + +func (n nodeid) String() string { + return fmt.Sprintf("n%d", n) +} diff --git a/pointer/solve.go b/pointer/solve.go new file mode 100644 index 00000000..34916ca1 --- /dev/null +++ b/pointer/solve.go @@ -0,0 +1,262 @@ +package pointer + +// This file defines a naive Andersen-style solver for the inclusion +// constraint system. + +import ( + "fmt" + + "code.google.com/p/go.tools/go/types" +) + +func (a *analysis) solve() { + // Initialize points-to sets and complex constraint sets. + for _, c := range a.constraints { + c.init(a) + } + a.constraints = nil // aid GC + + work := a.work + + // Now we've initialized all constraints, we populate the + // worklist with nodes that point to something initially (due + // to addrConstraints) and have other constraints attached. + for id, n := range a.nodes { + if len(n.pts) > 0 && (n.copyTo != nil || n.complex != nil) { + if a.log != nil { + fmt.Fprintf(a.log, "Adding to worklist n%d\n", id) + } + a.addWork(nodeid(id)) + } + } + work.swap() + + // Solver main loop. + for round := 1; ; round++ { + if work.swap() { + if a.log != nil { + fmt.Fprintf(a.log, "Solving, round %d\n", round) + } + + // Next iteration. + if work.empty() { + break // done + } + } + + id := work.take() + n := a.nodes[id] + + if a.log != nil { + fmt.Fprintf(a.log, "\tnode n%d\n", id) + } + + // Difference propagation. + delta := n.pts.diff(n.prevPts) + if delta == nil { + continue + } + n.prevPts = n.pts.clone() + + // Process complex constraints dependent on n. + for c := range n.complex { + if a.log != nil { + fmt.Fprintf(a.log, "\t\tconstraint %s\n", c) + } + c.solve(a, n, delta) + } + + // Process copy constraints. + var copySeen nodeset + for mid := range n.copyTo { + if copySeen.add(mid) { + if a.nodes[mid].pts.addAll(delta) { + a.addWork(mid) + } + } + } + + if a.log != nil { + fmt.Fprintf(a.log, "\t\tpts(n%d) = %s\n", id, n.pts) + } + } + + if a.log != nil { + fmt.Fprintf(a.log, "Solver done\n") + } +} + +func (a *analysis) addWork(id nodeid) { + a.work.add(id) + if a.log != nil { + fmt.Fprintf(a.log, "\t\twork: n%d\n", id) + } +} + +func (c *addrConstraint) init(a *analysis) { + a.nodes[c.dst].pts.add(c.src) +} +func (c *copyConstraint) init(a *analysis) { + a.nodes[c.src].copyTo.add(c.dst) +} + +// Complex constraints attach themselves to the relevant pointer node. + +func (c *storeConstraint) init(a *analysis) { + a.nodes[c.dst].complex.add(c) +} +func (c *loadConstraint) init(a *analysis) { + a.nodes[c.src].complex.add(c) +} +func (c *offsetAddrConstraint) init(a *analysis) { + a.nodes[c.src].complex.add(c) +} +func (c *typeAssertConstraint) init(a *analysis) { + a.nodes[c.src].complex.add(c) +} +func (c *invokeConstraint) init(a *analysis) { + a.nodes[c.iface].complex.add(c) +} + +// onlineCopy adds a copy edge. It is called online, i.e. during +// solving, so it adds edges and pts members directly rather than by +// instantiating a 'constraint'. +// +// The size of the copy is implicitly 1. +// It returns true if pts(dst) changed. +// +func (a *analysis) onlineCopy(dst, src nodeid) bool { + if dst != src { + if nsrc := a.nodes[src]; nsrc.copyTo.add(dst) { + if a.log != nil { + fmt.Fprintf(a.log, "\t\t\tdynamic copy n%d <- n%d\n", dst, src) + } + return a.nodes[dst].pts.addAll(nsrc.pts) + } + } + return false +} + +// Returns sizeof. +// Implicitly adds nodes to worklist. +func (a *analysis) onlineCopyN(dst, src nodeid, sizeof uint32) uint32 { + for i := uint32(0); i < sizeof; i++ { + if a.onlineCopy(dst, src) { + a.addWork(dst) + } + src++ + dst++ + } + return sizeof +} + +func (c *loadConstraint) solve(a *analysis, n *node, delta nodeset) { + var changed bool + for k := range delta { + koff := k + nodeid(c.offset) + if a.onlineCopy(c.dst, koff) { + changed = true + } + } + if changed { + a.addWork(c.dst) + } +} + +func (c *storeConstraint) solve(a *analysis, n *node, delta nodeset) { + for k := range delta { + koff := k + nodeid(c.offset) + if a.onlineCopy(koff, c.src) { + a.addWork(koff) + } + } +} + +func (c *offsetAddrConstraint) solve(a *analysis, n *node, delta nodeset) { + dst := a.nodes[c.dst] + for k := range delta { + if dst.pts.add(k + nodeid(c.offset)) { + a.addWork(c.dst) + } + } +} + +func (c *typeAssertConstraint) solve(a *analysis, n *node, delta nodeset) { + tIface, _ := c.typ.Underlying().(*types.Interface) + + for ifaceObj := range delta { + ifaceValue, tConc := a.interfaceValue(ifaceObj) + + if tIface != nil { + if types.IsAssignableTo(tConc, tIface) { + if a.nodes[c.dst].pts.add(ifaceObj) { + a.addWork(c.dst) + } + } + } else { + if types.IsIdentical(tConc, c.typ) { + // Copy entire payload to dst. + // + // TODO(adonovan): opt: if tConc is + // nonpointerlike we can skip this + // entire constraint, perhaps. We + // only care about pointers among the + // fields. + a.onlineCopyN(c.dst, ifaceValue, a.sizeof(tConc)) + } + } + } +} + +func (c *invokeConstraint) solve(a *analysis, n *node, delta nodeset) { + for ifaceObj := range delta { + ifaceValue, tConc := a.interfaceValue(ifaceObj) + + // Look up the concrete method. + meth := tConc.MethodSet().Lookup(c.method.Pkg(), c.method.Name()) + if meth == nil { + panic(fmt.Sprintf("n%d: type %s has no method %s (iface=n%d)", + c.iface, tConc, c.method, ifaceObj)) + } + fn := a.prog.Method(meth) + if fn == nil { + panic(fmt.Sprintf("n%d: no ssa.Function for %s", c.iface, meth)) + } + sig := fn.Signature + + fnObj := a.funcObj[fn] + + // Make callsite's fn variable point to identity of + // concrete method. (There's no need to add it to + // worklist since it never has attached constraints.) + a.nodes[c.params].pts.add(fnObj) + + // Extract value and connect to method's receiver. + // Copy payload to method's receiver param (arg0). + arg0 := a.funcParams(fnObj) + recvSize := a.sizeof(sig.Recv().Type()) + a.onlineCopyN(arg0, ifaceValue, recvSize) + + // Copy iface object payload to method receiver. + src := c.params + 1 // skip past identity + dst := arg0 + nodeid(recvSize) + + // Copy caller's argument block to method formal parameters. + paramsSize := a.sizeof(sig.Params()) + a.onlineCopyN(dst, src, paramsSize) + src += nodeid(paramsSize) + dst += nodeid(paramsSize) + + // Copy method results to caller's result block. + resultsSize := a.sizeof(sig.Results()) + a.onlineCopyN(src, dst, resultsSize) + } +} + +func (c *addrConstraint) solve(a *analysis, n *node, delta nodeset) { + panic("addr is not a complex constraint") +} + +func (c *copyConstraint) solve(a *analysis, n *node, delta nodeset) { + panic("copy is not a complex constraint") +} diff --git a/pointer/testdata/a_test.go b/pointer/testdata/a_test.go new file mode 100644 index 00000000..3baa9ac7 --- /dev/null +++ b/pointer/testdata/a_test.go @@ -0,0 +1,42 @@ +// +build ignore + +package a + +// This test exercises the synthesis of testmain packages for tests. +// The test framework doesn't directly let us perform negative +// assertions (i.e. that TestingQuux isn't called, or that its +// parameter's PTS is empty) so this test is rather roundabout. + +import "testing" + +func log(f func(*testing.T)) { + // The PTS of f is the set of called tests. TestingQuux is not present. + print(f) // @pointsto main.Test | main.TestFoo +} + +func Test(t *testing.T) { + // Don't assert @pointsto(t) since its label contains a fragile line number. + log(Test) +} + +func TestFoo(t *testing.T) { + // Don't assert @pointsto(t) since its label contains a fragile line number. + log(TestFoo) +} + +func TestingQuux(t *testing.T) { + // We can't assert @pointsto(t) since this is dead code. + log(TestingQuux) +} + +func BenchmarkFoo(b *testing.B) { +} + +func ExampleBar() { +} + +// Excludes TestingQuux. +// @calls testing.tRunner -> main.Test +// @calls testing.tRunner -> main.TestFoo +// @calls testing.runExample -> main.ExampleBar +// @calls (*testing.B).runN -> main.BenchmarkFoo diff --git a/pointer/testdata/another.go b/pointer/testdata/another.go new file mode 100644 index 00000000..90d4cd2a --- /dev/null +++ b/pointer/testdata/another.go @@ -0,0 +1,35 @@ +// +build ignore + +package main + +var unknown bool + +type S string + +func incr(x int) int { return x + 1 } + +func main() { + var i interface{} + i = 1 + if unknown { + i = S("foo") + } + if unknown { + i = (func(int, int))(nil) // NB type compares equal to that below. + } + // Look, the test harness can handle equal-but-not-String-equal + // types because we parse types and using a typemap. + if unknown { + i = (func(x int, y int))(nil) + } + if unknown { + i = incr + } + print(i) // @concrete int | S | func(int, int) | func(int) int + + // NB, an interface may never directly alias any global + // labels, even though it may contain pointers that do. + print(i) // @pointsto makeinterface:func(x int) int | makeinterface:func(x int, y int) | makeinterface:func(int, int) | makeinterface:int | makeinterface:main.S + print(i.(func(int) int)) // @pointsto main.incr + print(i.(S)) // @pointsto +} diff --git a/pointer/testdata/arrayreflect.go b/pointer/testdata/arrayreflect.go new file mode 100644 index 00000000..06849d92 --- /dev/null +++ b/pointer/testdata/arrayreflect.go @@ -0,0 +1,108 @@ +// +build ignore + +package main + +import "reflect" + +// Test of arrays & slices with reflection. + +var a int + +func arrayreflect1() { + sl := make([]*int, 10) // @line ar1make + sl[0] = &a + + srv := reflect.ValueOf(sl).Slice(0, 0) + print(srv.Interface()) // @concrete []*int + print(srv.Interface().([]*int)) // @pointsto makeslice@ar1make:12 + print(srv.Interface().([]*int)[42]) // @pointsto main.a +} + +func arrayreflect2() { + var arr [10]*int + sl := arr[:] + sl[0] = &a + + srv := reflect.ValueOf(sl).Slice(0, 0) + print(srv.Interface()) // @concrete []*int + print(srv.Interface().([]*int)) // pointsto TODO + print(srv.Interface().([]*int)[42]) // @pointsto main.a +} + +func arrayreflect3() { + srv := reflect.ValueOf("hi").Slice(0, 0) + print(srv.Interface()) // @concrete string + + type S string + srv2 := reflect.ValueOf(S("hi")).Slice(0, 0) + print(srv2.Interface()) // @concrete main.S +} + +func arrayreflect4() { + rv1 := reflect.ValueOf("hi") + rv2 := rv1 // backflow! + if unknown { + rv2 = reflect.ValueOf(123) + } + // We see backflow through the assignment above causing an + // imprecise result for rv1. This is because the SSA builder + // doesn't yet lift structs (like reflect.Value) into + // registers so these are all loads/stores to the stack. + // Under Das's algorithm, the extra indirection results in + // (undirected) unification not (directed) flow edges. + // TODO(adonovan): precision: lift aggregates. + print(rv1.Interface()) // @concrete string | int + print(rv2.Interface()) // @concrete string | int +} + +func arrayreflect5() { + sl1 := make([]byte, 0) + sl2 := make([]byte, 0) + + srv := reflect.ValueOf(sl1) + + print(srv.Interface()) // @concrete []byte + print(srv.Interface().([]byte)) // @pointsto makeslice@testdata/arrayreflect.go:62:13 + print(srv.Bytes()) // @pointsto makeslice@testdata/arrayreflect.go:62:13 + + srv2 := reflect.ValueOf(123) + srv2.SetBytes(sl2) + print(srv2.Interface()) // @concrete []byte | int + print(srv2.Interface().([]byte)) // @pointsto makeslice@testdata/arrayreflect.go:63:13 + print(srv2.Bytes()) // @pointsto makeslice@testdata/arrayreflect.go:63:13 +} + +func arrayreflect6() { + sl1 := []*bool{new(bool)} + sl2 := []*int{&a} + + srv1 := reflect.ValueOf(sl1) + print(srv1.Index(42).Interface()) // @concrete *bool + print(srv1.Index(42).Interface().(*bool)) // @pointsto alloc@testdata/arrayreflect.go:79:20 + + srv2 := reflect.ValueOf(sl2) + print(srv2.Index(42).Interface()) // @concrete *int + print(srv2.Index(42).Interface().(*int)) // @pointsto main.a + + p1 := &sl1[0] + p2 := &sl2[0] + + prv1 := reflect.ValueOf(p1) + print(prv1.Elem().Interface()) // @concrete *bool + print(prv1.Elem().Interface().(*bool)) // @pointsto alloc@testdata/arrayreflect.go:79:20 + + prv2 := reflect.ValueOf(p2) + print(prv2.Elem().Interface()) // @concrete *int + print(prv2.Elem().Interface().(*int)) // @pointsto main.a +} + +func main() { + arrayreflect1() + arrayreflect2() + arrayreflect3() + arrayreflect4() + arrayreflect5() + arrayreflect6() +} + +var unknown bool diff --git a/pointer/testdata/arrays.go b/pointer/testdata/arrays.go new file mode 100644 index 00000000..ae0ae39d --- /dev/null +++ b/pointer/testdata/arrays.go @@ -0,0 +1,97 @@ +// +build ignore + +package main + +var unknown bool // defeat dead-code elimination + +var a, b int + +func array1() { + sliceA := make([]*int, 10) // @line a1make + sliceA[0] = &a + + var sliceB []*int + sliceB = append(sliceB, &b) // @line a1append + + print(sliceA) // @pointsto makeslice@a1make:16 + print(sliceA[0]) // @pointsto main.a + + print(sliceB) // @pointsto append@a1append:17 + print(sliceB[100]) // @pointsto main.b +} + +func array2() { + sliceA := make([]*int, 10) // @line a2make + sliceA[0] = &a + + sliceB := sliceA[:] + + print(sliceA) // @pointsto makeslice@a2make:16 + print(sliceA[0]) // @pointsto main.a + + print(sliceB) // @pointsto makeslice@a2make:16 + print(sliceB[0]) // @pointsto main.a +} + +func array3() { + a := []interface{}{"", 1} + b := []interface{}{true, func() {}} + print(a[0]) // @concrete string | int + print(b[0]) // @concrete bool | func() +} + +// Test of append, copy, slice. +func array4() { + var s2 struct { // @line a4L0 + a [3]int + b struct{ c, d int } + } + var sl1 = make([]*int, 10) // @line a4make + var someint int // @line a4L1 + sl1[1] = &someint + sl2 := append(sl1, &s2.a[1]) // @line a4append1 + print(sl1) // @pointsto makeslice@a4make:16 + print(sl2) // @pointsto append@a4append1:15 | makeslice@a4make:16 + print(sl1[0]) // @pointsto someint@a4L1:6 | s2.a[*]@a4L0:6 + print(sl2[0]) // @pointsto someint@a4L1:6 | s2.a[*]@a4L0:6 + + // In z=append(x,y) we should observe flow from y[*] to x[*]. + var sl3 = make([]*int, 10) // @line a4L2 + _ = append(sl3, &s2.a[1]) + print(sl3) // @pointsto makeslice@a4L2:16 + print(sl3[0]) // @pointsto s2.a[*]@a4L0:6 + + var sl4 = []*int{&a} // @line a4L3 + sl4a := append(sl4) // @line a4L4 + print(sl4a) // @pointsto slicelit@a4L3:18 | append@a4L4:16 + print(&sl4a[0]) // @pointsto slicelit[*]@a4L3:18 | append[*]@a4L4:16 + print(sl4a[0]) // @pointsto main.a + + var sl5 = []*int{&b} // @line a4L5 + copy(sl5, sl4) + print(sl5) // @pointsto slicelit@a4L5:18 + print(&sl5[0]) // @pointsto slicelit[*]@a4L5:18 + print(sl5[0]) // @pointsto main.b | main.a + + var sl6 = sl5[:0] + print(sl6) // @pointsto slicelit@a4L5:18 + print(&sl6[0]) // @pointsto slicelit[*]@a4L5:18 + print(sl6[0]) // @pointsto main.b | main.a +} + +func array5() { + var arr [2]*int + arr[0] = &a + arr[1] = &b + + var n int + print(arr[n]) // @pointsto main.a | main.b +} + +func main() { + array1() + array2() + array3() + array4() + array5() +} diff --git a/pointer/testdata/channels.go b/pointer/testdata/channels.go new file mode 100644 index 00000000..b7962558 --- /dev/null +++ b/pointer/testdata/channels.go @@ -0,0 +1,91 @@ +// +build ignore + +package main + +func incr(x int) int { return x + 1 } + +func decr(x int) int { return x - 1 } + +var unknown bool // defeat dead-code elimination + +func chan1() { + chA := make(chan func(int) int, 0) // @line c1makeA + chB := make(chan func(int) int, 0) // @line c1makeB + chA <- incr + chB <- decr + chB <- func(int) int { return 1 } + + print(chA) // @pointsto makechan@c1makeA:13 + print(<-chA) // @pointsto main.incr + + print(chB) // @pointsto makechan@c1makeB:13 + print(<-chB) // @pointsto main.decr | func@16.9 +} + +func chan2() { + chA := make(chan func(int) int, 0) // @line c2makeA + chB := make(chan func(int) int, 0) // @line c2makeB + chA <- incr + chB <- decr + chB <- func(int) int { return 1 } + + // Channels flow together. + // Labelsets remain distinct but elements are merged. + chAB := chA + if unknown { + chAB = chB + } + + print(chA) // @pointsto makechan@c2makeA:13 + print(<-chA) // @pointsto main.incr + + print(chB) // @pointsto makechan@c2makeB:13 + print(<-chB) // @pointsto main.decr | func@30.9 + + print(chAB) // @pointsto makechan@c2makeA:13 | makechan@c2makeB:13 + print(<-chAB) // @pointsto main.incr | main.decr | func@30.9 + + (<-chA)(3) +} + +// @calls main.chan2 -> main.incr + +func chan3() { + chA := make(chan func(int) int, 0) // @line c3makeA + chB := make(chan func(int) int, 0) // @line c3makeB + chA <- incr + chB <- decr + chB <- func(int) int { return 1 } + print(chA) // @pointsto makechan@c3makeA:13 + print(<-chA) // @pointsto main.incr + print(chB) // @pointsto makechan@c3makeB:13 + print(<-chB) // @pointsto main.decr | func@58.9 + + (<-chA)(3) +} + +// @calls main.chan3 -> main.incr + +func chan4() { + chA := make(chan func(int) int, 0) // @line c4makeA + chB := make(chan func(int) int, 0) // @line c4makeB + + select { + case chA <- incr: + case chB <- decr: + case a := <-chA: + print(a) // @pointsto main.incr + case b := <-chB: + print(b) // @pointsto main.decr + default: + print(chA) // @pointsto makechan@c4makeA:13 + print(chB) // @pointsto makechan@c4makeB:13 + } +} + +func main() { + chan1() + chan2() + chan3() + chan4() +} diff --git a/pointer/testdata/chanreflect.go b/pointer/testdata/chanreflect.go new file mode 100644 index 00000000..8e662cd0 --- /dev/null +++ b/pointer/testdata/chanreflect.go @@ -0,0 +1,75 @@ +// +build ignore + +package main + +import "reflect" + +// +// This test is very sensitive to line-number perturbations! + +// Test of channels with reflection. + +var a, b int + +func chanreflect1() { + ch := make(chan *int, 0) + crv := reflect.ValueOf(ch) + crv.Send(reflect.ValueOf(&a)) + print(crv.Interface()) // @concrete chan *int + print(crv.Interface().(chan *int)) // @pointsto makechan@testdata/chanreflect.go:15:12 + print(<-ch) // @pointsto main.a +} + +func chanreflect2() { + ch := make(chan *int, 0) + ch <- &b + crv := reflect.ValueOf(ch) + r, _ := crv.Recv() + print(r.Interface()) // @concrete *int + print(r.Interface().(*int)) // @pointsto main.b +} + +func chanOfRecv() { + // MakeChan(<-chan) is a no-op. + t := reflect.ChanOf(reflect.RecvDir, reflect.TypeOf(&a)) + print(reflect.Zero(t).Interface()) // @concrete <-chan *int + print(reflect.MakeChan(t, 0).Interface().(<-chan *int)) // @pointsto +} + +func chanOfSend() { + // MakeChan(chan<-) is a no-op. + t := reflect.ChanOf(reflect.SendDir, reflect.TypeOf(&a)) + print(reflect.Zero(t).Interface()) // @concrete chan<- *int + print(reflect.MakeChan(t, 0).Interface().(chan<- *int)) // @pointsto +} + +func chanOfBoth() { + t := reflect.ChanOf(reflect.BothDir, reflect.TypeOf(&a)) + print(reflect.Zero(t).Interface()) // @concrete chan *int + ch := reflect.MakeChan(t, 0) + print(ch.Interface().(chan *int)) // @pointsto reflectMakechan@testdata/chanreflect.go:49:24 + ch.Send(reflect.ValueOf(&b)) + ch.Interface().(chan *int) <- &a + r, _ := ch.Recv() + print(r.Interface().(*int)) // @pointsto main.a | main.b + print(<-ch.Interface().(chan *int)) // @pointsto main.a | main.b +} + +var unknownDir reflect.ChanDir // not a constant + +func chanOfUnknown() { + // Unknown channel direction: assume all three. + // MakeChan only works on the bi-di channel type. + t := reflect.ChanOf(unknownDir, reflect.TypeOf(&a)) + print(reflect.Zero(t).Interface()) // @concrete <-chan *int | chan<- *int | chan *int + print(reflect.MakeChan(t, 0).Interface()) // @concrete chan *int +} + +func main() { + chanreflect1() + chanreflect2() + chanOfRecv() + chanOfSend() + chanOfBoth() + chanOfUnknown() +} diff --git a/pointer/testdata/chanreflect1.go b/pointer/testdata/chanreflect1.go new file mode 100644 index 00000000..4be85244 --- /dev/null +++ b/pointer/testdata/chanreflect1.go @@ -0,0 +1,35 @@ +// +build ignore + +package main + +import "reflect" + +// +// This test is very sensitive to line-number perturbations! + +// Test of channels with reflection. + +var a, b int + +func chanreflect1() { + ch := make(chan *int, 0) + crv := reflect.ValueOf(ch) + crv.Send(reflect.ValueOf(&a)) + print(crv.Interface()) // @concrete chan *int + print(crv.Interface().(chan *int)) // @pointsto makechan@testdata/chanreflect.go:15:12 + print(<-ch) // @pointsto main.a +} + +func chanreflect2() { + ch := make(chan *int, 0) + ch <- &b + crv := reflect.ValueOf(ch) + r, _ := crv.Recv() + print(r.Interface()) // @concrete *int + print(r.Interface().(*int)) // @pointsto main.b +} + +func main() { + chanreflect1() + chanreflect2() +} diff --git a/pointer/testdata/context.go b/pointer/testdata/context.go new file mode 100644 index 00000000..ed616e7e --- /dev/null +++ b/pointer/testdata/context.go @@ -0,0 +1,48 @@ +// +build ignore + +package main + +// Test of context-sensitive treatment of certain function calls, +// e.g. static calls to simple accessor methods. + +var a, b int + +type T struct{ x *int } + +func (t *T) SetX(x *int) { t.x = x } +func (t *T) GetX() *int { return t.x } + +func context1() { + var t1, t2 T + t1.SetX(&a) + t2.SetX(&b) + print(t1.GetX()) // @pointsto main.a + print(t2.GetX()) // @pointsto main.b +} + +func context2() { + id := func(x *int) *int { + print(x) // @pointsto main.a | main.b + return x + } + print(id(&a)) // @pointsto main.a + print(id(&b)) // @pointsto main.b + + // Same again, but anon func has free vars. + var c int // @line context2c + id2 := func(x *int) (*int, *int) { + print(x) // @pointsto main.a | main.b + return x, &c + } + p, q := id2(&a) + print(p) // @pointsto main.a + print(q) // @pointsto c@context2c:6 + r, s := id2(&b) + print(r) // @pointsto main.b + print(s) // @pointsto c@context2c:6 +} + +func main() { + context1() + context2() +} diff --git a/pointer/testdata/conv.go b/pointer/testdata/conv.go new file mode 100644 index 00000000..49cc8cad --- /dev/null +++ b/pointer/testdata/conv.go @@ -0,0 +1,61 @@ +// +build ignore + +package main + +import "unsafe" + +var a int + +func conv1() { + // Conversions of channel direction. + ch := make(chan int) // @line c1make + print((<-chan int)(ch)) // @pointsto makechan@c1make:12 + print((chan<- int)(ch)) // @pointsto makechan@c1make:12 +} + +func conv2() { + // []byte/[]rune literal + print([]byte("foo")) // @pointsto "foo":[]byte + print([]rune("bar")) // @pointsto "bar":[]rune + + // string -> []byte/[]rune conversion + s := "foo" + ba := []byte(s) // @line c2ba + ra := []rune(s) // @line c2ra + print(ba) // @pointsto convert@c2ba:14 + print(ra) // @pointsto convert@c2ra:14 +} + +func conv3() { + // Conversion of same underlying types. + type PI *int + pi := PI(&a) + print(pi) // @pointsto main.a + + pint := (*int)(pi) + print(pint) // @pointsto main.a + + // Conversions between pointers to identical base types. + var y *PI = &pi + var x **int = (**int)(y) + print(*x) // @pointsto main.a + print(*y) // @pointsto main.a + y = (*PI)(x) + print(*y) // @pointsto main.a +} + +// @warning "main.conv4 contains an unsafe.Pointer conversion" +func conv4() { + // Handling of unsafe.Pointer conversion is unsound: + // we lose the alias to main.a and get something like new(int) instead. + // We require users to provide aliasing summaries. + p := (*int)(unsafe.Pointer(&a)) // @line c2p + print(p) // @pointsto convert@c2p:13 +} + +func main() { + conv1() + conv2() + conv3() + conv4() +} diff --git a/pointer/testdata/finalizer.go b/pointer/testdata/finalizer.go new file mode 100644 index 00000000..c2709b5f --- /dev/null +++ b/pointer/testdata/finalizer.go @@ -0,0 +1,70 @@ +package main + +import "runtime" + +func final1a(x *int) int { + print(x) // @pointsto alloc@newint:10 + return *x +} + +func final1b(x *bool) { + print(x) // @pointsto +} + +func setfinalizer1() { + x := new(int) // @line newint + runtime.SetFinalizer(x, final1a) // ok: final1a's result is ignored + runtime.SetFinalizer(x, final1b) // param type mismatch: no effect +} + +// @calls runtime.SetFinalizer -> main.final1a +// @calls main.setfinalizer1 -> runtime.SetFinalizer + +func final2a(x *bool) { + print(x) // @pointsto alloc@newbool1:10 | alloc@newbool2:10 +} + +func final2b(x *bool) { + print(x) // @pointsto alloc@newbool1:10 | alloc@newbool2:10 +} + +func setfinalizer2() { + x := new(bool) // @line newbool1 + f := final2a + if unknown { + x = new(bool) // @line newbool2 + f = final2b + } + runtime.SetFinalizer(x, f) +} + +// @calls runtime.SetFinalizer -> main.final2a +// @calls runtime.SetFinalizer -> main.final2b +// @calls main.setfinalizer2 -> runtime.SetFinalizer + +// type T int + +// func (t *T) finalize() { +// print(t) // #@pointsto x +// } + +// func setfinalizer3() { +// x := new(T) +// runtime.SetFinalizer(x, (*T).finalize) // go/types gives wrong type to f. +// } + +// #@calls runtime.SetFinalizer -> (*T) finalize + +func funcForPC() { + f := runtime.FuncForPC(0) // @line funcforpc + print(f) // @pointsto reflectAlloc@funcforpc:25 +} + +func main() { + setfinalizer1() + setfinalizer2() + // setfinalizer3() + funcForPC() +} + +var unknown bool // defeat dead-code elimination diff --git a/pointer/testdata/flow.go b/pointer/testdata/flow.go new file mode 100644 index 00000000..06faff57 --- /dev/null +++ b/pointer/testdata/flow.go @@ -0,0 +1,63 @@ +// +build ignore + +package main + +// Demonstration of directionality of flow edges. + +func f1() {} +func f2() {} + +var somepred bool + +// Tracking functions. +func flow1() { + s := f1 + p := f2 + q := p + r := q + if somepred { + r = s + } + print(s) // @pointsto main.f1 + print(p) // @pointsto main.f2 + print(q) // @pointsto main.f2 + print(r) // @pointsto main.f1 | main.f2 +} + +// Tracking concrete types in interfaces. +func flow2() { + var s interface{} = 1 + var p interface{} = "foo" + q := p + r := q + if somepred { + r = s + } + print(s) // @concrete int + print(p) // @concrete string + print(q) // @concrete string + print(r) // @concrete int | string +} + +var g1, g2 int + +// Tracking addresses of globals. +func flow3() { + s := &g1 + p := &g2 + q := p + r := q + if somepred { + r = s + } + print(s) // @pointsto main.g1 + print(p) // @pointsto main.g2 + print(q) // @pointsto main.g2 + print(r) // @pointsto main.g2 | main.g1 +} + +func main() { + flow1() + flow2() + flow3() +} diff --git a/pointer/testdata/fmtexcerpt.go b/pointer/testdata/fmtexcerpt.go new file mode 100644 index 00000000..d738ffb0 --- /dev/null +++ b/pointer/testdata/fmtexcerpt.go @@ -0,0 +1,42 @@ +// +build ignore + +// This is a slice of the fmt package. + +package main + +type pp struct { + field interface{} +} + +func newPrinter() *pp { + return new(pp) +} + +func Fprintln(a ...interface{}) { + p := newPrinter() + p.doPrint(a, true, true) +} + +func Println(a ...interface{}) { + Fprintln(a...) +} + +func (p *pp) doPrint(a []interface{}, addspace, addnewline bool) { + print(a[0]) // @concrete S | string + stringer := a[0].(interface { + String() string + }) + + stringer.String() + print(stringer) // @concrete S +} + +type S int + +func (S) String() string { return "" } + +func main() { + Println("Hello, World!", S(0)) +} + +// @calls (*main.pp).doPrint -> (main.S).String diff --git a/pointer/testdata/func.go b/pointer/testdata/func.go new file mode 100644 index 00000000..ec8883ba --- /dev/null +++ b/pointer/testdata/func.go @@ -0,0 +1,165 @@ +// +build ignore + +package main + +var a, b, c int + +var unknown bool // defeat dead-code elimination + +func func1() { + var h int // @line f1h + f := func(x *int) *int { + if unknown { + return &b + } + return x + } + + // FV(g) = {f, h} + g := func(x *int) *int { + if unknown { + return &h + } + return f(x) + } + + print(g(&a)) // @pointsto main.a | main.b | h@f1h:6 + print(f(&a)) // @pointsto main.a | main.b + print(&a) // @pointsto main.a +} + +// @calls main.func1 -> func@19.7 +// @calls main.func1 -> func@11.7 +// @calls func@19.7 -> func@11.7 + +func func2() { + var x, y *int + defer func() { + x = &a + }() + go func() { + y = &b + }() + print(x) // @pointsto main.a + print(y) // @pointsto main.b +} + +func func3() { + x, y := func() (x, y *int) { + x = &a + y = &b + if unknown { + return nil, &c + } + return + }() + print(x) // @pointsto main.a + print(y) // @pointsto main.b | main.c +} + +func swap(x, y *int) (*int, *int) { // @line swap + print(&x) // @pointsto x@swap:11 + print(x) // @pointsto makeslice[*]@func4make:11 + print(&y) // @pointsto y@swap:14 + print(y) // @pointsto j@f4j:5 + return y, x +} + +func func4() { + a := make([]int, 10) // @line func4make + i, j := 123, 456 // @line f4j + p, q := swap(&a[3], &j) + print(p) // @pointsto j@f4j:5 + print(q) // @pointsto makeslice[*]@func4make:11 + + f := &b + print(f) // @pointsto main.b +} + +type T int + +func (t *T) f(x *int) *int { + print(t) // @pointsto main.a + print(x) // @pointsto main.c + return &b +} + +func (t *T) g(x *int) *int { + print(t) // @pointsto main.a + print(x) // @pointsto main.b + return &c +} + +func (t *T) h(x *int) *int { + print(t) // @pointsto main.a + print(x) // @pointsto main.b + return &c +} + +var h func(*T, *int) *int + +func func5() { + // Static call of method. + t := (*T)(&a) + print(t.f(&c)) // @pointsto main.b + + // Static call of method as function + print((*T).g(t, &b)) // @pointsto main.c + + // Dynamic call (not invoke) of method. + h = (*T).h + print(h(t, &b)) // @pointsto main.c +} + +// @calls main.func5 -> (*main.T).f +// @calls main.func5 -> (*main.T).g +// @calls main.func5 -> (*main.T).h + +func func6() { + A := &a + f := func() *int { + return A // (free variable) + } + print(f()) // @pointsto main.a +} + +// @calls main.func6 -> func@120.7 + +type I interface { + f() +} + +type D struct{} + +func (D) f() {} + +func func7() { + var i I = D{} + imethodClosure := i.f + imethodClosure() + // @calls main.func7 -> bound$(main.I).f + // @calls bound$(main.I).f -> (main.D).f + + var d D + cmethodClosure := d.f + cmethodClosure() + // @calls main.func7 -> bound$(main.D).f + // @calls bound$(main.D).f ->(main.D).f + + methodExpr := D.f + methodExpr(d) + // @calls main.func7 -> (main.D).f +} + +func main() { + func1() + func2() + func3() + func4() + func5() + func6() + func7() +} + +// @calls -> main.main +// @calls -> main.init diff --git a/pointer/testdata/funcreflect.go b/pointer/testdata/funcreflect.go new file mode 100644 index 00000000..e7a8c451 --- /dev/null +++ b/pointer/testdata/funcreflect.go @@ -0,0 +1,30 @@ +// +build ignore + +package main + +// + +import "reflect" + +var a, b int + +func f(p *int) *int { + print(p) // @pointsto + return &b +} + +func g(p *bool) { +} + +func funcreflect1() { + rvf := reflect.ValueOf(f) + res := rvf.Call([]reflect.Value{reflect.ValueOf(&a)}) + print(res[0].Interface()) // @concrete + print(res[0].Interface().(*int)) // @pointsto +} + +// @calls main.funcreflect1 -> main.f + +func main() { + funcreflect1() +} diff --git a/pointer/testdata/hello.go b/pointer/testdata/hello.go new file mode 100644 index 00000000..0cb08ff9 --- /dev/null +++ b/pointer/testdata/hello.go @@ -0,0 +1,21 @@ +// +build ignore + +package main + +import "fmt" + +type S int + +var theS S + +func (s *S) String() string { + print(s) // @pointsto main.theS + return "" +} + +func main() { + fmt.Println("Hello, World!", &theS) +} + +// @calls main.main -> fmt.Println +// @calls (*fmt.pp).handleMethods -> (*main.S).String diff --git a/pointer/testdata/interfaces.go b/pointer/testdata/interfaces.go new file mode 100644 index 00000000..ed08a169 --- /dev/null +++ b/pointer/testdata/interfaces.go @@ -0,0 +1,152 @@ +// +build ignore + +package main + +type I interface { + f() +} + +type C int + +func (*C) f() {} + +type D struct{ ptr *int } + +func (D) f() {} + +type E struct{} + +func (*E) f() {} + +var a, b int + +var unknown bool // defeat dead-code elimination + +func interface1() { + var i interface{} = &a + var j interface{} = D{&b} + k := j + if unknown { + k = i + } + + print(i) // @concrete *int + print(j) // @concrete D + print(k) // @concrete *int | D + + print(i.(*int)) // @pointsto main.a + print(j.(*int)) // @pointsto + print(k.(*int)) // @pointsto main.a + + print(i.(D).ptr) // @pointsto + print(j.(D).ptr) // @pointsto main.b + print(k.(D).ptr) // @pointsto main.b +} + +func interface2() { + var i I = (*C)(&a) + var j I = D{&a} + k := j + if unknown { + k = i + } + + print(i) // @concrete *C + print(j) // @concrete D + print(k) // @concrete *C | D + print(k) // @pointsto makeinterface:main.D | makeinterface:*main.C + + k.f() + // @calls main.interface2 -> (*main.C).f + // @calls main.interface2 -> (main.D).f + + print(i.(*C)) // @pointsto main.a + print(j.(D).ptr) // @pointsto main.a + print(k.(*C)) // @pointsto main.a + + switch x := k.(type) { + case *C: + print(x) // @pointsto main.a + case D: + print(x.ptr) // @pointsto main.a + case *E: + print(x) // @pointsto + } +} + +func interface3() { + // There should be no backflow of concrete types from the type-switch to x. + var x interface{} = 0 + print(x) // @concrete int + switch y := x.(type) { + case int: + case string: + } +} + +func interface4() { + var i interface{} = D{&a} + if unknown { + i = 123 + } + + print(i) // @concrete int | D + + j := i.(I) // interface narrowing type-assertion + print(j) // @concrete D + print(j.(D).ptr) // @pointsto main.a + + var l interface{} = j // interface widening assignment. + print(l) // @concrete D + print(l.(D).ptr) // @pointsto main.a + + m := j.(interface{}) // interface widening type-assertion. + print(m) // @concrete D + print(m.(D).ptr) // @pointsto main.a +} + +// Interface method calls and value flow: + +type J interface { + f(*int) *int +} + +type P struct { + x int +} + +func (p *P) f(pi *int) *int { + print(p) // @pointsto p@i5p:6 + print(pi) // @pointsto i@i5i:6 + return &p.x +} + +func interface5() { + var p P // @line i5p + var j J = &p + var i int // @line i5i + print(j.f(&i)) // @pointsto p.x@i5p:6 + print(&i) // @pointsto i@i5i:6 + + print(j) // @pointsto makeinterface:*main.P +} + +// @calls main.interface5 -> (*main.P).f + +func interface6() { + f := I.f + print(f) // @pointsto (main.I).f + f(new(struct{ D })) +} + +// @calls main.interface6 -> (main.I).f +// @calls (main.I).f -> (*struct{main.D}).f + +func main() { + interface1() + interface2() + interface3() + interface4() + interface5() + interface6() +} diff --git a/pointer/testdata/mapreflect.go b/pointer/testdata/mapreflect.go new file mode 100644 index 00000000..5662badd --- /dev/null +++ b/pointer/testdata/mapreflect.go @@ -0,0 +1,52 @@ +// +build ignore + +package main + +import "reflect" + +// +// This test is very sensitive to line-number perturbations! + +// Test of maps with reflection. + +var a int +var b bool + +func mapreflect1() { + m := make(map[*int]*bool) + m[&a] = &b + + mrv := reflect.ValueOf(m) + print(mrv.Interface()) // @concrete map[*int]*bool + print(mrv.Interface().(map[*int]*bool)) // @pointsto makemap@testdata/mapreflect.go:16:11 + + for _, k := range mrv.MapKeys() { + print(k.Interface()) // @concrete *int + print(k.Interface().(*int)) // @pointsto main.a + + v := mrv.MapIndex(k) + print(v.Interface()) // @concrete *bool + print(v.Interface().(*bool)) // @pointsto main.b + } +} + +func mapreflect2() { + m := make(map[*int]*bool) + mrv := reflect.ValueOf(m) + mrv.SetMapIndex(reflect.ValueOf(&a), reflect.ValueOf(&b)) + + print(m[nil]) // @pointsto main.b + + for _, k := range mrv.MapKeys() { + print(k.Interface()) // @concrete *int + print(k.Interface().(*int)) // @pointsto main.a + } + + print(reflect.Zero(reflect.TypeOf(m).Key()).Interface()) // @concrete *int + print(reflect.Zero(reflect.TypeOf(m).Elem()).Interface()) // @concrete *bool +} + +func main() { + mapreflect1() + mapreflect2() +} diff --git a/pointer/testdata/maps.go b/pointer/testdata/maps.go new file mode 100644 index 00000000..695b698a --- /dev/null +++ b/pointer/testdata/maps.go @@ -0,0 +1,51 @@ +// +build ignore + +package main + +// Test of maps. + +var a, b, c int + +func maps1() { + m1 := map[*int]*int{&a: &b} // @line m1m1 + m2 := make(map[*int]*int) // @line m1m2 + m2[&b] = &a + + print(m1[nil]) // @pointsto main.b | main.c + print(m2[nil]) // @pointsto main.a + + print(m1) // @pointsto makemap@m1m1:21 + print(m2) // @pointsto makemap@m1m2:12 + + m1[&b] = &c + + for k, v := range m1 { + print(k) // @pointsto main.a | main.b + print(v) // @pointsto main.b | main.c + } + + for k, v := range m2 { + print(k) // @pointsto main.b + print(v) // @pointsto main.a + } + + // Lookup doesn't create any aliases. + print(m2[&c]) // @pointsto main.a + if m2v, ok := m2[&a]; ok { + print(m2[&c]) // @pointsto main.a + } +} + +func maps2() { + m1 := map[*int]*int{&a: &b} + m2 := map[*int]*int{&b: &c} + m3 := []map[*int]*int{m1, m2} // (no spurious merging of m1, m2) + + print(m1[nil]) // @pointsto main.b + print(m2[nil]) // @pointsto main.c +} + +func main() { + maps1() + maps2() +} diff --git a/pointer/testdata/panic.go b/pointer/testdata/panic.go new file mode 100644 index 00000000..dd0bc6c4 --- /dev/null +++ b/pointer/testdata/panic.go @@ -0,0 +1,36 @@ +// +build ignore + +package main + +// Test of value flow from panic() to recover(). +// We model them as stores/loads of a global location. +// We ignore concrete panic types originating from the runtime. + +var someval int + +type myPanic struct{} + +func f(int) {} + +func g() string { return "" } + +func deadcode() { + panic(123) // not reached +} + +func main() { + switch someval { + case 0: + panic("oops") + case 1: + panic(myPanic{}) + case 2: + panic(f) + case 3: + panic(g) + } + ex := recover() + print(ex) // @concrete myPanic | string | func(int) | func() string + print(ex.(func(int))) // @pointsto main.f + print(ex.(func() string)) // @pointsto main.g +} diff --git a/pointer/testdata/recur.go b/pointer/testdata/recur.go new file mode 100644 index 00000000..4c7229de --- /dev/null +++ b/pointer/testdata/recur.go @@ -0,0 +1,11 @@ +// +build ignore + +package main + +// Analysis abstraction of recursive calls is finite. + +func main() { + main() +} + +// @calls main.main -> main.main diff --git a/pointer/testdata/reflect.go b/pointer/testdata/reflect.go new file mode 100644 index 00000000..986a649d --- /dev/null +++ b/pointer/testdata/reflect.go @@ -0,0 +1,69 @@ +// +build ignore + +package main + +import "reflect" +import "unsafe" + +var a, b int + +func reflectIndirect() { + ptr := &a + // Pointer: + print(reflect.Indirect(reflect.ValueOf(&ptr)).Interface().(*int)) // @pointsto main.a + // Non-pointer: + print(reflect.Indirect(reflect.ValueOf([]*int{ptr})).Interface().([]*int)[0]) // @pointsto main.a +} + +func reflectNewAt() { + var x [8]byte + print(reflect.NewAt(reflect.TypeOf(3), unsafe.Pointer(&x)).Interface()) // @concrete *int +} + +// @warning "unsound: main.reflectNewAt contains a reflect.NewAt.. call" + +func reflectTypeOf() { + t := reflect.TypeOf(3) + if unknown { + t = reflect.TypeOf("foo") + } + print(t) // @concrete *reflect.rtype + print(reflect.Zero(t).Interface()) // @concrete int | string + newint := reflect.New(t).Interface() // @line rtonew + print(newint) // @concrete *int | *string + print(newint.(*int)) // @pointsto reflectAlloc@rtonew:23 + print(newint.(*string)) // @pointsto reflectAlloc@rtonew:23 +} + +func reflectTypeElem() { + print(reflect.Zero(reflect.TypeOf(&a).Elem()).Interface()) // @concrete int + print(reflect.Zero(reflect.TypeOf([]string{}).Elem()).Interface()) // @concrete string + print(reflect.Zero(reflect.TypeOf(make(chan bool)).Elem()).Interface()) // @concrete bool + print(reflect.Zero(reflect.TypeOf(make(map[string]float64)).Elem()).Interface()) // @concrete float64 + print(reflect.Zero(reflect.TypeOf([3]complex64{}).Elem()).Interface()) // @concrete complex64 + print(reflect.Zero(reflect.TypeOf(3).Elem()).Interface()) // @concrete +} + +func reflectTypeInOut() { + var f func(float64, bool) (string, int) + print(reflect.Zero(reflect.TypeOf(f).In(0)).Interface()) // @concrete float64 + print(reflect.Zero(reflect.TypeOf(f).In(1)).Interface()) // @concrete bool + print(reflect.Zero(reflect.TypeOf(f).In(-1)).Interface()) // @concrete float64 | bool + print(reflect.Zero(reflect.TypeOf(f).In(zero)).Interface()) // @concrete float64 | bool + + print(reflect.Zero(reflect.TypeOf(f).Out(0)).Interface()) // @concrete string + print(reflect.Zero(reflect.TypeOf(f).Out(1)).Interface()) // @concrete int + print(reflect.Zero(reflect.TypeOf(f).Out(2)).Interface()) // @concrete string | int + print(reflect.Zero(reflect.TypeOf(3).Out(0)).Interface()) // @concrete +} + +func main() { + reflectIndirect() + reflectNewAt() + reflectTypeOf() + reflectTypeElem() + reflectTypeInOut() +} + +var unknown bool +var zero int diff --git a/pointer/testdata/structreflect.go b/pointer/testdata/structreflect.go new file mode 100644 index 00000000..510ff091 --- /dev/null +++ b/pointer/testdata/structreflect.go @@ -0,0 +1,25 @@ +// +build ignore + +package main + +import "reflect" + +var a, b int + +type A struct { + f *int + g interface{} + h bool +} + +func structReflect1() { + var a A + fld, _ := reflect.TypeOf(a).FieldByName("f") // "f" is ignored + // TODO(adonovan): what does interface{} even mean here? + print(reflect.Zero(fld.Type).Interface()) // @concrete *int | bool | interface{} + // TODO(adonovan): test promotion/embedding. +} + +func main() { + structReflect1() +} diff --git a/pointer/testdata/structs.go b/pointer/testdata/structs.go new file mode 100644 index 00000000..c9fbc2ab --- /dev/null +++ b/pointer/testdata/structs.go @@ -0,0 +1,101 @@ +// +build ignore + +package main + +var unknown bool // defeat dead-code elimination + +var p, q int + +type A struct { + f *int + g interface{} +} + +func (a A) m1() { + print(a.f) // @pointsto main.p +} + +func (a *A) m2() { + print(a) // @pointsto complit.A@struct1s:9 + print(a.f) // @pointsto main.p +} + +type B struct { + h *int + A +} + +func structs1() { + b := &B{ // @line struct1s + h: &q, + } + b.f = &p + b.g = b + + print(b.h) // @pointsto main.q + print(b.f) // @pointsto main.p + print(b.g) // @concrete *B + + ptr := &b.f + print(*ptr) // @pointsto main.p + + b.m1() + b.m2() +} + +// @calls main.structs1 -> (main.A).m1 +// @calls main.structs1 -> (*main.A).m2 +// @calls (*main.B).m1 -> (main.A).m1 +// @calls (*main.B).m2 -> (*main.A).m2 + +type T struct { + x int + y int +} + +type S struct { + a [3]T + b *[3]T + c [3]*T +} + +func structs2() { + var s S // @line s2s + print(&s) // @pointsto s@s2s:6 + print(&s.a) // @pointsto s.a@s2s:6 + print(&s.a[0]) // @pointsto s.a[*]@s2s:6 + print(&s.a[0].x) // @pointsto s.a[*].x@s2s:6 + print(&s.a[0].y) // @pointsto s.a[*].y@s2s:6 + print(&s.b) // @pointsto s.b@s2s:6 + print(&s.b[0]) // @pointsto + print(&s.b[0].x) // @pointsto + print(&s.b[0].y) // @pointsto + print(&s.c) // @pointsto s.c@s2s:6 + print(&s.c[0]) // @pointsto s.c[*]@s2s:6 + print(&s.c[0].x) // @pointsto + print(&s.c[0].y) // @pointsto + + var s2 S // @line s2s2 + s2.b = new([3]T) // @line s2s2b + print(s2.b) // @pointsto new@s2s2b:12 + print(&s2.b) // @pointsto s2.b@s2s2:6 + print(&s2.b[0]) // @pointsto new[*]@s2s2b:12 + print(&s2.b[0].x) // @pointsto new[*].x@s2s2b:12 + print(&s2.b[0].y) // @pointsto new[*].y@s2s2b:12 + print(&s2.c[0].x) // @pointsto + print(&s2.c[0].y) // @pointsto + + var s3 S // @line s2s3 + s3.c[2] = new(T) // @line s2s3c + print(s3.c) // @pointsto + print(&s3.c) // @pointsto s3.c@s2s3:6 + print(s3.c[1]) // @pointsto new@s2s3c:15 + print(&s3.c[1]) // @pointsto s3.c[*]@s2s3:6 + print(&s3.c[1].x) // @pointsto new.x@s2s3c:15 + print(&s3.c[1].y) // @pointsto new.y@s2s3c:15 +} + +func main() { + structs1() + structs2() +} diff --git a/pointer/util.go b/pointer/util.go new file mode 100644 index 00000000..41bcc24c --- /dev/null +++ b/pointer/util.go @@ -0,0 +1,289 @@ +package pointer + +import ( + "bytes" + "fmt" + + "code.google.com/p/go.tools/go/types" +) + +// CanPoint reports whether the type T is pointerlike, +// for the purposes of this analysis. +func CanPoint(T types.Type) bool { + switch T := T.(type) { + case *types.Named: + return CanPoint(T.Underlying()) + + case *types.Pointer, *types.Interface, *types.Map, *types.Chan, *types.Signature: + return true + } + + return false // array struct tuple builtin basic +} + +// mustDeref returns the element type of its argument, which must be a +// pointer; panic ensues otherwise. +func mustDeref(typ types.Type) types.Type { + return typ.Underlying().(*types.Pointer).Elem() +} + +// A fieldInfo describes one subelement (node) of the flattening-out +// of a type T: the subelement's type and its path from the root of T. +// +// For example, for this type: +// type line struct{ points []struct{x, y int} } +// flatten() of the inner struct yields the following []fieldInfo: +// struct{ x, y int } "" +// int ".x" +// int ".y" +// and flatten(line) yields: +// struct{ points []struct{x, y int} } "" +// struct{ x, y int } ".points[*]" +// int ".points[*].x +// int ".points[*].y" +// +type fieldInfo struct { + typ types.Type + + // op and tail describe the path to the element (e.g. ".a#2.b[*].c"). + op interface{} // *Array: true; *Tuple: int; *Struct: *types.Var; *Named: nil + tail *fieldInfo +} + +// path returns a user-friendly string describing the subelement path. +// +func (fi *fieldInfo) path() string { + var buf bytes.Buffer + for p := fi; p != nil; p = p.tail { + switch op := p.op.(type) { + case bool: + fmt.Fprintf(&buf, "[*]") + case int: + fmt.Fprintf(&buf, "#%d", op) + case *types.Var: + fmt.Fprintf(&buf, ".%s", op.Name()) + } + } + return buf.String() +} + +// flatten returns a list of directly contained fields in the preorder +// traversal of the type tree of t. The resulting elements are all +// scalars (basic types or pointerlike types), except for struct/array +// "identity" nodes, whose type is that of the aggregate. +// +// Callers must not mutate the result. +// +func (a *analysis) flatten(t types.Type) []*fieldInfo { + fl, ok := a.flattenMemo[t] + if !ok { + switch t := t.(type) { + case *types.Named: + u := t.Underlying() + if _, ok := u.(*types.Interface); ok { + // Debuggability hack: don't remove + // the named type from interfaces as + // they're very verbose. + fl = append(fl, &fieldInfo{typ: t}) + } else { + fl = a.flatten(u) + } + + case *types.Basic, + *types.Signature, + *types.Chan, + *types.Map, + *types.Interface, + *types.Slice, + *types.Pointer: + fl = append(fl, &fieldInfo{typ: t}) + + case *types.Array: + fl = append(fl, &fieldInfo{typ: t}) // identity node + for _, fi := range a.flatten(t.Elem()) { + fl = append(fl, &fieldInfo{typ: fi.typ, op: true, tail: fi}) + } + + case *types.Struct: + fl = append(fl, &fieldInfo{typ: t}) // identity node + for i, n := 0, t.NumFields(); i < n; i++ { + f := t.Field(i) + for _, fi := range a.flatten(f.Type()) { + fl = append(fl, &fieldInfo{typ: fi.typ, op: f, tail: fi}) + } + } + + case *types.Tuple: + // No identity node: tuples are never address-taken. + for i, n := 0, t.Len(); i < n; i++ { + f := t.At(i) + for _, fi := range a.flatten(f.Type()) { + fl = append(fl, &fieldInfo{typ: fi.typ, op: i, tail: fi}) + } + } + + case *types.Builtin: + panic("flatten(*types.Builtin)") // not the type of any value + + default: + panic(t) + } + + a.flattenMemo[t] = fl + } + + return fl +} + +// sizeof returns the number of pointerlike abstractions (nodes) in the type t. +func (a *analysis) sizeof(t types.Type) uint32 { + return uint32(len(a.flatten(t))) +} + +// offsetOf returns the (abstract) offset of field index within struct +// or tuple typ. +func (a *analysis) offsetOf(typ types.Type, index int) uint32 { + var offset uint32 + switch t := typ.Underlying().(type) { + case *types.Tuple: + for i := 0; i < index; i++ { + offset += a.sizeof(t.At(i).Type()) + } + case *types.Struct: + offset++ // the node for the struct itself + for i := 0; i < index; i++ { + offset += a.sizeof(t.Field(i).Type()) + } + default: + panic(fmt.Sprintf("offsetOf(%s : %T)", typ, typ)) + } + return offset +} + +// sliceToArray returns the type representing the arrays to which +// slice type slice points. +func sliceToArray(slice types.Type) *types.Array { + return types.NewArray(slice.Underlying().(*types.Slice).Elem(), 1) +} + +// Node set ------------------------------------------------------------------- + +type nodeset map[nodeid]struct{} + +// ---- Accessors ---- + +func (ns nodeset) String() string { + var buf bytes.Buffer + buf.WriteRune('{') + var sep string + for n := range ns { + fmt.Fprintf(&buf, "%sn%d", sep, n) + sep = ", " + } + buf.WriteRune('}') + return buf.String() +} + +// diff returns the set-difference x - y. nil => empty. +// +// TODO(adonovan): opt: extremely inefficient. BDDs do this in +// constant time. Sparse bitvectors are linear but very fast. +func (x nodeset) diff(y nodeset) nodeset { + var z nodeset + for k := range x { + if _, ok := y[k]; !ok { + z.add(k) + } + } + return z +} + +// clone() returns an unaliased copy of x. +func (x nodeset) clone() nodeset { + return x.diff(nil) +} + +// ---- Mutators ---- + +func (ns *nodeset) add(n nodeid) bool { + sz := len(*ns) + if *ns == nil { + *ns = make(nodeset) + } + (*ns)[n] = struct{}{} + return len(*ns) > sz +} + +func (x *nodeset) addAll(y nodeset) bool { + if y == nil { + return false + } + sz := len(*x) + if *x == nil { + *x = make(nodeset) + } + for n := range y { + (*x)[n] = struct{}{} + } + return len(*x) > sz +} + +// Constraint set ------------------------------------------------------------- + +type constraintset map[constraint]struct{} + +func (cs *constraintset) add(c constraint) bool { + sz := len(*cs) + if *cs == nil { + *cs = make(constraintset) + } + (*cs)[c] = struct{}{} + return len(*cs) > sz +} + +// Worklist ------------------------------------------------------------------- + +// TODO(adonovan): interface may not be general enough for certain +// implementations, e.g. priority queue +// +// Uses double-buffering so nodes can be added during iteration. +type worklist interface { + empty() bool // Reports whether active buffer is empty. + swap() bool // Switches to the shadow buffer if empty(). + add(nodeid) // Adds a node to the shadow buffer. + take() nodeid // Takes a node from the active buffer. Precondition: !empty(). +} + +// Horribly naive (and nondeterministic) worklist +// based on two hash-sets. +type mapWorklist struct { + active, shadow nodeset +} + +func (w *mapWorklist) empty() bool { + return len(w.active) == 0 +} + +func (w *mapWorklist) swap() bool { + if w.empty() { + w.shadow, w.active = w.active, w.shadow + return true + } + return false +} + +func (w *mapWorklist) add(n nodeid) { + w.shadow[n] = struct{}{} +} + +func (w *mapWorklist) take() nodeid { + for k := range w.active { + delete(w.active, k) + return k + } + panic("worklist.take(): empty active buffer") +} + +func makeMapWorklist() worklist { + return &mapWorklist{make(nodeset), make(nodeset)} +}