go/pointer: implement pointer equivalence via hash-value numbering, a pre-solver optimization.

This reduces solver time by about 40%.
See hvn.go for detailed description.

Also in this CL:
- Update package docs.
- Added various global opt/debug options for maintainer convenience.
- Added logging of phase timing.
- Added stdlib_test, disabled by default, that runs the analysis
  on all tests in $GOROOT.
- include types when dumping solution

LGTM=crawshaw
R=crawshaw, dannyb
CC=golang-codereviews
https://golang.org/cl/96650048
This commit is contained in:
Alan Donovan 2014-06-16 15:46:07 -04:00
parent 47c0a8f0c3
commit 9b38eafe60
17 changed files with 1768 additions and 285 deletions

View File

@ -17,11 +17,26 @@ CONSTRAINT GENERATION:
3) soundly track physical field offsets. (Summarise dannyb's email here.) 3) soundly track physical field offsets. (Summarise dannyb's email here.)
A downside is that we can't keep the identity field of struct A downside is that we can't keep the identity field of struct
allocations that identifies the object. allocations that identifies the object.
- add to pts(a.panic) a label representing all runtime panics, e.g.
runtime.{TypeAssertionError,errorString,errorCString}.
OPTIMISATIONS OPTIMISATIONS
- pre-solver: PE via HVN/HRU and LE. - pre-solver:
pointer equivalence: extend HVN to HRU
location equivalence
- solver: HCD, LCD. - solver: HCD, LCD.
- experiment with map+slice worklist in lieu of bitset.
It may have faster insert.
MISC: MISC:
- Test on all platforms. - Test on all platforms.
Currently we assume these go/build tags: linux, amd64, !cgo. Currently we assume these go/build tags: linux, amd64, !cgo.
MAINTAINABILITY
- Think about ways to make debugging this code easier. PTA logs
routinely exceed a million lines and require training to read.
BUGS:
- There's a crash bug in stdlib_test + reflection, rVCallConstraint.

View File

@ -12,7 +12,9 @@ import (
"io" "io"
"os" "os"
"reflect" "reflect"
"runtime"
"runtime/debug" "runtime/debug"
"sort"
"code.google.com/p/go.tools/go/callgraph" "code.google.com/p/go.tools/go/callgraph"
"code.google.com/p/go.tools/go/ssa" "code.google.com/p/go.tools/go/ssa"
@ -20,6 +22,18 @@ import (
"code.google.com/p/go.tools/go/types/typeutil" "code.google.com/p/go.tools/go/types/typeutil"
) )
const (
// optimization options; enable all when committing
optRenumber = true // enable renumbering optimization (makes logs hard to read)
optHVN = true // enable pointer equivalence via Hash-Value Numbering
// debugging options; disable all when committing
debugHVN = false // enable assertions in HVN
debugHVNVerbose = false // enable extra HVN logging
debugHVNCrossCheck = false // run solver with/without HVN and compare (caveats below)
debugTimers = true // show running time of each phase
)
// object.flags bitmask values. // object.flags bitmask values.
const ( const (
otTagged = 1 << iota // type-tagged object otTagged = 1 << iota // type-tagged object
@ -72,7 +86,7 @@ type nodeid uint32
type node struct { type node struct {
// If non-nil, this node is the start of an object // If non-nil, this node is the start of an object
// (addressable memory location). // (addressable memory location).
// The following obj.size words implicitly belong to the object; // The following obj.size nodes implicitly belong to the object;
// they locate their object by scanning back. // they locate their object by scanning back.
obj *object obj *object
@ -85,21 +99,10 @@ type node struct {
// an object of aggregate type (struct, tuple, array) this is. // an object of aggregate type (struct, tuple, array) this is.
subelement *fieldInfo // e.g. ".a.b[*].c" subelement *fieldInfo // e.g. ".a.b[*].c"
// Points-to sets. // Solver state for the canonical node of this pointer-
pts nodeset // points-to set of this node // equivalence class. Each node is created with its own state
prevPts nodeset // pts(n) in previous iteration (for difference propagation) // but they become shared after HVN.
solve *solverState
// Graph edges
copyTo nodeset // simple copy constraint edges
// Complex constraints attached to this node (x).
// - *loadConstraint y=*x
// - *offsetAddrConstraint y=&x.f or y=&x[0]
// - *storeConstraint *x=z
// - *typeFilterConstraint y=x.(I)
// - *untagConstraint y=x.(C)
// - *invokeConstraint y=x.f(params...)
complex []constraint
} }
// An analysis instance holds the state of a single pointer analysis problem. // An analysis instance holds the state of a single pointer analysis problem.
@ -119,6 +122,8 @@ type analysis struct {
globalobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton globalobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
localval map[ssa.Value]nodeid // node for each local ssa.Value localval map[ssa.Value]nodeid // node for each local ssa.Value
localobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton localobj map[ssa.Value]nodeid // maps v to sole member of pts(v), if singleton
atFuncs map[*ssa.Function]bool // address-taken functions (for presolver)
mapValues []nodeid // values of makemap objects (indirect in HVN)
work nodeset // solver's worklist work nodeset // solver's worklist
result *Result // results of the analysis result *Result // results of the analysis
track track // pointerlike types whose aliasing we track track track // pointerlike types whose aliasing we track
@ -136,9 +141,10 @@ type analysis struct {
runtimeSetFinalizer *ssa.Function // runtime.SetFinalizer runtimeSetFinalizer *ssa.Function // runtime.SetFinalizer
} }
// enclosingObj returns the object (addressible memory object) that encloses node id. // enclosingObj returns the first node of the addressable memory
// Panic ensues if that node does not belong to any object. // object that encloses node id. Panic ensues if that node does not
func (a *analysis) enclosingObj(id nodeid) *object { // belong to any object.
func (a *analysis) enclosingObj(id nodeid) nodeid {
// Find previous node with obj != nil. // Find previous node with obj != nil.
for i := id; i >= 0; i-- { for i := id; i >= 0; i-- {
n := a.nodes[i] n := a.nodes[i]
@ -146,7 +152,7 @@ func (a *analysis) enclosingObj(id nodeid) *object {
if i+nodeid(obj.size) <= id { if i+nodeid(obj.size) <= id {
break // out of bounds break // out of bounds
} }
return obj return i
} }
} }
panic("node has no enclosing object") panic("node has no enclosing object")
@ -156,7 +162,7 @@ func (a *analysis) enclosingObj(id nodeid) *object {
// Panic ensues if that node is not addressable. // Panic ensues if that node is not addressable.
func (a *analysis) labelFor(id nodeid) *Label { func (a *analysis) labelFor(id nodeid) *Label {
return &Label{ return &Label{
obj: a.enclosingObj(id), obj: a.nodes[a.enclosingObj(id)].obj,
subelement: a.nodes[id].subelement, subelement: a.nodes[id].subelement,
} }
} }
@ -222,6 +228,7 @@ func Analyze(config *Config) (result *Result, err error) {
globalobj: make(map[ssa.Value]nodeid), globalobj: make(map[ssa.Value]nodeid),
flattenMemo: make(map[types.Type][]*fieldInfo), flattenMemo: make(map[types.Type][]*fieldInfo),
trackTypes: make(map[types.Type]bool), trackTypes: make(map[types.Type]bool),
atFuncs: make(map[*ssa.Function]bool),
hasher: typeutil.MakeHasher(), hasher: typeutil.MakeHasher(),
intrinsics: make(map[*ssa.Function]intrinsic), intrinsics: make(map[*ssa.Function]intrinsic),
result: &Result{ result: &Result{
@ -236,7 +243,7 @@ func Analyze(config *Config) (result *Result, err error) {
} }
if a.log != nil { if a.log != nil {
fmt.Fprintln(a.log, "======== NEW ANALYSIS ========") fmt.Fprintln(a.log, "==== Starting analysis")
} }
// Pointer analysis requires a complete program for soundness. // Pointer analysis requires a complete program for soundness.
@ -275,23 +282,54 @@ func Analyze(config *Config) (result *Result, err error) {
a.computeTrackBits() a.computeTrackBits()
a.generate() a.generate()
a.showCounts()
if a.log != nil { if optRenumber {
// Show size of constraint system. a.renumber()
counts := make(map[reflect.Type]int)
for _, c := range a.constraints {
counts[reflect.TypeOf(c)]++
}
fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints))
for t, n := range counts {
fmt.Fprintf(a.log, "\t%s:\t%d\n", t, n)
}
fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes))
} }
a.optimize() N := len(a.nodes) // excludes solver-created nodes
if optHVN {
if debugHVNCrossCheck {
// Cross-check: run the solver once without
// optimization, once with, and compare the
// solutions.
savedConstraints := a.constraints
a.solve() a.solve()
a.dumpSolution("A.pts", N)
// Restore.
a.constraints = savedConstraints
for _, n := range a.nodes {
n.solve = new(solverState)
}
a.nodes = a.nodes[:N]
// rtypes is effectively part of the solver state.
a.rtypes = typeutil.Map{}
a.rtypes.SetHasher(a.hasher)
}
a.hvn()
}
if debugHVNCrossCheck {
runtime.GC()
runtime.GC()
}
a.solve()
// Compare solutions.
if optHVN && debugHVNCrossCheck {
a.dumpSolution("B.pts", N)
if !diff("A.pts", "B.pts") {
return nil, fmt.Errorf("internal error: optimization changed solution")
}
}
// Create callgraph.Nodes in deterministic order. // Create callgraph.Nodes in deterministic order.
if cg := a.result.CallGraph; cg != nil { if cg := a.result.CallGraph; cg != nil {
@ -304,7 +342,7 @@ func Analyze(config *Config) (result *Result, err error) {
var space [100]int var space [100]int
for _, caller := range a.cgnodes { for _, caller := range a.cgnodes {
for _, site := range caller.sites { for _, site := range caller.sites {
for _, callee := range a.nodes[site.targets].pts.AppendTo(space[:0]) { for _, callee := range a.nodes[site.targets].solve.pts.AppendTo(space[:0]) {
a.callEdge(caller, site, nodeid(callee)) a.callEdge(caller, site, nodeid(callee))
} }
} }
@ -342,3 +380,65 @@ func (a *analysis) callEdge(caller *cgnode, site *callsite, calleeid nodeid) {
a.warnf(fn.Pos(), " (declared here)") a.warnf(fn.Pos(), " (declared here)")
} }
} }
// dumpSolution writes the PTS solution to the specified file.
//
// It only dumps the nodes that existed before solving. The order in
// which solver-created nodes are created depends on pre-solver
// optimization, so we can't include them in the cross-check.
//
func (a *analysis) dumpSolution(filename string, N int) {
f, err := os.Create(filename)
if err != nil {
panic(err)
}
for id, n := range a.nodes[:N] {
if _, err := fmt.Fprintf(f, "pts(n%d) = {", id); err != nil {
panic(err)
}
var sep string
for _, l := range n.solve.pts.AppendTo(a.deltaSpace) {
if l >= N {
break
}
fmt.Fprintf(f, "%s%d", sep, l)
sep = " "
}
fmt.Fprintf(f, "} : %s\n", n.typ)
}
if err := f.Close(); err != nil {
panic(err)
}
}
// showCounts logs the size of the constraint system. A typical
// optimized distribution is 65% copy, 13% load, 11% addr, 5%
// offsetAddr, 4% store, 2% others.
//
func (a *analysis) showCounts() {
if a.log != nil {
counts := make(map[reflect.Type]int)
for _, c := range a.constraints {
counts[reflect.TypeOf(c)]++
}
fmt.Fprintf(a.log, "# constraints:\t%d\n", len(a.constraints))
var lines []string
for t, n := range counts {
line := fmt.Sprintf("%7d (%2d%%)\t%s", n, 100*n/len(a.constraints), t)
lines = append(lines, line)
}
sort.Sort(sort.Reverse(sort.StringSlice(lines)))
for _, line := range lines {
fmt.Fprintf(a.log, "\t%s\n", line)
}
fmt.Fprintf(a.log, "# nodes:\t%d\n", len(a.nodes))
// Show number of pointer equivalence classes.
m := make(map[*solverState]bool)
for _, n := range a.nodes {
m[n.solve] = true
}
fmt.Fprintf(a.log, "# ptsets:\t%d\n", len(m))
}
}

View File

@ -124,12 +124,12 @@ type Result struct {
// A Pointer is an equivalence class of pointer-like values. // A Pointer is an equivalence class of pointer-like values.
// //
// A pointer doesn't have a unique type because pointers of distinct // A Pointer doesn't have a unique type because pointers of distinct
// types may alias the same object. // types may alias the same object.
// //
type Pointer struct { type Pointer struct {
a *analysis a *analysis
n nodeid // non-zero n nodeid
} }
// A PointsToSet is a set of labels (locations or allocations). // A PointsToSet is a set of labels (locations or allocations).
@ -197,7 +197,7 @@ func (s PointsToSet) DynamicTypes() *typeutil.Map {
pts = PointsToSet{s.a, new(nodeset)} pts = PointsToSet{s.a, new(nodeset)}
tmap.Set(tDyn, pts) tmap.Set(tDyn, pts)
} }
pts.pts.addAll(&s.a.nodes[v].pts) pts.pts.addAll(&s.a.nodes[v].solve.pts)
} }
} }
return &tmap return &tmap
@ -224,7 +224,7 @@ func (p Pointer) PointsTo() PointsToSet {
if p.n == 0 { if p.n == 0 {
return PointsToSet{} return PointsToSet{}
} }
return PointsToSet{p.a, &p.a.nodes[p.n].pts} return PointsToSet{p.a, &p.a.nodes[p.n].solve.pts}
} }
// MayAlias reports whether the receiver pointer may alias // MayAlias reports whether the receiver pointer may alias

View File

@ -20,6 +20,17 @@ type cgnode struct {
callersite *callsite // where called from, if known; nil for shared contours callersite *callsite // where called from, if known; nil for shared contours
} }
// contour returns a description of this node's contour.
func (n *cgnode) contour() string {
if n.callersite == nil {
return "shared contour"
}
if n.callersite.instr != nil {
return fmt.Sprintf("as called from %s", n.callersite.instr.Parent())
}
return fmt.Sprintf("as called from intrinsic (targets=n%d)", n.callersite.targets)
}
func (n *cgnode) String() string { func (n *cgnode) String() string {
return fmt.Sprintf("cg%d:%s", n.obj, n.fn) return fmt.Sprintf("cg%d:%s", n.obj, n.fn)
} }

View File

@ -10,22 +10,18 @@ import (
type constraint interface { type constraint interface {
// For a complex constraint, returns the nodeid of the pointer // For a complex constraint, returns the nodeid of the pointer
// to which it is attached. // to which it is attached. For addr and copy, returns dst.
ptr() nodeid ptr() nodeid
// indirect returns (by appending to the argument) the constraint's
// "indirect" nodes as defined in (Hardekopf 2007b):
// nodes whose points-to relations are not completely
// represented in the initial constraint graph.
//
// TODO(adonovan): I think we need >1 results in some obscure
// cases. If not, just return a nodeid, like ptr().
//
indirect(nodes []nodeid) []nodeid
// renumber replaces each nodeid n in the constraint by mapping[n]. // renumber replaces each nodeid n in the constraint by mapping[n].
renumber(mapping []nodeid) renumber(mapping []nodeid)
// presolve is a hook for constraint-specific behaviour during
// pre-solver optimization. Typical implementations mark as
// indirect the set of nodes to which the solver will add copy
// edges or PTS labels.
presolve(h *hvn)
// solve is called for complex constraints when the pts for // solve is called for complex constraints when the pts for
// the node to which they are attached has changed. // the node to which they are attached has changed.
solve(a *analysis, delta *nodeset) solve(a *analysis, delta *nodeset)
@ -41,10 +37,7 @@ type addrConstraint struct {
src nodeid src nodeid
} }
func (c *addrConstraint) ptr() nodeid { panic("addrConstraint: not a complex constraint") } func (c *addrConstraint) ptr() nodeid { return c.dst }
func (c *addrConstraint) indirect(nodes []nodeid) []nodeid {
panic("addrConstraint: not a complex constraint")
}
func (c *addrConstraint) renumber(mapping []nodeid) { func (c *addrConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst] c.dst = mapping[c.dst]
c.src = mapping[c.src] c.src = mapping[c.src]
@ -53,14 +46,11 @@ func (c *addrConstraint) renumber(mapping []nodeid) {
// dst = src // dst = src
// A simple constraint represented directly as a copyTo graph edge. // A simple constraint represented directly as a copyTo graph edge.
type copyConstraint struct { type copyConstraint struct {
dst nodeid dst nodeid // (ptr)
src nodeid // (ptr) src nodeid
} }
func (c *copyConstraint) ptr() nodeid { panic("copyConstraint: not a complex constraint") } func (c *copyConstraint) ptr() nodeid { return c.dst }
func (c *copyConstraint) indirect(nodes []nodeid) []nodeid {
panic("copyConstraint: not a complex constraint")
}
func (c *copyConstraint) renumber(mapping []nodeid) { func (c *copyConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst] c.dst = mapping[c.dst]
c.src = mapping[c.src] c.src = mapping[c.src]
@ -70,12 +60,11 @@ func (c *copyConstraint) renumber(mapping []nodeid) {
// A complex constraint attached to src (the pointer) // A complex constraint attached to src (the pointer)
type loadConstraint struct { type loadConstraint struct {
offset uint32 offset uint32
dst nodeid // (indirect) dst nodeid
src nodeid // (ptr) src nodeid // (ptr)
} }
func (c *loadConstraint) ptr() nodeid { return c.src } func (c *loadConstraint) ptr() nodeid { return c.src }
func (c *loadConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.dst) }
func (c *loadConstraint) renumber(mapping []nodeid) { func (c *loadConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst] c.dst = mapping[c.dst]
c.src = mapping[c.src] c.src = mapping[c.src]
@ -90,7 +79,6 @@ type storeConstraint struct {
} }
func (c *storeConstraint) ptr() nodeid { return c.dst } func (c *storeConstraint) ptr() nodeid { return c.dst }
func (c *storeConstraint) indirect(nodes []nodeid) []nodeid { return nodes }
func (c *storeConstraint) renumber(mapping []nodeid) { func (c *storeConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst] c.dst = mapping[c.dst]
c.src = mapping[c.src] c.src = mapping[c.src]
@ -100,12 +88,11 @@ func (c *storeConstraint) renumber(mapping []nodeid) {
// A complex constraint attached to dst (the pointer) // A complex constraint attached to dst (the pointer)
type offsetAddrConstraint struct { type offsetAddrConstraint struct {
offset uint32 offset uint32
dst nodeid // (indirect) dst nodeid
src nodeid // (ptr) src nodeid // (ptr)
} }
func (c *offsetAddrConstraint) ptr() nodeid { return c.src } func (c *offsetAddrConstraint) ptr() nodeid { return c.src }
func (c *offsetAddrConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.dst) }
func (c *offsetAddrConstraint) renumber(mapping []nodeid) { func (c *offsetAddrConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst] c.dst = mapping[c.dst]
c.src = mapping[c.src] c.src = mapping[c.src]
@ -116,12 +103,11 @@ func (c *offsetAddrConstraint) renumber(mapping []nodeid) {
// No representation change: pts(dst) and pts(src) contains tagged objects. // No representation change: pts(dst) and pts(src) contains tagged objects.
type typeFilterConstraint struct { type typeFilterConstraint struct {
typ types.Type // an interface type typ types.Type // an interface type
dst nodeid // (indirect) dst nodeid
src nodeid // (ptr) src nodeid // (ptr)
} }
func (c *typeFilterConstraint) ptr() nodeid { return c.src } func (c *typeFilterConstraint) ptr() nodeid { return c.src }
func (c *typeFilterConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.dst) }
func (c *typeFilterConstraint) renumber(mapping []nodeid) { func (c *typeFilterConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst] c.dst = mapping[c.dst]
c.src = mapping[c.src] c.src = mapping[c.src]
@ -139,13 +125,12 @@ func (c *typeFilterConstraint) renumber(mapping []nodeid) {
// pts(dst) contains their payloads. // pts(dst) contains their payloads.
type untagConstraint struct { type untagConstraint struct {
typ types.Type // a concrete type typ types.Type // a concrete type
dst nodeid // (indirect) dst nodeid
src nodeid // (ptr) src nodeid // (ptr)
exact bool exact bool
} }
func (c *untagConstraint) ptr() nodeid { return c.src } func (c *untagConstraint) ptr() nodeid { return c.src }
func (c *untagConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.dst) }
func (c *untagConstraint) renumber(mapping []nodeid) { func (c *untagConstraint) renumber(mapping []nodeid) {
c.dst = mapping[c.dst] c.dst = mapping[c.dst]
c.src = mapping[c.src] c.src = mapping[c.src]
@ -156,11 +141,10 @@ func (c *untagConstraint) renumber(mapping []nodeid) {
type invokeConstraint struct { type invokeConstraint struct {
method *types.Func // the abstract method method *types.Func // the abstract method
iface nodeid // (ptr) the interface iface nodeid // (ptr) the interface
params nodeid // (indirect) the first param in the params/results block params nodeid // the start of the identity/params/results block
} }
func (c *invokeConstraint) ptr() nodeid { return c.iface } func (c *invokeConstraint) ptr() nodeid { return c.iface }
func (c *invokeConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.params) }
func (c *invokeConstraint) renumber(mapping []nodeid) { func (c *invokeConstraint) renumber(mapping []nodeid) {
c.iface = mapping[c.iface] c.iface = mapping[c.iface]
c.params = mapping[c.params] c.params = mapping[c.params]

View File

@ -7,21 +7,20 @@
Package pointer implements Andersen's analysis, an inclusion-based Package pointer implements Andersen's analysis, an inclusion-based
pointer analysis algorithm first described in (Andersen, 1994). pointer analysis algorithm first described in (Andersen, 1994).
The implementation is similar to that described in (Pearce et al, A pointer analysis relates every pointer expression in a whole program
PASTE'04). Unlike many algorithms which interleave constraint to the set of memory locations to which it might point. This
generation and solving, constructing the callgraph as they go, this information can be used to construct a call graph of the program that
implementation for the most part observes a phase ordering (generation precisely represents the destinations of dynamic function and method
before solving), with only simple (copy) constraints being generated calls. It can also be used to determine, for example, which pairs of
during solving. (The exception is reflection, which creates various channel operations operate on the same channel.
constraints during solving as new types flow to reflect.Value
operations.) This improves the traction of presolver optimisations,
but imposes certain restrictions, e.g. potential context sensitivity
is limited since all variants must be created a priori.
We intend to add various presolving optimisations such as Pointer and The package allows the client to request a set of expressions of
Location Equivalence from (Hardekopf & Lin, SAS '07) and solver interest for which the points-to information will be returned once the
optimisations such as Hybrid- and Lazy- Cycle Detection from analysis is complete. In addition, the client may request that a
(Hardekopf & Lin, PLDI'07), callgraph is constructed. The example program in example_test.go
demonstrates both of these features. Clients should not request more
information than they need since it may increase the cost of the
analysis significantly.
CLASSIFICATION CLASSIFICATION
@ -41,7 +40,7 @@ fields, such as x and y in struct { x, y *int }.
It is mostly CONTEXT-INSENSITIVE: most functions are analyzed once, It is mostly CONTEXT-INSENSITIVE: most functions are analyzed once,
so values can flow in at one call to the function and return out at so values can flow in at one call to the function and return out at
another. Only some smaller functions are analyzed with consideration another. Only some smaller functions are analyzed with consideration
to their calling context. of their calling context.
It has a CONTEXT-SENSITIVE HEAP: objects are named by both allocation It has a CONTEXT-SENSITIVE HEAP: objects are named by both allocation
site and context, so the objects returned by two distinct calls to f: site and context, so the objects returned by two distinct calls to f:
@ -54,11 +53,87 @@ complete Go program and summaries for native code.
See the (Hind, PASTE'01) survey paper for an explanation of these terms. See the (Hind, PASTE'01) survey paper for an explanation of these terms.
SOUNDNESS
The analysis is fully sound when invoked on pure Go programs that do not
use reflection or unsafe.Pointer conversions. In other words, if there
is any possible execution of the program in which pointer P may point to
object O, the analysis will report that fact.
REFLECTION
By default, the "reflect" library is ignored by the analysis, as if all
its functions were no-ops, but if the client enables the Reflection flag,
the analysis will make a reasonable attempt to model the effects of
calls into this library. However, this comes at a significant
performance cost, and not all features of that library are yet
implemented. In addition, some simplifying approximations must be made
to ensure that the analysis terminates; for example, reflection can be
used to construct an infinite set of types and values of those types,
but the analysis arbitrarily bounds the depth of such types.
Most but not all reflection operations are supported.
In particular, addressable reflect.Values are not yet implemented, so
operations such as (reflect.Value).Set have no analytic effect.
UNSAFE POINTER CONVERSIONS
The pointer analysis makes no attempt to understand aliasing between the
operand x and result y of an unsafe.Pointer conversion:
y = (*T)(unsafe.Pointer(x))
It is as if the conversion allocated an entirely new object:
y = new(T)
NATIVE CODE
The analysis cannot model the aliasing effects of functions written in
languages other than Go, such as runtime intrinsics in C or assembly, or
code accessed via cgo. The result is as if such functions are no-ops.
However, various important intrinsics are understood by the analysis,
along with built-ins such as append.
The analysis currently provides no way for users to specify the aliasing
effects of native code.
------------------------------------------------------------------------
IMPLEMENTATION
The remaining documentation is intended for package maintainers and
pointer analysis specialists. Maintainers should have a solid
understanding of the referenced papers (especially those by H&L and PKH)
before making significant changes.
The implementation is similar to that described in (Pearce et al,
PASTE'04). Unlike many algorithms which interleave constraint
generation and solving, constructing the callgraph as they go, this
implementation for the most part observes a phase ordering (generation
before solving), with only simple (copy) constraints being generated
during solving. (The exception is reflection, which creates various
constraints during solving as new types flow to reflect.Value
operations.) This improves the traction of presolver optimisations,
but imposes certain restrictions, e.g. potential context sensitivity
is limited since all variants must be created a priori.
TERMINOLOGY TERMINOLOGY
A type is said to be "pointer-like" if it is a reference to an object.
Pointer-like types include pointers and also interfaces, maps, channels,
functions and slices.
We occasionally use C's x->f notation to distinguish the case where x We occasionally use C's x->f notation to distinguish the case where x
is a struct pointer from x.f where x is a struct value. is a struct pointer from x.f where x is a struct value.
Pointer analysis literature (and our comments) often uses the notation
dst=*src+offset to mean something different than what it means in Go.
It means: for each node index p in pts(src), the node index p+offset is
in pts(dst). Similarly *dst+offset=src is used for store constraints
and dst=src+offset for offset-address constraints.
NODES NODES
@ -68,18 +143,19 @@ pointers) and members of points-to sets (things that can be pointed
at, i.e. "labels"). at, i.e. "labels").
Nodes are naturally numbered. The numbering enables compact Nodes are naturally numbered. The numbering enables compact
representations of sets of nodes such as bitvectors or BDDs; and the representations of sets of nodes such as bitvectors (or BDDs); and the
ordering enables a very cheap way to group related nodes together. ordering enables a very cheap way to group related nodes together. For
(For example, passing n parameters consists of generating n parallel example, passing n parameters consists of generating n parallel
constraints from caller+i to callee+i for 0<=i<n.) constraints from caller+i to callee+i for 0<=i<n.
The zero nodeid means "not a pointer". Currently it's only used for The zero nodeid means "not a pointer". For simplicity, we generate flow
struct{} or (). We generate all flow constraints, even for non-pointer constraints even for non-pointer types such as int. The pointer
types, with the expectations that (a) presolver optimisations will equivalence (PE) presolver optimization detects which variables cannot
quickly collapse all the non-pointer ones and (b) we may get more point to anything; this includes not only all variables of non-pointer
precise results by treating uintptr as a possible pointer. types (such as int) but also variables of pointer-like types if they are
always nil, or are parameters to a function that is never called.
Each node represents a scalar (word-sized) part of a value or object. Each node represents a scalar part of a value or object.
Aggregate types (structs, tuples, arrays) are recursively flattened Aggregate types (structs, tuples, arrays) are recursively flattened
out into a sequential list of scalar component types, and all the out into a sequential list of scalar component types, and all the
elements of an array are represented by a single node. (The elements of an array are represented by a single node. (The
@ -103,26 +179,24 @@ Objects include:
- variable allocations in the stack frame or heap; - variable allocations in the stack frame or heap;
- maps, channels and slices created by calls to make(); - maps, channels and slices created by calls to make();
- allocations to construct an interface; - allocations to construct an interface;
- allocations caused by literals and conversions, - allocations caused by conversions, e.g. []byte(str).
e.g. []byte("foo"), []byte(str).
- arrays allocated by calls to append(); - arrays allocated by calls to append();
Many objects have no Go types. For example, the func, map and chan Many objects have no Go types. For example, the func, map and chan type
type kinds in Go are all varieties of pointers, but the respective kinds in Go are all varieties of pointers, but their respective objects
objects are actual functions, maps, and channels. Given the way we are actual functions (executable code), maps (hash tables), and channels
model interfaces, they too are pointers to tagged objects with no (synchronized queues). Given the way we model interfaces, they too are
Go type. And an *ssa.Global denotes the address of a global variable, pointers to "tagged" objects with no Go type. And an *ssa.Global denotes
but the object for a Global is the actual data. So, types of objects the address of a global variable, but the object for a Global is the
are usually "off by one indirection". actual data. So, the types of an ssa.Value that creates an object is
"off by one indirection": a pointer to the object.
The individual nodes of an object are sometimes referred to as The individual nodes of an object are sometimes referred to as "labels".
"labels".
Objects containing no nodes (i.e. just empty structs; tuples may be For uniformity, all objects have a non-zero number of fields, even those
values but never objects in Go) are padded with an invalid-type node of the empty type struct{}. (All arrays are treated as if of length 1,
to have at least one node so that there is something to point to. so there are no empty arrays. The empty tuple is never address-taken,
(TODO(adonovan): I think this is unnecessary now that we have identity so is never an object.)
nodes; check.)
TAGGED OBJECTS TAGGED OBJECTS
@ -133,7 +207,7 @@ A tagged object has the following layout:
v v
... ...
The T node's typ field is the dynamic type of the "payload", the value The T node's typ field is the dynamic type of the "payload": the value
v which follows, flattened out. The T node's obj has the otTagged v which follows, flattened out. The T node's obj has the otTagged
flag. flag.
@ -143,7 +217,7 @@ as a pointer that exclusively points to tagged objects.
Tagged objects may be indirect (obj.flags {otIndirect}) meaning that Tagged objects may be indirect (obj.flags {otIndirect}) meaning that
the value v is not of type T but *T; this is used only for the value v is not of type T but *T; this is used only for
reflect.Values that represent lvalues. reflect.Values that represent lvalues. (These are not implemented yet.)
ANALYSIS ABSTRACTION OF EACH TYPE ANALYSIS ABSTRACTION OF EACH TYPE
@ -153,7 +227,7 @@ single node: basic types, pointers, channels, maps, slices, 'func'
pointers, interfaces. pointers, interfaces.
Pointers Pointers
Nothing to say here. Nothing to say here, oddly.
Basic types (bool, string, numbers, unsafe.Pointer) Basic types (bool, string, numbers, unsafe.Pointer)
Currently all fields in the flattening of a type, including Currently all fields in the flattening of a type, including
@ -172,9 +246,8 @@ Basic types (bool, string, numbers, unsafe.Pointer)
zero nodeid, and fields of these types within aggregate other types zero nodeid, and fields of these types within aggregate other types
are omitted. are omitted.
unsafe.Pointer conversions are not yet modelled as pointer unsafe.Pointers are not modelled as pointers, so a conversion of an
conversions. Consequently uintptr is always a number and uintptr unsafe.Pointer to *T is (unsoundly) treated equivalent to new(T).
nodes do not point to any object.
Channels Channels
An expression of type 'chan T' is a kind of pointer that points An expression of type 'chan T' is a kind of pointer that points
@ -227,12 +300,14 @@ Functions
The first node is the function's identity node. The first node is the function's identity node.
Associated with every callsite is a special "targets" variable, Associated with every callsite is a special "targets" variable,
whose pts(·) contains the identity node of each function to which whose pts() contains the identity node of each function to which
the call may dispatch. Identity words are not otherwise used. the call may dispatch. Identity words are not otherwise used during
the analysis, but we construct the call graph from the pts()
solution for such nodes.
The following block of nodes represent the flattened-out types of The following block of contiguous nodes represents the flattened-out
the parameters and results of the function object, and are types of the parameters ("P-block") and results ("R-block") of the
collectively known as its "P/R block". function object.
The treatment of free variables of closures (*ssa.FreeVar) is like The treatment of free variables of closures (*ssa.FreeVar) is like
that of global variables; it is not context-sensitive. that of global variables; it is not context-sensitive.
@ -255,18 +330,20 @@ Interfaces
all of the concrete type's methods; we can't tell a priori which all of the concrete type's methods; we can't tell a priori which
ones may be called. ones may be called.
TypeAssert y = x.(T) is implemented by a dynamic filter triggered by TypeAssert y = x.(T) is implemented by a dynamic constraint
each tagged object E added to pts(x). If T is an interface that E.T triggered by each tagged object O added to pts(x): a typeFilter
implements, E is added to pts(y). If T is a concrete type then edge constraint if T is an interface type, or an untag constraint if T is
E.v -> pts(y) is added. a concrete type. A typeFilter tests whether O.typ implements T; if
so, O is added to pts(y). An untagFilter tests whether O.typ is
assignable to T, and if so, a copy edge O.v -> y is added.
ChangeInterface is a simple copy because the representation of ChangeInterface is a simple copy because the representation of
tagged objects is independent of the interface type (in contrast tagged objects is independent of the interface type (in contrast
to the "method tables" approach used by the gc runtime). to the "method tables" approach used by the gc runtime).
y := Invoke x.m(...) is implemented by allocating a contiguous P/R y := Invoke x.m(...) is implemented by allocating contiguous P/R
block for the callsite and adding a dynamic rule triggered by each blocks for the callsite and adding a dynamic rule triggered by each
tagged object E added to pts(x). The rule adds param/results copy tagged object added to pts(x). The rule adds param/results copy
edges to/from each discovered concrete method. edges to/from each discovered concrete method.
(Q. Why do we model an interface as a pointer to a pair of type and (Q. Why do we model an interface as a pointer to a pair of type and
@ -279,8 +356,7 @@ Interfaces
reflect.Value reflect.Value
A reflect.Value is modelled very similar to an interface{}, i.e. as A reflect.Value is modelled very similar to an interface{}, i.e. as
a pointer exclusively to tagged objects, but with two a pointer exclusively to tagged objects, but with two generalizations.
generalizations.
1) a reflect.Value that represents an lvalue points to an indirect 1) a reflect.Value that represents an lvalue points to an indirect
(obj.flags {otIndirect}) tagged object, which has a similar (obj.flags {otIndirect}) tagged object, which has a similar
@ -304,6 +380,8 @@ reflect.Value
corresponds to the user-visible dynamic type, and the existence corresponds to the user-visible dynamic type, and the existence
of a pointer is an implementation detail. of a pointer is an implementation detail.
(NB: indirect tagged objects are not yet implemented)
2) The dynamic type tag of a tagged object pointed to by a 2) The dynamic type tag of a tagged object pointed to by a
reflect.Value may be an interface type; it need not be concrete. reflect.Value may be an interface type; it need not be concrete.
@ -350,15 +428,15 @@ Structs
struct is a pointer to its identity node. That node allows us to struct is a pointer to its identity node. That node allows us to
distinguish a pointer to a struct from a pointer to its first field. distinguish a pointer to a struct from a pointer to its first field.
Field offsets are currently the logical field offsets (plus one for Field offsets are logical field offsets (plus one for the identity
the identity node), so the sizes of the fields can be ignored by the node), so the sizes of the fields can be ignored by the analysis.
analysis.
Sound treatment of unsafe.Pointer conversions (not yet implemented) (The identity node is non-traditional but enables the distiction
would require us to model memory layout using physical field offsets described above, which is valuable for code comprehension tools.
to ascertain which object field(s) might be aliased by a given Typical pointer analyses for C, whose purpose is compiler
FieldAddr of a different base pointer type. It would also require optimization, must soundly model unsafe.Pointer (void*) conversions,
us to dispense with the identity node. and this requires fidelity to the actual memory layout using physical
field offsets.)
*ssa.Field y = x.f creates a simple edge to y from x's node at f's offset. *ssa.Field y = x.f creates a simple edge to y from x's node at f's offset.
@ -403,10 +481,16 @@ FUNCTION CALLS
A static call consists three steps: A static call consists three steps:
- finding the function object of the callee; - finding the function object of the callee;
- creating copy edges from the actual parameter value nodes to the - creating copy edges from the actual parameter value nodes to the
params block in the function object (this includes the receiver P-block in the function object (this includes the receiver if
if the callee is a method); the callee is a method);
- creating copy edges from the results block in the function - creating copy edges from the R-block in the function object to
object to the value nodes for the result of the call. the value nodes for the result of the call.
A static function call is little more than two struct value copies
between the P/R blocks of caller and callee:
callee.P = caller.P
caller.R = callee.R
Context sensitivity Context sensitivity
@ -422,13 +506,12 @@ FUNCTION CALLS
Dynamic calls work in a similar manner except that the creation of Dynamic calls work in a similar manner except that the creation of
copy edges occurs dynamically, in a similar fashion to a pair of copy edges occurs dynamically, in a similar fashion to a pair of
struct copies: struct copies in which the callee is indirect:
*fn->params = callargs callee->P = caller.P
callresult = *fn->results caller.R = callee->R
(Recall that the function object's params and results blocks are (Recall that the function object's P- and R-blocks are contiguous.)
contiguous.)
Interface method invocation Interface method invocation
@ -436,7 +519,8 @@ FUNCTION CALLS
callsite and attach a dynamic closure rule to the interface. For callsite and attach a dynamic closure rule to the interface. For
each new tagged object that flows to the interface, we look up each new tagged object that flows to the interface, we look up
the concrete method, find its function object, and connect its P/R the concrete method, find its function object, and connect its P/R
block to the callsite's P/R block. blocks to the callsite's P/R blocks, adding copy edges to the graph
during solving.
Recording call targets Recording call targets
@ -451,14 +535,38 @@ FUNCTION CALLS
internally this just iterates all "targets" variables' pts(·)s. internally this just iterates all "targets" variables' pts(·)s.
PRESOLVER
We implement Hash-Value Numbering (HVN), a pre-solver constraint
optimization described in Hardekopf & Lin, SAS'07. This is documented
in more detail in hvn.go. We intend to add its cousins HR and HU in
future.
SOLVER SOLVER
The solver is currently a very naive Andersen-style implementation, The solver is currently a naive Andersen-style implementation; it does
although it does use difference propagation (Pearce et al, SQC'04). not perform online cycle detection, though we plan to add solver
There is much to do. optimisations such as Hybrid- and Lazy- Cycle Detection from (Hardekopf
& Lin, PLDI'07).
It uses difference propagation (Pearce et al, SQC'04) to avoid
redundant re-triggering of closure rules for values already seen.
Points-to sets are represented using sparse bit vectors (similar to
those used in LLVM and gcc), which are more space- and time-efficient
than sets based on Go's built-in map type or dense bit vectors.
Nodes are permuted prior to solving so that object nodes (which may
appear in points-to sets) are lower numbered than non-object (var)
nodes. This improves the density of the set over which the PTSs
range, and thus the efficiency of the representation.
Partly thanks to avoiding map iteration, the execution of the solver is
100% deterministic, a great help during debugging.
FURTHER READING. FURTHER READING
Andersen, L. O. 1994. Program analysis and specialization for the C Andersen, L. O. 1994. Program analysis and specialization for the C
programming language. Ph.D. dissertation. DIKU, University of programming language. Ph.D. dissertation. DIKU, University of
@ -492,5 +600,11 @@ international conference on Static Analysis (SAS'07), Hanne Riis
Nielson and Gilberto Filé (Eds.). Springer-Verlag, Berlin, Heidelberg, Nielson and Gilberto Filé (Eds.). Springer-Verlag, Berlin, Heidelberg,
265-280. 265-280.
Atanas Rountev and Satish Chandra. 2000. Off-line variable substitution
for scaling points-to analysis. In Proceedings of the ACM SIGPLAN 2000
conference on Programming language design and implementation (PLDI '00).
ACM, New York, NY, USA, 47-56. DOI=10.1145/349299.349310
http://doi.acm.org/10.1145/349299.349310
*/ */
package pointer package pointer

View File

@ -59,7 +59,7 @@ func (a *analysis) addNodes(typ types.Type, comment string) nodeid {
// //
func (a *analysis) addOneNode(typ types.Type, comment string, subelement *fieldInfo) nodeid { func (a *analysis) addOneNode(typ types.Type, comment string, subelement *fieldInfo) nodeid {
id := a.nextNode() id := a.nextNode()
a.nodes = append(a.nodes, &node{typ: typ, subelement: subelement}) a.nodes = append(a.nodes, &node{typ: typ, subelement: subelement, solve: new(solverState)})
if a.log != nil { if a.log != nil {
fmt.Fprintf(a.log, "\tcreate n%d %s for %s%s\n", fmt.Fprintf(a.log, "\tcreate n%d %s for %s%s\n",
id, typ, comment, subelement.path()) id, typ, comment, subelement.path())
@ -178,6 +178,9 @@ func (a *analysis) makeTagged(typ types.Type, cgn *cgnode, data interface{}) nod
// makeRtype returns the canonical tagged object of type *rtype whose // makeRtype returns the canonical tagged object of type *rtype whose
// payload points to the sole rtype object for T. // payload points to the sole rtype object for T.
//
// TODO(adonovan): move to reflect.go; it's part of the solver really.
//
func (a *analysis) makeRtype(T types.Type) nodeid { func (a *analysis) makeRtype(T types.Type) nodeid {
if v := a.rtypes.At(T); v != nil { if v := a.rtypes.At(T); v != nil {
return v.(nodeid) return v.(nodeid)
@ -261,7 +264,7 @@ func (a *analysis) taggedValue(obj nodeid) (tDyn types.Type, v nodeid, indirect
return n.typ, obj + 1, flags&otIndirect != 0 return n.typ, obj + 1, flags&otIndirect != 0
} }
// funcParams returns the first node of the params block of the // funcParams returns the first node of the params (P) block of the
// function whose object node (obj.flags&otFunction) is id. // function whose object node (obj.flags&otFunction) is id.
// //
func (a *analysis) funcParams(id nodeid) nodeid { func (a *analysis) funcParams(id nodeid) nodeid {
@ -272,7 +275,7 @@ func (a *analysis) funcParams(id nodeid) nodeid {
return id + 1 return id + 1
} }
// funcResults returns the first node of the results block of the // funcResults returns the first node of the results (R) block of the
// function whose object node (obj.flags&otFunction) is id. // function whose object node (obj.flags&otFunction) is id.
// //
func (a *analysis) funcResults(id nodeid) nodeid { func (a *analysis) funcResults(id nodeid) nodeid {
@ -662,9 +665,9 @@ func (a *analysis) genDynamicCall(caller *cgnode, site *callsite, call *ssa.Call
// pts(targets) will be the set of possible call targets. // pts(targets) will be the set of possible call targets.
site.targets = a.valueNode(call.Value) site.targets = a.valueNode(call.Value)
// We add dynamic closure rules that store the arguments into, // We add dynamic closure rules that store the arguments into
// and load the results from, the P/R block of each function // the P-block and load the results from the R-block of each
// discovered in pts(targets). // function discovered in pts(targets).
sig := call.Signature() sig := call.Signature()
var offset uint32 = 1 // P/R block starts at offset 1 var offset uint32 = 1 // P/R block starts at offset 1
@ -705,9 +708,9 @@ func (a *analysis) genInvoke(caller *cgnode, site *callsite, call *ssa.CallCommo
a.copy(result, r, a.sizeof(sig.Results())) a.copy(result, r, a.sizeof(sig.Results()))
} }
// We add a dynamic invoke constraint that will add // We add a dynamic invoke constraint that will connect the
// edges from the caller's P/R block to the callee's // caller's and the callee's P/R blocks for each discovered
// P/R block for each discovered call target. // call target.
a.addConstraint(&invokeConstraint{call.Method, a.valueNode(call.Value), block}) a.addConstraint(&invokeConstraint{call.Method, a.valueNode(call.Value), block})
} }
@ -749,7 +752,7 @@ func (a *analysis) genInvokeReflectType(caller *cgnode, site *callsite, call *ss
a.copy(params, rtype, 1) a.copy(params, rtype, 1)
params++ params++
// Copy actual parameters into formal params block. // Copy actual parameters into formal P-block.
// Must loop, since the actuals aren't contiguous. // Must loop, since the actuals aren't contiguous.
for i, arg := range call.Args { for i, arg := range call.Args {
sz := a.sizeof(sig.Params().At(i).Type()) sz := a.sizeof(sig.Params().At(i).Type())
@ -757,7 +760,7 @@ func (a *analysis) genInvokeReflectType(caller *cgnode, site *callsite, call *ss
params += nodeid(sz) params += nodeid(sz)
} }
// Copy formal results block to actual result. // Copy formal R-block to actual R-block.
if result != 0 { if result != 0 {
a.copy(result, a.funcResults(obj), a.sizeof(sig.Results())) a.copy(result, a.funcResults(obj), a.sizeof(sig.Results()))
} }
@ -790,6 +793,7 @@ func (a *analysis) genCall(caller *cgnode, instr ssa.CallInstruction) {
caller.sites = append(caller.sites, site) caller.sites = append(caller.sites, site)
if a.log != nil { if a.log != nil {
// TODO(adonovan): debug: improve log message.
fmt.Fprintf(a.log, "\t%s to targets %s from %s\n", site, site.targets, caller) fmt.Fprintf(a.log, "\t%s to targets %s from %s\n", site, site.targets, caller)
} }
} }
@ -863,7 +867,15 @@ func (a *analysis) objectNode(cgn *cgnode, v ssa.Value) nodeid {
obj = a.nextNode() obj = a.nextNode()
tmap := v.Type().Underlying().(*types.Map) tmap := v.Type().Underlying().(*types.Map)
a.addNodes(tmap.Key(), "makemap.key") a.addNodes(tmap.Key(), "makemap.key")
a.addNodes(tmap.Elem(), "makemap.value") elem := a.addNodes(tmap.Elem(), "makemap.value")
// To update the value field, MapUpdate
// generates store-with-offset constraints which
// the presolver can't model, so we must mark
// those nodes indirect.
for id, end := elem, elem+nodeid(a.sizeof(tmap.Elem())); id < end; id++ {
a.mapValues = append(a.mapValues, id)
}
a.endObject(obj, cgn, v) a.endObject(obj, cgn, v)
case *ssa.MakeInterface: case *ssa.MakeInterface:
@ -1140,8 +1152,7 @@ func (a *analysis) genFunc(cgn *cgnode) {
impl := a.findIntrinsic(fn) impl := a.findIntrinsic(fn)
if a.log != nil { if a.log != nil {
fmt.Fprintln(a.log) fmt.Fprintf(a.log, "\n\n==== Generating constraints for %s, %s\n", cgn, cgn.contour())
fmt.Fprintln(a.log)
// Hack: don't display body if intrinsic. // Hack: don't display body if intrinsic.
if impl != nil { if impl != nil {
@ -1187,6 +1198,7 @@ func (a *analysis) genFunc(cgn *cgnode) {
// Create value nodes for all value instructions // Create value nodes for all value instructions
// since SSA may contain forward references. // since SSA may contain forward references.
var space [10]*ssa.Value
for _, b := range fn.Blocks { for _, b := range fn.Blocks {
for _, instr := range b.Instrs { for _, instr := range b.Instrs {
switch instr := instr.(type) { switch instr := instr.(type) {
@ -1202,6 +1214,19 @@ func (a *analysis) genFunc(cgn *cgnode) {
id := a.addNodes(instr.Type(), comment) id := a.addNodes(instr.Type(), comment)
a.setValueNode(instr, id, cgn) a.setValueNode(instr, id, cgn)
} }
// Record all address-taken functions (for presolver).
rands := instr.Operands(space[:0])
if call, ok := instr.(ssa.CallInstruction); ok && !call.Common().IsInvoke() {
// Skip CallCommon.Value in "call" mode.
// TODO(adonovan): fix: relies on unspecified ordering. Specify it.
rands = rands[1:]
}
for _, rand := range rands {
if atf, ok := (*rand).(*ssa.Function); ok {
a.atFuncs[atf] = true
}
}
} }
} }
@ -1218,14 +1243,29 @@ func (a *analysis) genFunc(cgn *cgnode) {
// genMethodsOf generates nodes and constraints for all methods of type T. // genMethodsOf generates nodes and constraints for all methods of type T.
func (a *analysis) genMethodsOf(T types.Type) { func (a *analysis) genMethodsOf(T types.Type) {
itf := isInterface(T)
// TODO(adonovan): can we skip this entirely if itf is true?
// I think so, but the answer may depend on reflection.
mset := a.prog.MethodSets.MethodSet(T) mset := a.prog.MethodSets.MethodSet(T)
for i, n := 0, mset.Len(); i < n; i++ { for i, n := 0, mset.Len(); i < n; i++ {
a.valueNode(a.prog.Method(mset.At(i))) m := a.prog.Method(mset.At(i))
a.valueNode(m)
if !itf {
// Methods of concrete types are address-taken functions.
a.atFuncs[m] = true
}
} }
} }
// generate generates offline constraints for the entire program. // generate generates offline constraints for the entire program.
func (a *analysis) generate() { func (a *analysis) generate() {
start("Constraint generation")
if a.log != nil {
fmt.Fprintln(a.log, "==== Generating constraints")
}
// Create a dummy node since we use the nodeid 0 for // Create a dummy node since we use the nodeid 0 for
// non-pointerlike variables. // non-pointerlike variables.
a.addNodes(tInvalid, "(zero)") a.addNodes(tInvalid, "(zero)")
@ -1273,4 +1313,6 @@ func (a *analysis) generate() {
a.globalval = nil a.globalval = nil
a.localval = nil a.localval = nil
a.localobj = nil a.localobj = nil
stop("Constraint generation")
} }

968
go/pointer/hvn.go Normal file
View File

@ -0,0 +1,968 @@
package pointer
// This file implements Hash-Value Numbering (HVN), a pre-solver
// constraint optimization described in Hardekopf & Lin, SAS'07 (see
// doc.go) that analyses the graph topology to determine which sets of
// variables are "pointer equivalent" (PE), i.e. must have identical
// points-to sets in the solution.
//
// A separate ("offline") graph is constructed. Its nodes are those of
// the main-graph, plus an additional node *X for each pointer node X.
// With this graph we can reason about the unknown points-to set of
// dereferenced pointers. (We do not generalize this to represent
// unknown fields x->f, perhaps because such fields would be numerous,
// though it might be worth an experiment.)
//
// Nodes whose points-to relations are not entirely captured by the
// graph are marked as "indirect": the *X nodes, the parameters of
// address-taken functions (which includes all functions in method
// sets), or nodes updated by the solver rules for reflection, etc.
//
// All addr (y=&x) nodes are initially assigned a pointer-equivalence
// (PE) label equal to x's nodeid in the main graph. (These are the
// only PE labels that are less than len(a.nodes).)
//
// All offsetAddr (y=&x.f) constraints are initially assigned a PE
// label; such labels are memoized, keyed by (x, f), so that equivalent
// nodes y are assigned the same label.
//
// Then we process each strongly connected component (SCC) of the graph
// in topological order, assigning it a PE label based on the set P of
// PE labels that flow to it from its immediate dependencies.
//
// If any node in P is "indirect", the entire SCC is assigned a fresh PE
// label. Otherwise:
//
// |P|=0 if P is empty, all nodes in the SCC are non-pointers (e.g.
// uninitialized variables, or formal params of dead functions)
// and the SCC is assigned the PE label of zero.
//
// |P|=1 if P is a singleton, the SCC is assigned the same label as the
// sole element of P.
//
// |P|>1 if P contains multiple labels, a unique label representing P is
// invented and recorded in a hash table, so that other
// equivalent SCCs may also be assigned this label, akin to
// conventional hash-value numbering in a compiler.
//
// Finally, a renumbering is computed such that each node is replaced by
// the lowest-numbered node with the same PE label. All constraints are
// renumbered, and any resulting duplicates are eliminated.
//
// The only nodes that are not renumbered are the objects x in addr
// (y=&x) constraints, since the ids of these nodes (and fields derived
// from them via offsetAddr rules) are the elements of all points-to
// sets, so they must remain as they are if we want the same solution.
//
// The solverStates (node.solve) for nodes in the same equivalence class
// are linked together so that all nodes in the class have the same
// solution. This avoids the need to renumber nodeids buried in
// Queries, cgnodes, etc (like (*analysis).renumber() does) since only
// the solution is needed.
//
// The result of HVN is that the number of distinct nodes and
// constraints is reduced, but the solution is identical (almost---see
// CROSS-CHECK below). In particular, both linear and cyclic chains of
// copies are each replaced by a single node.
//
// Nodes and constraints created "online" (e.g. while solving reflection
// constraints) are not subject to this optimization.
//
// PERFORMANCE
//
// In two benchmarks (oracle and godoc), HVN eliminates about two thirds
// of nodes, the majority accounted for by non-pointers: nodes of
// non-pointer type, pointers that remain nil, formal parameters of dead
// functions, nodes of untracked types, etc. It also reduces the number
// of constraints, also by about two thirds, and the solving time by
// 30--42%, although we must pay about 15% for the running time of HVN
// itself. The benefit is greater for larger applications.
//
// There are many possible optimizations to improve the performance:
// * Use fewer than 1:1 onodes to main graph nodes: many of the onodes
// we create are not needed.
// * HU (HVN with Union---see paper): coalesce "union" peLabels when
// their expanded-out sets are equal.
// * HR (HVN with deReference---see paper): this will require that we
// apply HVN until fixed point, which may need more bookkeeping of the
// correspondence of main nodes to onodes.
// * Location Equivalence (see paper): have points-to sets contain not
// locations but location-equivalence class labels, each representing
// a set of locations.
// * HVN with field-sensitive ref: model each of the fields of a
// pointer-to-struct.
//
// CROSS-CHECK
//
// To verify the soundness of the optimization, when the
// debugHVNCrossCheck option is enabled, we run the solver twice, once
// before and once after running HVN, dumping the solution to disk, and
// then we compare the results. If they are not identical, the analysis
// panics.
//
// The solution dumped to disk includes only the N*N submatrix of the
// complete solution where N is the number of nodes after generation.
// In other words, we ignore pointer variables and objects created by
// the solver itself, since their numbering depends on the solver order,
// which is affected by the optimization. In any case, that's the only
// part the client cares about.
//
// The cross-check is too strict and may fail spuriously. Although the
// H&L paper describing HVN states that the solutions obtained should be
// identical, this is not the case in practice because HVN can collapse
// cycles involving *p even when pts(p)={}. Consider this example
// distilled from testdata/hello.go:
//
// var x T
// func f(p **T) {
// t0 = *p
// ...
// t1 = φ(t0, &x)
// *p = t1
// }
//
// If f is dead code, we get:
// unoptimized: pts(p)={} pts(t0)={} pts(t1)={&x}
// optimized: pts(p)={} pts(t0)=pts(t1)=pts(*p)={&x}
//
// It's hard to argue that this is a bug: the result is sound and the
// loss of precision is inconsequential---f is dead code, after all.
// But unfortunately it limits the usefulness of the cross-check since
// failures must be carefully analyzed. Ben Hardekopf suggests (in
// personal correspondence) some approaches to mitigating it:
//
// If there is a node with an HVN points-to set that is a superset
// of the NORM points-to set, then either it's a bug or it's a
// result of this issue. If it's a result of this issue, then in
// the offline constraint graph there should be a REF node inside
// some cycle that reaches this node, and in the NORM solution the
// pointer being dereferenced by that REF node should be the empty
// set. If that isn't true then this is a bug. If it is true, then
// you can further check that in the NORM solution the "extra"
// points-to info in the HVN solution does in fact come from that
// purported cycle (if it doesn't, then this is still a bug). If
// you're doing the further check then you'll need to do it for
// each "extra" points-to element in the HVN points-to set.
//
// There are probably ways to optimize these checks by taking
// advantage of graph properties. For example, extraneous points-to
// info will flow through the graph and end up in many
// nodes. Rather than checking every node with extra info, you
// could probably work out the "origin point" of the extra info and
// just check there. Note that the check in the first bullet is
// looking for soundness bugs, while the check in the second bullet
// is looking for precision bugs; depending on your needs, you may
// care more about one than the other.
//
// which we should evaluate. The cross-check is nonetheless invaluable
// for all but one of the programs in the pointer_test suite.
import (
"fmt"
"io"
"log"
"reflect"
"code.google.com/p/go.tools/container/intsets"
"code.google.com/p/go.tools/go/types"
)
// A peLabel is a pointer-equivalence label: two nodes with the same
// peLabel have identical points-to solutions.
//
// The numbers are allocated consecutively like so:
//	0	not a pointer
//	1..N-1	addrConstraints (equals the constraint's .src field, hence sparse)
//	...	offsetAddr constraints
//	...	SCCs (with indirect nodes or multiple inputs)
//
// Labels less than N double as main-graph object nodeids: an addr
// constraint y=&x is labelled with x's nodeid.
//
// Each PE label denotes a set of pointers containing a single addr, a
// single offsetAddr, or some set of other PE labels.
//
type peLabel int
// An hvn holds the state of a single run of the HVN algorithm: the
// offline constraint graph plus the tables used to issue and memoize
// pointer-equivalence labels.
type hvn struct {
	a        *analysis
	N        int                // len(a.nodes) immediately after constraint generation
	log      io.Writer          // (optional) log of HVN lemmas
	onodes   []*onode           // nodes of the offline graph
	label    peLabel            // the next available PE label
	hvnLabel map[string]peLabel // hash-value numbering (PE label) for each set of onodeids
	stack    []onodeid          // DFS stack
	index    int32              // next onode.index, from Tarjan's SCC algorithm

	// For each distinct offsetAddrConstraint (src, offset) pair,
	// offsetAddrLabels records a unique PE label >= N.
	offsetAddrLabels map[offsetAddr]peLabel
}
// An onodeid is the index of a node in the offline graph.
// (Currently the first N align with the main nodes,
// but this may change with HRU.)
type onodeid uint32
// An onode is a node in the offline constraint graph.
// (Where ambiguous, members of analysis.nodes are referred to as
// "main graph" nodes.)
//
// Edges in the offline constraint graph (edges and implicit) point to
// the source, i.e. against the flow of values: they are dependencies.
// Implicit edges are used for SCC computation, but not for gathering
// incoming labels.
//
type onode struct {
	rep onodeid // index of representative of SCC in offline constraint graph

	edges    intsets.Sparse // constraint edges X-->Y (this onode is X)
	implicit intsets.Sparse // implicit edges *X-->*Y (this onode is X)
	peLabels intsets.Sparse // set of peLabels that are pointer-equivalent to this one
	indirect bool           // node has points-to relations not represented in graph

	// Tarjan's SCC algorithm
	index, lowlink int32 // Tarjan numbering
	scc            int32 // -ve => on stack; 0 => unvisited; +ve => node is root of a found SCC
}
// An offsetAddr identifies a distinct offsetAddrConstraint by its
// (src pointer, field offset) pair; it is the key under which
// hvn.offsetAddrLabels memoizes PE labels.
type offsetAddr struct {
	ptr    nodeid
	offset uint32
}
// nextLabel issues the next unused pointer-equivalence label.
func (h *hvn) nextLabel() peLabel {
	next := h.label + 1
	h.label = next
	return next
}
// ref(X) returns the index of the onode for *X.
// Ref nodes are laid out immediately after the len(a.nodes) main-graph nodes.
func (h *hvn) ref(id onodeid) onodeid {
	n := onodeid(len(h.a.nodes))
	return n + id
}
// hvn computes pointer-equivalence labels (peLabels) using the Hash-based
// Value Numbering (HVN) algorithm described in Hardekopf & Lin, SAS'07.
//
// It builds the offline constraint graph, collapses its SCCs, assigns
// PE labels, and then calls simplify() to rewrite the main constraint
// graph so that pointer-equivalent nodes share a single representative.
//
func (a *analysis) hvn() {
	start("HVN")

	if a.log != nil {
		fmt.Fprintf(a.log, "\n\n==== Pointer equivalence optimization\n\n")
	}

	h := hvn{
		a:                a,
		N:                len(a.nodes),
		log:              a.log,
		hvnLabel:         make(map[string]peLabel),
		offsetAddrLabels: make(map[offsetAddr]peLabel),
	}

	if h.log != nil {
		fmt.Fprintf(h.log, "\nCreating offline graph nodes...\n")
	}

	// Create offline nodes. The first N nodes correspond to main
	// graph nodes; the next N are their corresponding ref() nodes.
	// Every ref node is indirect: its points-to relations are not
	// fully captured by the offline graph.
	h.onodes = make([]*onode, 2*h.N)
	for id := range a.nodes {
		id := onodeid(id)
		h.onodes[id] = &onode{}
		h.onodes[h.ref(id)] = &onode{indirect: true}
	}

	// Each node initially represents just itself.
	for id, o := range h.onodes {
		o.rep = onodeid(id)
	}

	h.markIndirectNodes()

	// Reserve the first N PE labels for addrConstraints.
	h.label = peLabel(h.N)

	// Add offline constraint edges.
	// Each constraint kind contributes edges via its presolve method.
	if h.log != nil {
		fmt.Fprintf(h.log, "\nAdding offline graph edges...\n")
	}
	for _, c := range a.constraints {
		if debugHVNVerbose && h.log != nil {
			fmt.Fprintf(h.log, "; %s\n", c)
		}
		c.presolve(&h)
	}

	// Find and collapse SCCs.
	// Node 0 is skipped; it is the dummy node for non-pointerlike values.
	if h.log != nil {
		fmt.Fprintf(h.log, "\nFinding SCCs...\n")
	}
	h.index = 1
	for id, o := range h.onodes {
		if id > 0 && o.index == 0 {
			// Start depth-first search at each unvisited node.
			h.visit(onodeid(id))
		}
	}

	// Dump the solution
	// (NB: somewhat redundant with logging from simplify().)
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\nPointer equivalences:\n")
		for id, o := range h.onodes {
			if id == 0 {
				continue
			}
			if id == int(h.N) {
				// Separator between main nodes and their ref() nodes.
				fmt.Fprintf(h.log, "---\n")
			}
			fmt.Fprintf(h.log, "o%d\t", id)
			if o.rep != onodeid(id) {
				fmt.Fprintf(h.log, "rep=o%d", o.rep)
			} else {
				fmt.Fprintf(h.log, "p%d", o.peLabels.Min())
				if o.indirect {
					fmt.Fprint(h.log, " indirect")
				}
			}
			fmt.Fprintln(h.log)
		}
	}

	// Simplify the main constraint graph
	h.simplify()

	a.showCounts()

	stop("HVN")
}
// ---- constraint-specific rules ----
// dst := &src
func (c *addrConstraint) presolve(h *hvn) {
	dst, src := onodeid(c.dst), onodeid(c.src)

	// Each object (src) is itself an initial PE label,
	// numbered below N.
	lab := peLabel(c.src)
	if debugHVNVerbose && h.log != nil {
		// duplicate log messages are possible
		fmt.Fprintf(h.log, "\tcreate p%d: {&n%d}\n", lab, c.src)
	}

	// Pre-label dst with the object's label.
	h.onodes[dst].peLabels.Insert(int(lab))
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\to%d has p%d\n", dst, lab)
	}

	// Dereferencing dst may reach src: *dst ~~> src.
	h.addImplicitEdge(h.ref(dst), src)
}
// dst = src
func (c *copyConstraint) presolve(h *hvn) {
	dst, src := onodeid(c.dst), onodeid(c.src)
	// A copy makes dst depend on src directly, and *dst on *src
	// implicitly.
	h.addEdge(dst, src)                       // dst --> src
	h.addImplicitEdge(h.ref(dst), h.ref(src)) // *dst ~~> *src
}
// dst = *src + offset
func (c *loadConstraint) presolve(h *hvn) {
	if c.offset != 0 {
		// We don't interpret load-with-offset, e.g. results
		// of map value lookup, R-block of dynamic call, slice
		// copy/append, reflection.
		h.markIndirect(onodeid(c.dst), "load with offset")
		return
	}
	h.addEdge(onodeid(c.dst), h.ref(onodeid(c.src))) // dst --> *src
}
// *dst + offset = src
func (c *storeConstraint) presolve(h *hvn) {
	odst := onodeid(c.dst)
	osrc := onodeid(c.src)
	if c.offset == 0 {
		// Use the addEdge helper rather than duplicating its
		// insert+log inline, for consistency with the other
		// presolve methods; it performs exactly the same
		// Insert and emits the same log line.
		h.addEdge(h.ref(odst), osrc) // *dst --> src
	} else {
		// We don't interpret store-with-offset.
		// See discussion of soundness at markIndirectNodes.
	}
}
// dst = &src.offset
func (c *offsetAddrConstraint) presolve(h *hvn) {
	// Every distinct (addr, offset) pair shares one fresh PE
	// label; the cache effectively performs CSE.
	key := offsetAddr{c.src, c.offset}
	lab, seen := h.offsetAddrLabels[key]
	if !seen {
		lab = h.nextLabel()
		h.offsetAddrLabels[key] = lab
		if debugHVNVerbose && h.log != nil {
			fmt.Fprintf(h.log, "\tcreate p%d: {&n%d.#%d}\n",
				lab, c.src, c.offset)
		}
	}

	// Pre-label dst with the pair's label.
	h.onodes[c.dst].peLabels.Insert(int(lab))
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\to%d has p%d\n", c.dst, lab)
	}
}
// dst = src.(typ) where typ is an interface
func (c *typeFilterConstraint) presolve(h *hvn) {
	// The filtered result is not modelled by the offline graph.
	h.markIndirect(onodeid(c.dst), "typeFilter result")
}
// dst = src.(typ) where typ is concrete
func (c *untagConstraint) presolve(h *hvn) {
	// Mark every node of the result block — one per flattened
	// field of typ — as indirect.
	base := onodeid(c.dst)
	n := onodeid(h.a.sizeof(c.typ))
	for i := onodeid(0); i < n; i++ {
		h.markIndirect(base+i, "untag result")
	}
}
// dst = src.method(c.params...)
func (c *invokeConstraint) presolve(h *hvn) {
	// All methods are address-taken functions, so
	// their formal P-blocks were already marked indirect.
	sig := c.method.Type().(*types.Signature)

	// Mark the caller's targets node as indirect.
	h.markIndirect(onodeid(c.params), "invoke targets node")

	// The caller's R-block begins just past the targets node and
	// the P-block; mark each of its nodes as indirect.
	rBegin := c.params + 1 + nodeid(h.a.sizeof(sig.Params()))
	rEnd := rBegin + nodeid(h.a.sizeof(sig.Results()))
	for id := rBegin; id < rEnd; id++ {
		h.markIndirect(onodeid(id), "invoke R-block")
	}
}
// markIndirectNodes marks as indirect nodes whose points-to relations
// are not entirely captured by the offline graph, including:
//
// (a) All address-taken nodes (including the following nodes within
//     the same object).  This is described in the paper.
//
// The most subtle cause of indirect nodes is the generation of
// store-with-offset constraints since the offline graph doesn't
// represent them.  A global audit of constraint generation reveals the
// following uses of store-with-offset:
//
// (b) genDynamicCall, for P-blocks of dynamically called functions,
//     to which dynamic copy edges will be added to them during
//     solving: from storeConstraint for standalone functions,
//     and from invokeConstraint for methods.
//     All such P-blocks must be marked indirect.
// (c) MakeUpdate, to update the value part of a map object.
//     All MakeMap objects's value parts must be marked indirect.
// (d) copyElems, to update the destination array.
//     All array elements must be marked indirect.
//
// Not all indirect marking happens here.  ref() nodes are marked
// indirect at construction, and each constraint's presolve() method may
// mark additional nodes.
//
func (h *hvn) markIndirectNodes() {
	// (a) all address-taken nodes, plus all nodes following them
	// within the same object, since these may be indirectly
	// stored or address-taken.
	for _, c := range h.a.constraints {
		if c, ok := c.(*addrConstraint); ok {
			// Mark from the taken node to the end of its
			// enclosing object.
			start := h.a.enclosingObj(c.src)
			end := start + nodeid(h.a.nodes[start].obj.size)
			for id := c.src; id < end; id++ {
				h.markIndirect(onodeid(id), "A-T object")
			}
		}
	}
	// (b) P-blocks of all address-taken functions.
	for id := 0; id < h.N; id++ {
		obj := h.a.nodes[id].obj
		// TODO(adonovan): opt: if obj.cgn.fn is a method and
		// obj.cgn is not its shared contour, this is an
		// "inlined" static method call. We needn't consider it
		// address-taken since no invokeConstraint will affect it.
		if obj != nil && obj.flags&otFunction != 0 && h.a.atFuncs[obj.cgn.fn] {
			// address-taken function
			if debugHVNVerbose && h.log != nil {
				fmt.Fprintf(h.log, "n%d is address-taken: %s\n", id, obj.cgn.fn)
			}
			h.markIndirect(onodeid(id), "A-T func identity")
			id++
			// P-block size: params, plus the receiver if any.
			sig := obj.cgn.fn.Signature
			psize := h.a.sizeof(sig.Params())
			if sig.Recv() != nil {
				psize += h.a.sizeof(sig.Recv().Type())
			}
			for end := id + int(psize); id < end; id++ {
				h.markIndirect(onodeid(id), "A-T func P-block")
			}
			// Counteract the outer loop's id++: id currently
			// points one past the P-block.
			id--
			continue
		}
	}
	// (c) all map objects' value fields.
	for _, id := range h.a.mapValues {
		h.markIndirect(onodeid(id), "makemap.value")
	}
	// (d) all array element objects.
	// TODO(adonovan): opt: can we do better?
	for id := 0; id < h.N; id++ {
		// Identity node for an object of array type?
		if tArray, ok := h.a.nodes[id].typ.(*types.Array); ok {
			// Mark the array element nodes indirect.
			// (Skip past the identity field.)
			for _ = range h.a.flatten(tArray.Elem()) {
				id++
				h.markIndirect(onodeid(id), "array elem")
			}
		}
	}
}
// markIndirect records that node oid has points-to relations not
// represented in the offline graph; comment appears only in the log.
func (h *hvn) markIndirect(oid onodeid, comment string) {
	o := h.onodes[oid]
	o.indirect = true
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\to%d is indirect: %s\n", oid, comment)
	}
}
// addEdge adds an edge dst-->src.
// Note the unusual convention: edges are dependency (contraflow) edges.
func (h *hvn) addEdge(odst, osrc onodeid) {
	o := h.onodes[odst]
	o.edges.Insert(int(osrc))
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\to%d --> o%d\n", odst, osrc)
	}
}
// addImplicitEdge adds an implicit (dereference) edge odst ~~> osrc.
func (h *hvn) addImplicitEdge(odst, osrc onodeid) {
	o := h.onodes[odst]
	o.implicit.Insert(int(osrc))
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\to%d ~~> o%d\n", odst, osrc)
	}
}
// visit implements the depth-first search of Tarjan's SCC algorithm.
// Precondition: x is canonical.
//
// scc encoding (see onode): negative => on stack, 0 => unvisited,
// positive => node is root of an already-found SCC.  When a root is
// detected, the entire SCC is coalesced into x and labelled.
func (h *hvn) visit(x onodeid) {
	h.checkCanonical(x)
	xo := h.onodes[x]
	xo.index = h.index
	xo.lowlink = h.index
	h.index++
	h.stack = append(h.stack, x) // push
	assert(xo.scc == 0, "node revisited")
	xo.scc = -1 // mark as on-stack
	// Successors: both explicit and implicit edges.
	var deps []int
	deps = xo.edges.AppendTo(deps)
	deps = xo.implicit.AppendTo(deps)
	for _, y := range deps {
		// Loop invariant: x is canonical.
		y := h.find(onodeid(y))
		if x == y {
			continue // nodes already coalesced
		}
		// NB: deliberately shadows the outer xo; x may have been
		// re-canonicalized by a previous iteration.
		xo := h.onodes[x]
		yo := h.onodes[y]
		switch {
		case yo.scc > 0:
			// y is already a collapsed SCC
		case yo.scc < 0:
			// y is on the stack, and thus in the current SCC.
			if yo.index < xo.lowlink {
				xo.lowlink = yo.index
			}
		default:
			// y is unvisited; visit it now.
			h.visit(y)
			// Note: x and y are now non-canonical.
			x = h.find(onodeid(x))
			if yo.lowlink < xo.lowlink {
				xo.lowlink = yo.lowlink
			}
		}
	}
	h.checkCanonical(x)
	// Is x the root of an SCC?
	if xo.lowlink == xo.index {
		// Coalesce all nodes in the SCC.
		if debugHVNVerbose && h.log != nil {
			fmt.Fprintf(h.log, "scc o%d\n", x)
		}
		for {
			// Pop y from stack.
			i := len(h.stack) - 1
			y := h.stack[i]
			h.stack = h.stack[:i]
			h.checkCanonical(x)
			xo := h.onodes[x]
			h.checkCanonical(y)
			yo := h.onodes[y]
			if xo == yo {
				// SCC is complete.
				xo.scc = 1
				h.labelSCC(x)
				break
			}
			h.coalesce(x, y)
		}
	}
}
// labelSCC assigns a single pointer-equivalence label to the
// (already-coalesced) SCC whose representative is x: a fresh label if
// the SCC is indirect, label 0 if it receives no pointer labels (a
// non-pointer), the sole incoming label if there is exactly one, or a
// canonical label for the incoming label set otherwise.
// Precondition: x is canonical.
func (h *hvn) labelSCC(x onodeid) {
	h.checkCanonical(x)
	xo := h.onodes[x]
	xpe := &xo.peLabels
	// All indirect nodes get new labels.
	if xo.indirect {
		label := h.nextLabel()
		if debugHVNVerbose && h.log != nil {
			fmt.Fprintf(h.log, "\tcreate p%d: indirect SCC\n", label)
			fmt.Fprintf(h.log, "\to%d has p%d\n", x, label)
		}
		// Remove pre-labeling, in case a direct pre-labeled node was
		// merged with an indirect one.
		xpe.Clear()
		xpe.Insert(int(label))
		return
	}
	// Invariant: all peLabels sets are non-empty.
	// Those that are logically empty contain zero as their sole element.
	// No other set contains zero.
	// Find all labels coming in to the coalesced SCC node.
	for _, y := range xo.edges.AppendTo(nil) {
		y := h.find(onodeid(y))
		if y == x {
			continue // already coalesced
		}
		ype := &h.onodes[y].peLabels
		if debugHVNVerbose && h.log != nil {
			fmt.Fprintf(h.log, "\tedge from o%d = %s\n", y, ype)
		}
		if ype.IsEmpty() {
			if debugHVNVerbose && h.log != nil {
				fmt.Fprintf(h.log, "\tnode has no PE label\n")
			}
		}
		assert(!ype.IsEmpty(), "incoming node has no PE label")
		if ype.Has(0) {
			// {0} represents a non-pointer.
			assert(ype.Len() == 1, "PE set contains {0, ...}")
		} else {
			xpe.UnionWith(ype)
		}
	}
	switch xpe.Len() {
	case 0:
		// SCC has no incoming non-zero PE labels: it is a non-pointer.
		xpe.Insert(0)
	case 1:
		// already a singleton
	default:
		// SCC has multiple incoming non-zero PE labels.
		// Find the canonical label representing this set.
		// We use String() as a fingerprint consistent with Equals().
		key := xpe.String()
		label, ok := h.hvnLabel[key]
		if !ok {
			label = h.nextLabel()
			if debugHVNVerbose && h.log != nil {
				fmt.Fprintf(h.log, "\tcreate p%d: union %s\n", label, xpe.String())
			}
			h.hvnLabel[key] = label
		}
		xpe.Clear()
		xpe.Insert(int(label))
	}
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\to%d has p%d\n", x, xpe.Min())
	}
}
// coalesce combines two nodes in the offline constraint graph,
// merging y into x.
// Precondition: x and y are canonical.
func (h *hvn) coalesce(x, y onodeid) {
	xo, yo := h.onodes[x], h.onodes[y]

	// Make x the canonical representative of y.
	yo.rep = x
	if debugHVNVerbose && h.log != nil {
		fmt.Fprintf(h.log, "\tcoalesce o%d into o%d\n", y, x)
	}

	// Transfer y's explicit edges, implicit edges, and
	// pointer-equivalence labels into x, emptying y's sets.
	xo.edges.UnionWith(&yo.edges)
	yo.edges.Clear()
	xo.implicit.UnionWith(&yo.implicit)
	yo.implicit.Clear()
	xo.peLabels.UnionWith(&yo.peLabels)
	yo.peLabels.Clear()

	// Indirectness is sticky: if either node was indirect, the
	// merged node is too.
	xo.indirect = xo.indirect || yo.indirect
}
// simplify computes a degenerate renumbering of nodeids from the PE
// labels assigned by the hvn, and uses it to simplify the main
// constraint graph, eliminating non-pointer nodes and duplicate
// constraints.
//
// Phase 1 builds mapping, which sends each node to the canonical node
// of its PE label (label 0, i.e. node n0, marks non-pointers).
// Phase 2 rewrites the constraint list in place using mapping,
// dropping constraints that are duplicates or involve non-pointers.
//
func (h *hvn) simplify() {
	// canon maps each peLabel to its canonical main node.
	canon := make([]nodeid, h.label)
	for i := range canon {
		canon[i] = nodeid(h.N) // indicates "unset"
	}
	// mapping maps each main node index to the index of the canonical node.
	mapping := make([]nodeid, len(h.a.nodes))
	for id := range h.a.nodes {
		id := nodeid(id)
		if id == 0 {
			// n0 (the non-pointer node) is its own canonical node.
			canon[0] = 0
			mapping[0] = 0
			continue
		}
		oid := h.find(onodeid(id))
		peLabels := &h.onodes[oid].peLabels
		assert(peLabels.Len() == 1, "PE class is not a singleton")
		label := peLabel(peLabels.Min())
		canonId := canon[label]
		if canonId == nodeid(h.N) {
			// id becomes the representative of the PE label.
			canonId = id
			canon[label] = canonId
			if h.a.log != nil {
				fmt.Fprintf(h.a.log, "\tpts(n%d) is canonical : \t(%s)\n",
					id, h.a.nodes[id].typ)
			}
		} else {
			// Link the solver states for the two nodes.
			assert(h.a.nodes[canonId].solve != nil, "missing solver state")
			h.a.nodes[id].solve = h.a.nodes[canonId].solve
			if h.a.log != nil {
				// TODO(adonovan): debug: reorganize the log so it prints
				// one line:
				// pe y = x1, ..., xn
				// for each canonical y. Requires allocation.
				fmt.Fprintf(h.a.log, "\tpts(n%d) = pts(n%d) : %s\n",
					id, canonId, h.a.nodes[id].typ)
			}
		}
		mapping[id] = canonId
	}
	// Renumber the constraints, eliminate duplicates, and eliminate
	// any containing non-pointers (n0).
	// One de-dup set per simple constraint kind, keyed by value.
	addrs := make(map[addrConstraint]bool)
	copys := make(map[copyConstraint]bool)
	loads := make(map[loadConstraint]bool)
	stores := make(map[storeConstraint]bool)
	offsetAddrs := make(map[offsetAddrConstraint]bool)
	untags := make(map[untagConstraint]bool)
	typeFilters := make(map[typeFilterConstraint]bool)
	invokes := make(map[invokeConstraint]bool)
	nbefore := len(h.a.constraints)
	cc := h.a.constraints[:0] // in-situ compaction
	for _, c := range h.a.constraints {
		// Renumber.
		switch c := c.(type) {
		case *addrConstraint:
			// Don't renumber c.src since it is the label of
			// an addressable object and will appear in PT sets.
			c.dst = mapping[c.dst]
		default:
			c.renumber(mapping)
		}
		if c.ptr() == 0 {
			continue // skip: constraint attached to non-pointer
		}
		var dup bool
		switch c := c.(type) {
		case *addrConstraint:
			_, dup = addrs[*c]
			addrs[*c] = true
		case *copyConstraint:
			if c.src == c.dst {
				continue // skip degenerate copies
			}
			if c.src == 0 {
				continue // skip copy from non-pointer
			}
			_, dup = copys[*c]
			copys[*c] = true
		case *loadConstraint:
			if c.src == 0 {
				continue // skip load from non-pointer
			}
			_, dup = loads[*c]
			loads[*c] = true
		case *storeConstraint:
			if c.src == 0 {
				continue // skip store from non-pointer
			}
			_, dup = stores[*c]
			stores[*c] = true
		case *offsetAddrConstraint:
			if c.src == 0 {
				continue // skip offset from non-pointer
			}
			_, dup = offsetAddrs[*c]
			offsetAddrs[*c] = true
		case *untagConstraint:
			if c.src == 0 {
				continue // skip untag of non-pointer
			}
			_, dup = untags[*c]
			untags[*c] = true
		case *typeFilterConstraint:
			if c.src == 0 {
				continue // skip filter of non-pointer
			}
			_, dup = typeFilters[*c]
			typeFilters[*c] = true
		case *invokeConstraint:
			if c.params == 0 {
				panic("non-pointer invoke.params")
			}
			if c.iface == 0 {
				continue // skip invoke on non-pointer
			}
			_, dup = invokes[*c]
			invokes[*c] = true
		default:
			// We don't bother de-duping advanced constraints
			// (e.g. reflection) since they are uncommon.
			// Eliminate constraints containing non-pointer nodeids.
			//
			// We use reflection to find the fields to avoid
			// adding yet another method to constraint.
			//
			// TODO(adonovan): experiment with a constraint
			// method that returns a slice of pointers to
			// nodeids fields to enable uniform iteration;
			// the renumber() method could be removed and
			// implemented using the new one.
			//
			// TODO(adonovan): opt: this is unsound since
			// some constraints still have an effect if one
			// of the operands is zero: rVCall, rVMapIndex,
			// rvSetMapIndex. Handle them specially.
			rtNodeid := reflect.TypeOf(nodeid(0))
			x := reflect.ValueOf(c).Elem()
			for i, nf := 0, x.NumField(); i < nf; i++ {
				f := x.Field(i)
				if f.Type() == rtNodeid {
					if f.Uint() == 0 {
						dup = true // skip it
						break
					}
				}
			}
		}
		if dup {
			continue // skip duplicates
		}
		cc = append(cc, c)
	}
	h.a.constraints = cc
	log.Printf("#constraints: was %d, now %d\n", nbefore, len(h.a.constraints))
}
// find returns the canonical onodeid for x.
// (The onodes form a disjoint set forest.)
func (h *hvn) find(x onodeid) onodeid {
	// TODO(adonovan): opt: this is a CPU hotspot.  Try "union by rank".
	//
	// Two passes: first locate the root, then compress every node
	// on the path so it points directly at the root.
	root := x
	for {
		next := h.onodes[root].rep
		if next == root {
			break
		}
		root = next
	}
	for x != root {
		o := h.onodes[x]
		x, o.rep = o.rep, root
	}
	return root
}
// checkCanonical asserts (in debug builds only) that x is its own
// representative.
func (h *hvn) checkCanonical(x onodeid) {
	if !debugHVN {
		return
	}
	assert(h.find(x) == x, "not canonical")
}
// assert panics with msg if p is false; it is a no-op unless the
// debugHVN flag is enabled.
func assert(p bool, msg string) {
	if !debugHVN {
		return
	}
	if !p {
		panic("assertion failed: " + msg)
	}
}

View File

@ -255,13 +255,15 @@ func ext۰NotYetImplemented(a *analysis, cgn *cgnode) {
// runtime.SetFinalizer(x, f) // runtime.SetFinalizer(x, f)
type runtimeSetFinalizerConstraint struct { type runtimeSetFinalizerConstraint struct {
targets nodeid targets nodeid // (indirect)
f nodeid // (ptr) f nodeid // (ptr)
x nodeid x nodeid
} }
func (c *runtimeSetFinalizerConstraint) ptr() nodeid { return c.f } func (c *runtimeSetFinalizerConstraint) ptr() nodeid { return c.f }
func (c *runtimeSetFinalizerConstraint) indirect(nodes []nodeid) []nodeid { return nodes } func (c *runtimeSetFinalizerConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.targets), "SetFinalizer.targets")
}
func (c *runtimeSetFinalizerConstraint) renumber(mapping []nodeid) { func (c *runtimeSetFinalizerConstraint) renumber(mapping []nodeid) {
c.targets = mapping[c.targets] c.targets = mapping[c.targets]
c.f = mapping[c.f] c.f = mapping[c.f]

View File

@ -4,17 +4,12 @@
package pointer package pointer
// This file defines the constraint optimiser ("pre-solver"). // This file implements renumbering, a pre-solver optimization to
// improve the efficiency of the solver's points-to set representation.
//
// TODO(adonovan): rename file "renumber.go"
import ( import "fmt"
"fmt"
)
func (a *analysis) optimize() {
a.renumber()
// TODO(adonovan): opt: PE (HVN, HRU), LE, etc.
}
// renumber permutes a.nodes so that all nodes within an addressable // renumber permutes a.nodes so that all nodes within an addressable
// object appear before all non-addressable nodes, maintaining the // object appear before all non-addressable nodes, maintaining the
@ -30,7 +25,14 @@ func (a *analysis) optimize() {
// NB: nodes added during solving (e.g. for reflection, SetFinalizer) // NB: nodes added during solving (e.g. for reflection, SetFinalizer)
// will be appended to the end. // will be appended to the end.
// //
// Renumbering makes the PTA log inscrutable. To aid debugging, later
// phases (e.g. HVN) must not rely on it having occurred.
//
func (a *analysis) renumber() { func (a *analysis) renumber() {
if a.log != nil {
fmt.Fprintf(a.log, "\n\n==== Renumbering\n\n")
}
N := nodeid(len(a.nodes)) N := nodeid(len(a.nodes))
newNodes := make([]*node, N, N) newNodes := make([]*node, N, N)
renumbering := make([]nodeid, N, N) // maps old to new renumbering := make([]nodeid, N, N) // maps old to new

View File

@ -44,7 +44,7 @@ var inputs = []string{
"testdata/fmtexcerpt.go", "testdata/fmtexcerpt.go",
"testdata/func.go", "testdata/func.go",
"testdata/funcreflect.go", "testdata/funcreflect.go",
"testdata/hello.go", "testdata/hello.go", // NB: causes spurious failure of HVN cross-check
"testdata/interfaces.go", "testdata/interfaces.go",
"testdata/mapreflect.go", "testdata/mapreflect.go",
"testdata/maps.go", "testdata/maps.go",
@ -287,6 +287,7 @@ func doOneInput(input, filename string) bool {
} }
var log bytes.Buffer var log bytes.Buffer
fmt.Fprintf(&log, "Input: %s\n", filename)
// Run the analysis. // Run the analysis.
config := &pointer.Config{ config := &pointer.Config{

View File

@ -35,7 +35,7 @@ func (c *untagConstraint) String() string {
} }
func (c *invokeConstraint) String() string { func (c *invokeConstraint) String() string {
return fmt.Sprintf("invoke n%d.%s(n%d ...)", c.iface, c.method.Name(), c.params+1) return fmt.Sprintf("invoke n%d.%s(n%d ...)", c.iface, c.method.Name(), c.params)
} }
func (n nodeid) String() string { func (n nodeid) String() string {

View File

@ -20,6 +20,9 @@ package pointer
// yet implemented) correspond to reflect.Values with // yet implemented) correspond to reflect.Values with
// reflect.flagAddr.] // reflect.flagAddr.]
// A picture would help too. // A picture would help too.
//
// TODO(adonovan): try factoring up the common parts of the majority of
// these constraints that are single input, single output.
import ( import (
"fmt" "fmt"
@ -160,7 +163,9 @@ type rVBytesConstraint struct {
} }
func (c *rVBytesConstraint) ptr() nodeid { return c.v } func (c *rVBytesConstraint) ptr() nodeid { return c.v }
func (c *rVBytesConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *rVBytesConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "rVBytes.result")
}
func (c *rVBytesConstraint) renumber(mapping []nodeid) { func (c *rVBytesConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v] c.v = mapping[c.v]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -205,7 +210,7 @@ func ext۰reflect۰Value۰Bytes(a *analysis, cgn *cgnode) {
// result = v.Call(in) // result = v.Call(in)
type rVCallConstraint struct { type rVCallConstraint struct {
cgn *cgnode cgn *cgnode
targets nodeid targets nodeid // (indirect)
v nodeid // (ptr) v nodeid // (ptr)
arg nodeid // = in[*] arg nodeid // = in[*]
result nodeid // (indirect) result nodeid // (indirect)
@ -213,13 +218,9 @@ type rVCallConstraint struct {
} }
func (c *rVCallConstraint) ptr() nodeid { return c.v } func (c *rVCallConstraint) ptr() nodeid { return c.v }
func (c *rVCallConstraint) indirect(nodes []nodeid) []nodeid { func (c *rVCallConstraint) presolve(h *hvn) {
nodes = append(nodes, c.result) h.markIndirect(onodeid(c.targets), "rVCall.targets")
// TODO(adonovan): we may be able to handle 'targets' out-of-band h.markIndirect(onodeid(c.result), "rVCall.result")
// so that all implementations indirect() return a single value.
// We can then dispense with the slice.
nodes = append(nodes, c.targets)
return nodes
} }
func (c *rVCallConstraint) renumber(mapping []nodeid) { func (c *rVCallConstraint) renumber(mapping []nodeid) {
c.targets = mapping[c.targets] c.targets = mapping[c.targets]
@ -364,7 +365,9 @@ type rVElemConstraint struct {
} }
func (c *rVElemConstraint) ptr() nodeid { return c.v } func (c *rVElemConstraint) ptr() nodeid { return c.v }
func (c *rVElemConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *rVElemConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "rVElem.result")
}
func (c *rVElemConstraint) renumber(mapping []nodeid) { func (c *rVElemConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v] c.v = mapping[c.v]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -427,7 +430,9 @@ type rVIndexConstraint struct {
} }
func (c *rVIndexConstraint) ptr() nodeid { return c.v } func (c *rVIndexConstraint) ptr() nodeid { return c.v }
func (c *rVIndexConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *rVIndexConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "rVIndex.result")
}
func (c *rVIndexConstraint) renumber(mapping []nodeid) { func (c *rVIndexConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v] c.v = mapping[c.v]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -489,7 +494,9 @@ type rVInterfaceConstraint struct {
} }
func (c *rVInterfaceConstraint) ptr() nodeid { return c.v } func (c *rVInterfaceConstraint) ptr() nodeid { return c.v }
func (c *rVInterfaceConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *rVInterfaceConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "rVInterface.result")
}
func (c *rVInterfaceConstraint) renumber(mapping []nodeid) { func (c *rVInterfaceConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v] c.v = mapping[c.v]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -542,7 +549,9 @@ type rVMapIndexConstraint struct {
} }
func (c *rVMapIndexConstraint) ptr() nodeid { return c.v } func (c *rVMapIndexConstraint) ptr() nodeid { return c.v }
func (c *rVMapIndexConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *rVMapIndexConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "rVMapIndex.result")
}
func (c *rVMapIndexConstraint) renumber(mapping []nodeid) { func (c *rVMapIndexConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v] c.v = mapping[c.v]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -596,7 +605,9 @@ type rVMapKeysConstraint struct {
} }
func (c *rVMapKeysConstraint) ptr() nodeid { return c.v } func (c *rVMapKeysConstraint) ptr() nodeid { return c.v }
func (c *rVMapKeysConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *rVMapKeysConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "rVMapKeys.result")
}
func (c *rVMapKeysConstraint) renumber(mapping []nodeid) { func (c *rVMapKeysConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v] c.v = mapping[c.v]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -650,7 +661,7 @@ func ext۰reflect۰Value۰MapKeys(a *analysis, cgn *cgnode) {
func ext۰reflect۰Value۰Method(a *analysis, cgn *cgnode) {} // TODO(adonovan) func ext۰reflect۰Value۰Method(a *analysis, cgn *cgnode) {} // TODO(adonovan)
func ext۰reflect۰Value۰MethodByName(a *analysis, cgn *cgnode) {} // TODO(adonovan) func ext۰reflect۰Value۰MethodByName(a *analysis, cgn *cgnode) {} // TODO(adonovan)
// ---------- func (Value).Recv(Value) ---------- // ---------- func (Value).Recv(Value) Value ----------
// result, _ = v.Recv() // result, _ = v.Recv()
type rVRecvConstraint struct { type rVRecvConstraint struct {
@ -660,7 +671,9 @@ type rVRecvConstraint struct {
} }
func (c *rVRecvConstraint) ptr() nodeid { return c.v } func (c *rVRecvConstraint) ptr() nodeid { return c.v }
func (c *rVRecvConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *rVRecvConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "rVRecv.result")
}
func (c *rVRecvConstraint) renumber(mapping []nodeid) { func (c *rVRecvConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v] c.v = mapping[c.v]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -715,7 +728,7 @@ type rVSendConstraint struct {
} }
func (c *rVSendConstraint) ptr() nodeid { return c.v } func (c *rVSendConstraint) ptr() nodeid { return c.v }
func (c *rVSendConstraint) indirect(nodes []nodeid) []nodeid { return nodes } func (c *rVSendConstraint) presolve(*hvn) {}
func (c *rVSendConstraint) renumber(mapping []nodeid) { func (c *rVSendConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v] c.v = mapping[c.v]
c.x = mapping[c.x] c.x = mapping[c.x]
@ -768,7 +781,7 @@ type rVSetBytesConstraint struct {
} }
func (c *rVSetBytesConstraint) ptr() nodeid { return c.v } func (c *rVSetBytesConstraint) ptr() nodeid { return c.v }
func (c *rVSetBytesConstraint) indirect(nodes []nodeid) []nodeid { return nodes } func (c *rVSetBytesConstraint) presolve(*hvn) {}
func (c *rVSetBytesConstraint) renumber(mapping []nodeid) { func (c *rVSetBytesConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v] c.v = mapping[c.v]
c.x = mapping[c.x] c.x = mapping[c.x]
@ -817,7 +830,7 @@ type rVSetMapIndexConstraint struct {
} }
func (c *rVSetMapIndexConstraint) ptr() nodeid { return c.v } func (c *rVSetMapIndexConstraint) ptr() nodeid { return c.v }
func (c *rVSetMapIndexConstraint) indirect(nodes []nodeid) []nodeid { return nodes } func (c *rVSetMapIndexConstraint) presolve(*hvn) {}
func (c *rVSetMapIndexConstraint) renumber(mapping []nodeid) { func (c *rVSetMapIndexConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v] c.v = mapping[c.v]
c.key = mapping[c.key] c.key = mapping[c.key]
@ -868,7 +881,7 @@ func ext۰reflect۰Value۰SetMapIndex(a *analysis, cgn *cgnode) {
func ext۰reflect۰Value۰SetPointer(a *analysis, cgn *cgnode) {} // TODO(adonovan) func ext۰reflect۰Value۰SetPointer(a *analysis, cgn *cgnode) {} // TODO(adonovan)
// ---------- func (Value).Slice(v Value, i, j int) ---------- // ---------- func (Value).Slice(v Value, i, j int) Value ----------
// result = v.Slice(_, _) // result = v.Slice(_, _)
type rVSliceConstraint struct { type rVSliceConstraint struct {
@ -878,7 +891,9 @@ type rVSliceConstraint struct {
} }
func (c *rVSliceConstraint) ptr() nodeid { return c.v } func (c *rVSliceConstraint) ptr() nodeid { return c.v }
func (c *rVSliceConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *rVSliceConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "rVSlice.result")
}
func (c *rVSliceConstraint) renumber(mapping []nodeid) { func (c *rVSliceConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v] c.v = mapping[c.v]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -957,7 +972,9 @@ type reflectChanOfConstraint struct {
} }
func (c *reflectChanOfConstraint) ptr() nodeid { return c.t } func (c *reflectChanOfConstraint) ptr() nodeid { return c.t }
func (c *reflectChanOfConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *reflectChanOfConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "reflectChanOf.result")
}
func (c *reflectChanOfConstraint) renumber(mapping []nodeid) { func (c *reflectChanOfConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t] c.t = mapping[c.t]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1029,7 +1046,9 @@ type reflectIndirectConstraint struct {
} }
func (c *reflectIndirectConstraint) ptr() nodeid { return c.v } func (c *reflectIndirectConstraint) ptr() nodeid { return c.v }
func (c *reflectIndirectConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *reflectIndirectConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "reflectIndirect.result")
}
func (c *reflectIndirectConstraint) renumber(mapping []nodeid) { func (c *reflectIndirectConstraint) renumber(mapping []nodeid) {
c.v = mapping[c.v] c.v = mapping[c.v]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1081,7 +1100,9 @@ type reflectMakeChanConstraint struct {
} }
func (c *reflectMakeChanConstraint) ptr() nodeid { return c.typ } func (c *reflectMakeChanConstraint) ptr() nodeid { return c.typ }
func (c *reflectMakeChanConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *reflectMakeChanConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "reflectMakeChan.result")
}
func (c *reflectMakeChanConstraint) renumber(mapping []nodeid) { func (c *reflectMakeChanConstraint) renumber(mapping []nodeid) {
c.typ = mapping[c.typ] c.typ = mapping[c.typ]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1139,7 +1160,9 @@ type reflectMakeMapConstraint struct {
} }
func (c *reflectMakeMapConstraint) ptr() nodeid { return c.typ } func (c *reflectMakeMapConstraint) ptr() nodeid { return c.typ }
func (c *reflectMakeMapConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *reflectMakeMapConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "reflectMakeMap.result")
}
func (c *reflectMakeMapConstraint) renumber(mapping []nodeid) { func (c *reflectMakeMapConstraint) renumber(mapping []nodeid) {
c.typ = mapping[c.typ] c.typ = mapping[c.typ]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1196,7 +1219,9 @@ type reflectMakeSliceConstraint struct {
} }
func (c *reflectMakeSliceConstraint) ptr() nodeid { return c.typ } func (c *reflectMakeSliceConstraint) ptr() nodeid { return c.typ }
func (c *reflectMakeSliceConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *reflectMakeSliceConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "reflectMakeSlice.result")
}
func (c *reflectMakeSliceConstraint) renumber(mapping []nodeid) { func (c *reflectMakeSliceConstraint) renumber(mapping []nodeid) {
c.typ = mapping[c.typ] c.typ = mapping[c.typ]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1253,7 +1278,9 @@ type reflectNewConstraint struct {
} }
func (c *reflectNewConstraint) ptr() nodeid { return c.typ } func (c *reflectNewConstraint) ptr() nodeid { return c.typ }
func (c *reflectNewConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *reflectNewConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "reflectNew.result")
}
func (c *reflectNewConstraint) renumber(mapping []nodeid) { func (c *reflectNewConstraint) renumber(mapping []nodeid) {
c.typ = mapping[c.typ] c.typ = mapping[c.typ]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1315,7 +1342,9 @@ type reflectPtrToConstraint struct {
} }
func (c *reflectPtrToConstraint) ptr() nodeid { return c.t } func (c *reflectPtrToConstraint) ptr() nodeid { return c.t }
func (c *reflectPtrToConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *reflectPtrToConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "reflectPtrTo.result")
}
func (c *reflectPtrToConstraint) renumber(mapping []nodeid) { func (c *reflectPtrToConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t] c.t = mapping[c.t]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1364,7 +1393,9 @@ type reflectSliceOfConstraint struct {
} }
func (c *reflectSliceOfConstraint) ptr() nodeid { return c.t } func (c *reflectSliceOfConstraint) ptr() nodeid { return c.t }
func (c *reflectSliceOfConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *reflectSliceOfConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "reflectSliceOf.result")
}
func (c *reflectSliceOfConstraint) renumber(mapping []nodeid) { func (c *reflectSliceOfConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t] c.t = mapping[c.t]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1411,7 +1442,9 @@ type reflectTypeOfConstraint struct {
} }
func (c *reflectTypeOfConstraint) ptr() nodeid { return c.i } func (c *reflectTypeOfConstraint) ptr() nodeid { return c.i }
func (c *reflectTypeOfConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *reflectTypeOfConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "reflectTypeOf.result")
}
func (c *reflectTypeOfConstraint) renumber(mapping []nodeid) { func (c *reflectTypeOfConstraint) renumber(mapping []nodeid) {
c.i = mapping[c.i] c.i = mapping[c.i]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1462,7 +1495,9 @@ type reflectZeroConstraint struct {
} }
func (c *reflectZeroConstraint) ptr() nodeid { return c.typ } func (c *reflectZeroConstraint) ptr() nodeid { return c.typ }
func (c *reflectZeroConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *reflectZeroConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "reflectZero.result")
}
func (c *reflectZeroConstraint) renumber(mapping []nodeid) { func (c *reflectZeroConstraint) renumber(mapping []nodeid) {
c.typ = mapping[c.typ] c.typ = mapping[c.typ]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1522,7 +1557,9 @@ type rtypeElemConstraint struct {
} }
func (c *rtypeElemConstraint) ptr() nodeid { return c.t } func (c *rtypeElemConstraint) ptr() nodeid { return c.t }
func (c *rtypeElemConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *rtypeElemConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "rtypeElem.result")
}
func (c *rtypeElemConstraint) renumber(mapping []nodeid) { func (c *rtypeElemConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t] c.t = mapping[c.t]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1573,7 +1610,9 @@ type rtypeFieldByNameConstraint struct {
} }
func (c *rtypeFieldByNameConstraint) ptr() nodeid { return c.t } func (c *rtypeFieldByNameConstraint) ptr() nodeid { return c.t }
func (c *rtypeFieldByNameConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *rtypeFieldByNameConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result+3), "rtypeFieldByName.result.Type")
}
func (c *rtypeFieldByNameConstraint) renumber(mapping []nodeid) { func (c *rtypeFieldByNameConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t] c.t = mapping[c.t]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1662,7 +1701,9 @@ type rtypeInOutConstraint struct {
} }
func (c *rtypeInOutConstraint) ptr() nodeid { return c.t } func (c *rtypeInOutConstraint) ptr() nodeid { return c.t }
func (c *rtypeInOutConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *rtypeInOutConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "rtypeInOut.result")
}
func (c *rtypeInOutConstraint) renumber(mapping []nodeid) { func (c *rtypeInOutConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t] c.t = mapping[c.t]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1737,7 +1778,9 @@ type rtypeKeyConstraint struct {
} }
func (c *rtypeKeyConstraint) ptr() nodeid { return c.t } func (c *rtypeKeyConstraint) ptr() nodeid { return c.t }
func (c *rtypeKeyConstraint) indirect(nodes []nodeid) []nodeid { return append(nodes, c.result) } func (c *rtypeKeyConstraint) presolve(h *hvn) {
h.markIndirect(onodeid(c.result), "rtypeKey.result")
}
func (c *rtypeKeyConstraint) renumber(mapping []nodeid) { func (c *rtypeKeyConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t] c.t = mapping[c.t]
c.result = mapping[c.result] c.result = mapping[c.result]
@ -1784,8 +1827,9 @@ type rtypeMethodByNameConstraint struct {
} }
func (c *rtypeMethodByNameConstraint) ptr() nodeid { return c.t } func (c *rtypeMethodByNameConstraint) ptr() nodeid { return c.t }
func (c *rtypeMethodByNameConstraint) indirect(nodes []nodeid) []nodeid { func (c *rtypeMethodByNameConstraint) presolve(h *hvn) {
return append(nodes, c.result) h.markIndirect(onodeid(c.result+3), "rtypeMethodByName.result.Type")
h.markIndirect(onodeid(c.result+4), "rtypeMethodByName.result.Func")
} }
func (c *rtypeMethodByNameConstraint) renumber(mapping []nodeid) { func (c *rtypeMethodByNameConstraint) renumber(mapping []nodeid) {
c.t = mapping[c.t] c.t = mapping[c.t]

View File

@ -13,15 +13,22 @@ import (
"code.google.com/p/go.tools/go/types" "code.google.com/p/go.tools/go/types"
) )
func (a *analysis) solve() { type solverState struct {
var delta nodeset complex []constraint // complex constraints attached to this node
copyTo nodeset // simple copy constraint edges
// Solver main loop. pts nodeset // points-to set of this node
for round := 1; ; round++ { prevPTS nodeset // pts(n) in previous iteration (for difference propagation)
if a.log != nil {
fmt.Fprintf(a.log, "Solving, round %d\n", round)
} }
func (a *analysis) solve() {
start("Solving")
if a.log != nil {
fmt.Fprintf(a.log, "\n\n==== Solving constraints\n\n")
}
// Solver main loop.
var delta nodeset
for {
// Add new constraints to the graph: // Add new constraints to the graph:
// static constraints from SSA on round 1, // static constraints from SSA on round 1,
// dynamic constraints from reflection thereafter. // dynamic constraints from reflection thereafter.
@ -39,22 +46,33 @@ func (a *analysis) solve() {
n := a.nodes[id] n := a.nodes[id]
// Difference propagation. // Difference propagation.
delta.Difference(&n.pts.Sparse, &n.prevPts.Sparse) delta.Difference(&n.solve.pts.Sparse, &n.solve.prevPTS.Sparse)
if delta.IsEmpty() { if delta.IsEmpty() {
continue continue
} }
n.prevPts.Copy(&n.pts.Sparse) if a.log != nil {
fmt.Fprintf(a.log, "\t\tpts(n%d : %s) = %s + %s\n",
id, n.typ, &delta, &n.solve.prevPTS)
}
n.solve.prevPTS.Copy(&n.solve.pts.Sparse)
// Apply all resolution rules attached to n. // Apply all resolution rules attached to n.
a.solveConstraints(n, &delta) a.solveConstraints(n, &delta)
if a.log != nil { if a.log != nil {
fmt.Fprintf(a.log, "\t\tpts(n%d) = %s\n", id, &n.pts) fmt.Fprintf(a.log, "\t\tpts(n%d) = %s\n", id, &n.solve.pts)
} }
} }
if !a.nodes[0].pts.IsEmpty() { if !a.nodes[0].solve.pts.IsEmpty() {
panic(fmt.Sprintf("pts(0) is nonempty: %s", &a.nodes[0].pts)) panic(fmt.Sprintf("pts(0) is nonempty: %s", &a.nodes[0].solve.pts))
}
// Release working state (but keep final PTS).
for _, n := range a.nodes {
n.solve.complex = nil
n.solve.copyTo.Clear()
n.solve.prevPTS.Clear()
} }
if a.log != nil { if a.log != nil {
@ -62,11 +80,12 @@ func (a *analysis) solve() {
// Dump solution. // Dump solution.
for i, n := range a.nodes { for i, n := range a.nodes {
if !n.pts.IsEmpty() { if !n.solve.pts.IsEmpty() {
fmt.Fprintf(a.log, "pts(n%d) = %s : %s\n", i, &n.pts, n.typ) fmt.Fprintf(a.log, "pts(n%d) = %s : %s\n", i, &n.solve.pts, n.typ)
} }
} }
} }
stop("Solving")
} }
// processNewConstraints takes the new constraints from a.constraints // processNewConstraints takes the new constraints from a.constraints
@ -84,13 +103,13 @@ func (a *analysis) processNewConstraints() {
for _, c := range constraints { for _, c := range constraints {
if c, ok := c.(*addrConstraint); ok { if c, ok := c.(*addrConstraint); ok {
dst := a.nodes[c.dst] dst := a.nodes[c.dst]
dst.pts.add(c.src) dst.solve.pts.add(c.src)
// Populate the worklist with nodes that point to // Populate the worklist with nodes that point to
// something initially (due to addrConstraints) and // something initially (due to addrConstraints) and
// have other constraints attached. // have other constraints attached.
// (A no-op in round 1.) // (A no-op in round 1.)
if !dst.copyTo.IsEmpty() || len(dst.complex) > 0 { if !dst.solve.copyTo.IsEmpty() || len(dst.solve.complex) > 0 {
a.addWork(c.dst) a.addWork(c.dst)
} }
} }
@ -107,16 +126,16 @@ func (a *analysis) processNewConstraints() {
case *copyConstraint: case *copyConstraint:
// simple (copy) constraint // simple (copy) constraint
id = c.src id = c.src
a.nodes[id].copyTo.add(c.dst) a.nodes[id].solve.copyTo.add(c.dst)
default: default:
// complex constraint // complex constraint
id = c.ptr() id = c.ptr()
ptr := a.nodes[id] solve := a.nodes[id].solve
ptr.complex = append(ptr.complex, c) solve.complex = append(solve.complex, c)
} }
if n := a.nodes[id]; !n.pts.IsEmpty() { if n := a.nodes[id]; !n.solve.pts.IsEmpty() {
if !n.prevPts.IsEmpty() { if !n.solve.prevPTS.IsEmpty() {
stale.add(id) stale.add(id)
} }
a.addWork(id) a.addWork(id)
@ -125,8 +144,8 @@ func (a *analysis) processNewConstraints() {
// Apply new constraints to pre-existing PTS labels. // Apply new constraints to pre-existing PTS labels.
var space [50]int var space [50]int
for _, id := range stale.AppendTo(space[:0]) { for _, id := range stale.AppendTo(space[:0]) {
n := a.nodes[id] n := a.nodes[nodeid(id)]
a.solveConstraints(n, &n.prevPts) a.solveConstraints(n, &n.solve.prevPTS)
} }
} }
@ -140,7 +159,7 @@ func (a *analysis) solveConstraints(n *node, delta *nodeset) {
} }
// Process complex constraints dependent on n. // Process complex constraints dependent on n.
for _, c := range n.complex { for _, c := range n.solve.complex {
if a.log != nil { if a.log != nil {
fmt.Fprintf(a.log, "\t\tconstraint %s\n", c) fmt.Fprintf(a.log, "\t\tconstraint %s\n", c)
} }
@ -149,10 +168,10 @@ func (a *analysis) solveConstraints(n *node, delta *nodeset) {
// Process copy constraints. // Process copy constraints.
var copySeen nodeset var copySeen nodeset
for _, x := range n.copyTo.AppendTo(a.deltaSpace) { for _, x := range n.solve.copyTo.AppendTo(a.deltaSpace) {
mid := nodeid(x) mid := nodeid(x)
if copySeen.add(mid) { if copySeen.add(mid) {
if a.nodes[mid].pts.addAll(delta) { if a.nodes[mid].solve.pts.addAll(delta) {
a.addWork(mid) a.addWork(mid)
} }
} }
@ -161,7 +180,11 @@ func (a *analysis) solveConstraints(n *node, delta *nodeset) {
// addLabel adds label to the points-to set of ptr and reports whether the set grew. // addLabel adds label to the points-to set of ptr and reports whether the set grew.
func (a *analysis) addLabel(ptr, label nodeid) bool { func (a *analysis) addLabel(ptr, label nodeid) bool {
return a.nodes[ptr].pts.add(label) b := a.nodes[ptr].solve.pts.add(label)
if b && a.log != nil {
fmt.Fprintf(a.log, "\t\tpts(n%d) += n%d\n", ptr, label)
}
return b
} }
func (a *analysis) addWork(id nodeid) { func (a *analysis) addWork(id nodeid) {
@ -180,7 +203,7 @@ func (a *analysis) addWork(id nodeid) {
// //
func (a *analysis) onlineCopy(dst, src nodeid) bool { func (a *analysis) onlineCopy(dst, src nodeid) bool {
if dst != src { if dst != src {
if nsrc := a.nodes[src]; nsrc.copyTo.add(dst) { if nsrc := a.nodes[src]; nsrc.solve.copyTo.add(dst) {
if a.log != nil { if a.log != nil {
fmt.Fprintf(a.log, "\t\t\tdynamic copy n%d <- n%d\n", dst, src) fmt.Fprintf(a.log, "\t\t\tdynamic copy n%d <- n%d\n", dst, src)
} }
@ -188,7 +211,7 @@ func (a *analysis) onlineCopy(dst, src nodeid) bool {
// are followed by addWork, possibly batched // are followed by addWork, possibly batched
// via a 'changed' flag; see if there's a // via a 'changed' flag; see if there's a
// noticeable penalty to calling addWork here. // noticeable penalty to calling addWork here.
return a.nodes[dst].pts.addAll(&nsrc.pts) return a.nodes[dst].solve.pts.addAll(&nsrc.solve.pts)
} }
} }
return false return false
@ -239,7 +262,7 @@ func (c *offsetAddrConstraint) solve(a *analysis, delta *nodeset) {
dst := a.nodes[c.dst] dst := a.nodes[c.dst]
for _, x := range delta.AppendTo(a.deltaSpace) { for _, x := range delta.AppendTo(a.deltaSpace) {
k := nodeid(x) k := nodeid(x)
if dst.pts.add(k + nodeid(c.offset)) { if dst.solve.pts.add(k + nodeid(c.offset)) {
a.addWork(c.dst) a.addWork(c.dst)
} }
} }

133
go/pointer/stdlib_test.go Normal file
View File

@ -0,0 +1,133 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pointer
// This file runs the pointer analysis on all packages and tests beneath
// $GOROOT. It provides a "smoke test" that the analysis doesn't crash
// on a large input, and a benchmark for performance measurement.
//
// Because it is relatively slow, the --stdlib flag must be enabled for
// this test to run:
// % go test -v code.google.com/p/go.tools/go/pointer --stdlib
import (
"flag"
"go/token"
"os"
"path/filepath"
"runtime"
"strings"
"testing"
"time"
"code.google.com/p/go.tools/go/loader"
"code.google.com/p/go.tools/go/ssa"
"code.google.com/p/go.tools/go/ssa/ssautil"
)
// runStdlibTest gates the slow whole-stdlib analysis test below;
// enable it with the --stdlib flag (see file comment).
var runStdlibTest = flag.Bool("stdlib", false, "Run the (slow) stdlib test")
// TODO(adonovan): move this to go/buildutil package since we have four copies:
// go/{loader,pointer,ssa}/stdlib_test.go and godoc/analysis/analysis.go.
func allPackages() []string {
var pkgs []string
root := filepath.Join(runtime.GOROOT(), "src/pkg") + string(os.PathSeparator)
filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
// Prune the search if we encounter any of these names:
switch filepath.Base(path) {
case "testdata", ".hg":
return filepath.SkipDir
}
if info.IsDir() {
pkg := filepath.ToSlash(strings.TrimPrefix(path, root))
switch pkg {
case "builtin", "pkg":
return filepath.SkipDir // skip these subtrees
case "":
return nil // ignore root of tree
}
pkgs = append(pkgs, pkg)
}
return nil
})
return pkgs
}
// TestStdlib loads, builds, and runs the pointer analysis over every
// package and test beneath $GOROOT, as a smoke test on a large input
// and a coarse performance measurement. It is slow, so it runs only
// when the --stdlib flag is set.
func TestStdlib(t *testing.T) {
	if !*runStdlibTest {
		t.Skip("skipping (slow) stdlib test (use --stdlib)")
	}

	// Load, parse and type-check the program.
	var conf loader.Config
	conf.SourceImports = true
	if _, err := conf.FromArgs(allPackages(), true); err != nil {
		t.Errorf("FromArgs failed: %v", err)
		return
	}
	iprog, err := conf.Load()
	if err != nil {
		t.Fatalf("Load failed: %v", err)
	}

	// Create SSA packages.
	prog := ssa.Create(iprog, 0)
	prog.BuildAll()

	numPkgs := len(prog.AllPackages())
	if want := 140; numPkgs < want {
		t.Errorf("Loaded only %d packages, want at least %d", numPkgs, want)
	}

	// Determine the set of packages/tests to analyze.
	var testPkgs []*ssa.Package
	for _, info := range iprog.InitialPackages() {
		testPkgs = append(testPkgs, prog.Package(info.Pkg))
	}
	testmain := prog.CreateTestMainPackage(testPkgs...)
	if testmain == nil {
		// BUG FIX: the message previously read "has tests", the
		// opposite of the condition it reports.
		t.Fatal("analysis scope has no tests")
	}

	// Run the analysis.
	config := &Config{
		Reflection:     false, // TODO(adonovan): fix remaining bug in rVCallConstraint, then enable.
		BuildCallGraph: true,
		Mains:          []*ssa.Package{testmain},
	}
	// TODO(adonovan): add some query values (affects track bits).

	t0 := time.Now()
	result, err := Analyze(config)
	if err != nil {
		t.Fatal(err) // internal error in pointer analysis
	}
	_ = result // TODO(adonovan): measure something
	t1 := time.Now()

	// Dump some statistics.
	allFuncs := ssautil.AllFunctions(prog)
	var numInstrs int
	for fn := range allFuncs {
		for _, b := range fn.Blocks {
			numInstrs += len(b.Instrs)
		}
	}

	// determine line count
	var lineCount int
	prog.Fset.Iterate(func(f *token.File) bool {
		lineCount += f.LineCount()
		return true
	})

	t.Log("#Source lines: ", lineCount)
	t.Log("#Instructions: ", numInstrs)
	t.Log("Pointer analysis: ", t1.Sub(t0))
}

View File

@ -68,6 +68,13 @@ func runtimeSetFinalizerIndirect() {
setFinalizer(x, final4) setFinalizer(x, final4)
} }
// Exercise the elimination of SetFinalizer
// constraints with non-pointer operands: when either argument is not
// pointerlike, the analysis should drop the constraint entirely.
func runtimeSetFinalizerNonpointer() {
	runtime.SetFinalizer(nil, (*T).finalize) // x is a non-pointer
	runtime.SetFinalizer((*T).finalize, nil) // f is a non-pointer
}
// @calls main.runtimeSetFinalizerIndirect -> runtime.SetFinalizer // @calls main.runtimeSetFinalizerIndirect -> runtime.SetFinalizer
// @calls runtime.SetFinalizer -> main.final4 // @calls runtime.SetFinalizer -> main.final4
@ -76,6 +83,7 @@ func main() {
runtimeSetFinalizer2() runtimeSetFinalizer2()
runtimeSetFinalizer3() runtimeSetFinalizer3()
runtimeSetFinalizerIndirect() runtimeSetFinalizerIndirect()
runtimeSetFinalizerNonpointer()
} }
var unknown bool // defeat dead-code elimination var unknown bool // defeat dead-code elimination

View File

@ -7,6 +7,11 @@ package pointer
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"log"
"os"
"os/exec"
"runtime"
"time"
"code.google.com/p/go.tools/container/intsets" "code.google.com/p/go.tools/container/intsets"
"code.google.com/p/go.tools/go/types" "code.google.com/p/go.tools/go/types"
@ -220,7 +225,7 @@ func (a *analysis) shouldTrack(T types.Type) bool {
} }
a.trackTypes[T] = track a.trackTypes[T] = track
if !track && a.log != nil { if !track && a.log != nil {
fmt.Fprintf(a.log, "Type not tracked: %s\n", T) fmt.Fprintf(a.log, "\ttype not tracked: %s\n", T)
} }
} }
return track return track
@ -280,3 +285,34 @@ func (ns *nodeset) add(n nodeid) bool {
func (x *nodeset) addAll(y *nodeset) bool { func (x *nodeset) addAll(y *nodeset) bool {
return x.UnionWith(&y.Sparse) return x.UnionWith(&y.Sparse)
} }
// Profiling & debugging -------------------------------------------------------
// timers records the wall-clock start time of each named phase,
// written by start and read back by stop to report elapsed time.
var timers = make(map[string]time.Time)
// start records the beginning of the named phase and logs it.
// It is a no-op unless the debugTimers option is enabled.
func start(name string) {
	if !debugTimers {
		return
	}
	timers[name] = time.Now()
	log.Printf("%s...\n", name)
}
// stop logs the time elapsed since the matching call to start for
// the named phase. It is a no-op unless debugTimers is enabled.
func stop(name string) {
	if !debugTimers {
		return
	}
	log.Printf("%s took %s\n", name, time.Since(timers[name]))
}
// diff runs the command "diff a b" and reports its success.
// Output goes to stderr so it interleaves with test logging.
func diff(a, b string) bool {
	// Plan 9 keeps diff in /bin and lacks unified-diff output.
	prog, mode := "/usr/bin/diff", "-u"
	if runtime.GOOS == "plan9" {
		prog, mode = "/bin/diff", "-c"
	}
	cmd := exec.Command(prog, mode, a, b)
	cmd.Stdout = os.Stderr
	cmd.Stderr = os.Stderr
	return cmd.Run() == nil
}