go.tools/pointer: make os.Args point to something.

Since the Go runtime treats it specially, so must the pointer analysis.

Details:
- Combine object.{val,typ} fields into 'data interface{}'.
  It may now hold a string, describing an intrinsically
  allocated object such as the command-line args.
- extend Label accordingly; add Label.ReflectType() accessor.

Also: document pointer analysis algorithm classification.

R=crawshaw
CC=golang-dev
https://golang.org/cl/14156043
This commit is contained in:
Alan Donovan 2013-10-01 09:46:33 -04:00
parent d20f86cc8e
commit ae060fe849
7 changed files with 104 additions and 40 deletions

View File

@ -34,6 +34,5 @@ API:
Think about them sooner rather than later. Think about them sooner rather than later.
MISC: MISC:
- os.Args should point to something; currently they don't.
- Test on all platforms. - Test on all platforms.
Currently we assume these go/build tags: linux, amd64, !cgo. Currently we assume these go/build tags: linux, amd64, !cgo.

View File

@ -39,17 +39,18 @@ type object struct {
// allocation. Zero for all other nodes. // allocation. Zero for all other nodes.
size uint32 size uint32
// The SSA operation that caused this object to be allocated. // data describes this object; it has one of these types:
// May be nil for (e.g.) intrinsic allocations. //
val ssa.Value // ssa.Value for an object allocated by an SSA operation.
// types.Type for an rtype instance object or *rtype-tagged object.
// string for an intrinsic object, e.g. the array behind os.Args.
// nil for an object allocated by an intrinsic.
// (cgn provides the identity of the intrinsic.)
data interface{}
// The call-graph node (=context) in which this object was allocated. // The call-graph node (=context) in which this object was allocated.
// May be nil for global objects: Global, Const, some Functions. // May be nil for global objects: Global, Const, some Functions.
cgn *cgnode cgn *cgnode
// If this is an rtype instance object, or a *rtype-tagged
// object, this is its type.
rtype types.Type
} }
// nodeid denotes a node. // nodeid denotes a node.

View File

@ -24,6 +24,36 @@ optimisations such as Hybrid- and Lazy- Cycle Detection from
(Hardekopf & Lin, PLDI'07), (Hardekopf & Lin, PLDI'07),
CLASSIFICATION
Our algorithm is INCLUSION-BASED: the points-to sets for x and y will
be related by pts(y) ⊇ pts(x) if the program contains the statement
y = x.
It is FLOW-INSENSITIVE: it ignores all control flow constructs and the
order of statements in a program. It is therefore a "MAY ALIAS"
analysis: its facts are of the form "P may/may not point to L",
not "P must point to L".
It is FIELD-SENSITIVE: it builds separate points-to sets for distinct
fields, such as x and y in struct { x, y *int }.
It is mostly CONTEXT-INSENSITIVE: most functions are analyzed once,
so values can flow in at one call to the function and return out at
another. Only some smaller functions are analyzed with consideration
to their calling context.
It has a CONTEXT-SENSITIVE HEAP: objects are named by both allocation
site and context, so the objects returned by two distinct calls to f:
func f() *T { return new(T) }
are distinguished up to the limits of the calling context.
It is a WHOLE PROGRAM analysis: it requires SSA-form IR for the
complete Go program and summaries for native code.
See the (Hind, PASTE'01) survey paper for an explanation of these terms.
TERMINOLOGY TERMINOLOGY
We occasionally use C's x->f notation to distinguish the case where x We occasionally use C's x->f notation to distinguish the case where x

View File

@ -95,9 +95,9 @@ func (a *analysis) setValueNode(v ssa.Value, id nodeid, cgn *cgnode) {
// a single object allocation. // a single object allocation.
// //
// obj is the start node of the object, from a prior call to nextNode. // obj is the start node of the object, from a prior call to nextNode.
// Its size, flags and (optionally) data will be updated. // Its size, flags and optional data will be updated.
// //
func (a *analysis) endObject(obj nodeid, cgn *cgnode, val ssa.Value) *object { func (a *analysis) endObject(obj nodeid, cgn *cgnode, data interface{}) *object {
// Ensure object is non-empty by padding; // Ensure object is non-empty by padding;
// the pad will be the object node. // the pad will be the object node.
size := uint32(a.nextNode() - obj) size := uint32(a.nextNode() - obj)
@ -108,12 +108,9 @@ func (a *analysis) endObject(obj nodeid, cgn *cgnode, val ssa.Value) *object {
o := &object{ o := &object{
size: size, // excludes padding size: size, // excludes padding
cgn: cgn, cgn: cgn,
val: val, data: data,
} }
objNode.obj = o objNode.obj = o
if val != nil && a.log != nil {
fmt.Fprintf(a.log, "\tobj[%s] = n%d\n", val, obj)
}
return o return o
} }
@ -150,10 +147,10 @@ func (a *analysis) makeFunctionObject(fn *ssa.Function) nodeid {
} }
// makeTagged creates a tagged object of type typ. // makeTagged creates a tagged object of type typ.
func (a *analysis) makeTagged(typ types.Type, cgn *cgnode, val ssa.Value) nodeid { func (a *analysis) makeTagged(typ types.Type, cgn *cgnode, data interface{}) nodeid {
obj := a.addOneNode(typ, "tagged.T", nil) // NB: type may be non-scalar! obj := a.addOneNode(typ, "tagged.T", nil) // NB: type may be non-scalar!
a.addNodes(typ, "tagged.v") a.addNodes(typ, "tagged.v")
a.endObject(obj, cgn, val).flags |= otTagged a.endObject(obj, cgn, data).flags |= otTagged
return obj return obj
} }
@ -168,10 +165,9 @@ func (a *analysis) makeRtype(T types.Type) nodeid {
// ordinarily a large struct but here a single node will do. // ordinarily a large struct but here a single node will do.
obj := a.nextNode() obj := a.nextNode()
a.addOneNode(T, "reflect.rtype", nil) a.addOneNode(T, "reflect.rtype", nil)
a.endObject(obj, nil, nil).rtype = T a.endObject(obj, nil, T)
id := a.makeTagged(a.reflectRtypePtr, nil, nil) id := a.makeTagged(a.reflectRtypePtr, nil, T)
a.nodes[id].obj.rtype = T
a.nodes[id+1].typ = T // trick (each *rtype tagged object is a singleton) a.nodes[id+1].typ = T // trick (each *rtype tagged object is a singleton)
a.addressOf(id+1, obj) a.addressOf(id+1, obj)
@ -809,6 +805,10 @@ func (a *analysis) objectNode(cgn *cgnode, v ssa.Value) nodeid {
// For now, Captures have the same cardinality as globals. // For now, Captures have the same cardinality as globals.
// TODO(adonovan): treat captures context-sensitively. // TODO(adonovan): treat captures context-sensitively.
} }
if a.log != nil {
fmt.Fprintf(a.log, "\tglobalobj[%s] = n%d\n", a.nodes[obj].obj, obj)
}
a.globalobj[v] = obj a.globalobj[v] = obj
} }
return obj return obj
@ -874,6 +874,10 @@ func (a *analysis) objectNode(cgn *cgnode, v ssa.Value) nodeid {
// - unsafe.Pointer->*T conversion acts like Alloc // - unsafe.Pointer->*T conversion acts like Alloc
// - string->[]byte/[]rune conversion acts like MakeSlice // - string->[]byte/[]rune conversion acts like MakeSlice
} }
if a.log != nil {
fmt.Fprintf(a.log, "\tlocalobj[%s] = n%d\n", a.nodes[obj].obj, obj)
}
a.localobj[v] = obj a.localobj[v] = obj
} }
return obj return obj
@ -1222,5 +1226,13 @@ func (a *analysis) generate() *cgnode {
a.genFunc(cgn) a.genFunc(cgn)
} }
// The runtime magically allocates os.Args; so should we.
if os := a.prog.ImportedPackage("os"); os != nil {
// In effect: os.Args = new([1]string)[:]
obj := a.addNodes(types.NewArray(types.Typ[types.String], 1), "<command-line args>")
a.endObject(obj, nil, "<command-line args>")
a.addressOf(a.objectNode(nil, os.Var("Args")), obj)
}
return root return root
} }

View File

@ -25,6 +25,7 @@ import (
// - stack- and heap-allocated variables (including composite literals) // - stack- and heap-allocated variables (including composite literals)
// - channels, maps and arrays created by make() // - channels, maps and arrays created by make()
// - intrinsic or reflective operations that allocate (e.g. append, reflect.New) // - intrinsic or reflective operations that allocate (e.g. append, reflect.New)
// - intrinsic objects, e.g. the initial array behind os.Args.
// - and their subelements, e.g. "alloc.y[*].z" // - and their subelements, e.g. "alloc.y[*].z"
// //
// Labels are so varied that they defy good generalizations; // Labels are so varied that they defy good generalizations;
@ -32,16 +33,25 @@ import (
// Many objects have types that are inexpressible in Go: // Many objects have types that are inexpressible in Go:
// maps, channels, functions, tagged objects. // maps, channels, functions, tagged objects.
// //
// At most one of Value() or ReflectType() may return non-nil.
//
type Label struct { type Label struct {
obj *object // the addressable memory location containing this label obj *object // the addressable memory location containing this label
subelement *fieldInfo // subelement path within obj, e.g. ".a.b[*].c" subelement *fieldInfo // subelement path within obj, e.g. ".a.b[*].c"
} }
// Value returns the ssa.Value that allocated this label's object, // Value returns the ssa.Value that allocated this label's object, if any.
// or nil if it was allocated by an intrinsic.
//
func (l Label) Value() ssa.Value { func (l Label) Value() ssa.Value {
return l.obj.val val, _ := l.obj.data.(ssa.Value)
return val
}
// ReflectType returns the type represented by this label if it is a
// reflect.rtype instance object or *reflect.rtype-tagged object.
//
func (l Label) ReflectType() types.Type {
rtype, _ := l.obj.data.(types.Type)
return rtype
} }
// Context returns the analytic context in which this label's object was allocated, // Context returns the analytic context in which this label's object was allocated,
@ -60,11 +70,11 @@ func (l Label) Path() string {
// Pos returns the position of this label, if known, zero otherwise. // Pos returns the position of this label, if known, zero otherwise.
func (l Label) Pos() token.Pos { func (l Label) Pos() token.Pos {
if v := l.Value(); v != nil { switch data := l.obj.data.(type) {
return v.Pos() case ssa.Value:
} return data.Pos()
if l.obj.rtype != nil { case types.Type:
if nt, ok := deref(l.obj.rtype).(*types.Named); ok { if nt, ok := deref(data).(*types.Named); ok {
return nt.Obj().Pos() return nt.Obj().Pos()
} }
} }
@ -84,6 +94,7 @@ func (l Label) Pos() token.Pos {
// makeinterface (tagged object allocated by makeinterface) // makeinterface (tagged object allocated by makeinterface)
// <alloc in reflect.Zero> (allocation in intrinsic) // <alloc in reflect.Zero> (allocation in intrinsic)
// sync.Mutex (a reflect.rtype instance) // sync.Mutex (a reflect.rtype instance)
// <command-line args> (an intrinsic object)
// //
// Labels within compound objects have subelement paths: // Labels within compound objects have subelement paths:
// x.y[*].z (a struct variable, x) // x.y[*].z (a struct variable, x)
@ -92,18 +103,25 @@ func (l Label) Pos() token.Pos {
// //
func (l Label) String() string { func (l Label) String() string {
var s string var s string
switch v := l.obj.val.(type) { switch v := l.obj.data.(type) {
case types.Type:
return v.String()
case string:
s = v // an intrinsic object (e.g. os.Args[*])
case nil: case nil:
if l.obj.rtype != nil {
return l.obj.rtype.String()
}
if l.obj.cgn != nil { if l.obj.cgn != nil {
// allocation by intrinsic or reflective operation // allocation by intrinsic or reflective operation
return fmt.Sprintf("<alloc in %s>", l.obj.cgn.Func()) s = fmt.Sprintf("<alloc in %s>", l.obj.cgn.Func())
} else {
s = "<unknown>" // should be unreachable
} }
return "<unknown>" // should be unreachable
case *ssa.Function, *ssa.Global: case *ssa.Function:
s = v.String()
case *ssa.Global:
s = v.String() s = v.String()
case *ssa.Const: case *ssa.Const:
@ -132,7 +150,7 @@ func (l Label) String() string {
s = "makeinterface:" + v.X.Type().String() s = "makeinterface:" + v.X.Type().String()
default: default:
panic(fmt.Sprintf("unhandled Label.val type: %T", v)) panic(fmt.Sprintf("unhandled object data type: %T", v))
} }
return s + l.subelement.path() return s + l.subelement.path()

View File

@ -751,7 +751,7 @@ func (c *rtypeElemConstraint) solve(a *analysis, _ *node, delta nodeset) {
} }
changed := false changed := false
for tObj := range delta { for tObj := range delta {
T := a.nodes[tObj].obj.rtype T := a.nodes[tObj].obj.data.(types.Type)
if tHasElem, ok := T.Underlying().(hasElem); ok { if tHasElem, ok := T.Underlying().(hasElem); ok {
if a.addLabel(c.result, a.makeRtype(tHasElem.Elem())) { if a.addLabel(c.result, a.makeRtype(tHasElem.Elem())) {
changed = true changed = true
@ -797,7 +797,7 @@ func (c *rtypeInOutConstraint) ptr() nodeid {
func (c *rtypeInOutConstraint) solve(a *analysis, _ *node, delta nodeset) { func (c *rtypeInOutConstraint) solve(a *analysis, _ *node, delta nodeset) {
changed := false changed := false
for tObj := range delta { for tObj := range delta {
T := a.nodes[tObj].obj.rtype T := a.nodes[tObj].obj.data.(types.Type)
sig, ok := T.Underlying().(*types.Signature) sig, ok := T.Underlying().(*types.Signature)
if !ok { if !ok {
continue // not a func type continue // not a func type
@ -861,7 +861,7 @@ func (c *rtypeKeyConstraint) ptr() nodeid {
func (c *rtypeKeyConstraint) solve(a *analysis, _ *node, delta nodeset) { func (c *rtypeKeyConstraint) solve(a *analysis, _ *node, delta nodeset) {
changed := false changed := false
for tObj := range delta { for tObj := range delta {
T := a.nodes[tObj].obj.rtype T := a.nodes[tObj].obj.data.(types.Type)
if tMap, ok := T.Underlying().(*types.Map); ok { if tMap, ok := T.Underlying().(*types.Map); ok {
if a.addLabel(c.result, a.makeRtype(tMap.Key())) { if a.addLabel(c.result, a.makeRtype(tMap.Key())) {
changed = true changed = true

View File

@ -2,7 +2,10 @@
package main package main
import "fmt" import (
"fmt"
"os"
)
type S int type S int
@ -14,6 +17,7 @@ func (s *S) String() string {
} }
func main() { func main() {
print(os.Args) // @pointsto <command-line args>
fmt.Println("Hello, World!", &theS) fmt.Println("Hello, World!", &theS)
} }