diff --git a/dashboard/coordinator/main.go b/dashboard/coordinator/main.go index 40eb9058..3a883004 100644 --- a/dashboard/coordinator/main.go +++ b/dashboard/coordinator/main.go @@ -8,10 +8,13 @@ package main // import "golang.org/x/tools/dashboard/coordinator" import ( + "archive/tar" "bytes" + "compress/gzip" "crypto/hmac" "crypto/md5" "encoding/json" + "errors" "flag" "fmt" "html" @@ -19,8 +22,11 @@ import ( "io/ioutil" "log" "net/http" + "net/url" "os" "os/exec" + "path" + "regexp" "sort" "strconv" "strings" @@ -34,8 +40,8 @@ import ( ) var ( - masterKeyFile = flag.String("masterkey", "", "Path to builder master key. Else fetched using GCE project attribute 'builder-master-key'.") - maxBuilds = flag.Int("maxbuilds", 6, "Max concurrent builds") + masterKeyFile = flag.String("masterkey", "", "Path to builder master key. Else fetched using GCE project attribute 'builder-master-key'.") + maxLocalBuilds = flag.Int("maxbuilds", 6, "Max concurrent Docker builds (VM builds don't count)") cleanZones = flag.String("zones", "us-central1-a,us-central1-b,us-central1-f", "Comma-separated list of zones to periodically clean of stale build VMs (ones that failed to shut themselves down)") @@ -47,8 +53,8 @@ var ( var ( startTime = time.Now() - builders = map[string]buildConfig{} // populated once at startup - watchers = map[string]watchConfig{} // populated once at startup + builders = map[string]buildConfig{} // populated at startup, keys like "openbsd-amd64-56" + watchers = map[string]watchConfig{} // populated at startup, keyed by repo, e.g. "https://go.googlesource.com/go" donec = make(chan builderRev) // reports of finished builders statusMu sync.Mutex // guards both status (ongoing ones) and statusDone (just finished) @@ -58,6 +64,37 @@ var ( const maxStatusDone = 30 +// Initialized by initGCE: +var ( + projectID string + projectZone string + computeService *compute.Service +) + +func initGCE() error { + if !metadata.OnGCE() { + return errors.New("not running on GCE; VM support disabled") + } + var err error + projectID, err = metadata.ProjectID() + if err != nil { + return fmt.Errorf("failed to get current GCE ProjectID: %v", err) + } + projectZone, err = metadata.Get("instance/zone") + if err != nil || projectZone == "" { + return fmt.Errorf("failed to get current GCE zone: %v", err) + } + // Convert the zone from "projects/1234/zones/us-central1-a" to "us-central1-a". + projectZone = path.Base(projectZone) + if !hasComputeScope() { + return errors.New("The coordinator is not running with access to read and write Compute resources. VM support disabled.") + + } + ts := google.ComputeTokenSource("default") + computeService, _ = compute.New(oauth2.NewClient(oauth2.NoContext, ts)) + return nil +} + type imageInfo struct { url string // of tar file @@ -74,8 +111,15 @@ var images = map[string]*imageInfo{ "gobuilders/linux-x86-sid": {url: "https://storage.googleapis.com/go-builder-data/docker-linux.sid.tar.gz"}, } +// A buildConfig describes how to run either a Docker-based or VM-based build. type buildConfig struct { - name string // "linux-amd64-race" + name string // "linux-amd64-race" + + // VM-specific settings: (used if vmImage != "") + vmImage string // e.g. 
"openbsd-amd64-56" + machineType string // optional GCE instance type + + // Docker-specific settings: (used if vmImage == "") image string // Docker image to use to build cmd string // optional -cmd flag (relative to go/src/) env []string // extra environment ("key=value") pairs @@ -83,6 +127,31 @@ type buildConfig struct { tool string // the tool this configuration is for } +func (c *buildConfig) usesDocker() bool { return c.vmImage == "" } +func (c *buildConfig) usesVM() bool { return c.vmImage != "" } + +func (c *buildConfig) MachineType() string { + if v := c.machineType; v != "" { + return v + } + return "n1-highcpu-4" +} + +// recordResult sends build results to the dashboard +func (b *buildConfig) recordResult(ok bool, hash, buildLog string, runTime time.Duration) error { + req := map[string]interface{}{ + "Builder": b.name, + "PackagePath": "", + "Hash": hash, + "GoHash": "", + "OK": ok, + "Log": buildLog, + "RunTime": runTime, + } + args := url.Values{"key": {builderKey(b.name)}, "builder": {b.name}} + return dash("POST", "result", args, req, nil) +} + type watchConfig struct { repo string // "https://go.googlesource.com/go" dash string // "https://build.golang.org/" (must end in /) @@ -91,6 +160,11 @@ type watchConfig struct { func main() { flag.Parse() + + if err := initGCE(); err != nil { + log.Printf("VM support disabled due to error initializing GCE: %v", err) + } + addBuilder(buildConfig{name: "linux-386"}) addBuilder(buildConfig{name: "linux-386-387", env: []string{"GO386=387"}}) addBuilder(buildConfig{name: "linux-amd64"}) @@ -118,6 +192,10 @@ func main() { addBuilder(buildConfig{name: "linux-386-clang", image: "gobuilders/linux-x86-clang"}) addBuilder(buildConfig{name: "linux-amd64-clang", image: "gobuilders/linux-x86-clang"}) + // VMs: + // addBuilder(buildConfig{name: "openbsd-amd64-gce56", vmImage: "openbsd-amd64-56"}) + // addBuilder(buildConfig{name: "plan9-386-gce", vmImage: "plan9-386"}) + addWatcher(watchConfig{repo: "https://go.googlesource.com/go", dash: "https://build.golang.org/"}) // TODO(adg,cmang): fix gccgo watcher // addWatcher(watchConfig{repo: "https://code.google.com/p/gofrontend", dash: "https://build.golang.org/gccgo/"}) @@ -146,6 +224,7 @@ func main() { go cleanUpOldContainers() go cleanUpOldVMs() + stopWatchers() // clean up before we start new ones for _, watcher := range watchers { if err := startWatching(watchers[watcher.repo]); err != nil { log.Printf("Error starting watcher for %s: %v", watcher.repo, err) @@ -161,17 +240,11 @@ func main() { for { select { case work := <-workc: - log.Printf("workc received %+v; len(status) = %v, maxBuilds = %v; cur = %p", work, len(status), *maxBuilds, status[work]) - mayBuild := mayBuildRev(work) - if mayBuild { - if numBuilds() > *maxBuilds { - mayBuild = false - } - } - if mayBuild { - if st, err := startBuilding(builders[work.name], work.rev); err == nil { + log.Printf("workc received %+v; len(status) = %v, maxLocalBuilds = %v; cur = %p", work, len(status), *maxLocalBuilds, status[work]) + if mayBuildRev(work) { + conf := builders[work.name] + if st, err := startBuilding(conf, work.rev); err == nil { setStatus(work, st) - log.Printf("%v now building in %v", work, st.container) } else { log.Printf("Error starting to build %v: %v", work, err) } @@ -193,10 +266,28 @@ func numCurrentBuilds() int { return len(status) } +// mayBuildRev reports whether the build type & revision should be started. +// It returns true if it's not already building, and there is capacity. 
func mayBuildRev(work builderRev) bool { + conf := builders[work.name] + statusMu.Lock() - defer statusMu.Unlock() - return len(status) < *maxBuilds && status[work] == nil + _, building := status[work] + statusMu.Unlock() + + if building { + return false + } + if conf.usesVM() { + // These don't count towards *maxLocalBuilds. + return true + } + numDocker, err := numDockerBuilds() + if err != nil { + log.Printf("not starting %v due to docker ps failure: %v", work, err) + return false + } + return numDocker < *maxLocalBuilds } func setStatus(work builderRev, st *buildStatus) { @@ -220,14 +311,20 @@ func markDone(work builderRev) { statusDone = append(statusDone, st) } -func getStatus(work builderRev) *buildStatus { +// statusPtrStr disambiguates which status to return if there are +// multiple in the history (e.g. recent failures where the build +// didn't finish for reasons outside of all.bash failing) +func getStatus(work builderRev, statusPtrStr string) *buildStatus { statusMu.Lock() defer statusMu.Unlock() - if st, ok := status[work]; ok { + match := func(st *buildStatus) bool { + return statusPtrStr == "" || fmt.Sprintf("%p", st) == statusPtrStr + } + if st, ok := status[work]; ok && match(st) { return st } for _, st := range statusDone { - if st.builderRev == work { + if st.builderRev == work && match(st) { return st } } @@ -248,14 +345,23 @@ func handleStatus(w http.ResponseWriter, r *http.Request) { active = append(active, st) } recent = append(recent, statusDone...) + numTotal := len(status) + numDocker, err := numDockerBuilds() statusMu.Unlock() sort.Sort(byAge(active)) - sort.Sort(byAge(recent)) + sort.Sort(sort.Reverse(byAge(recent))) io.WriteString(w, "
", len(status), *maxBuilds) + if err != nil { + fmt.Fprintf(w, "Error
Error fetching Docker build count: %s\n", html.EscapeString(err.Error())) + } + + fmt.Fprintf(w, "running
%d total builds active (Docker: %d/%d; VMs: %d/∞):", + numTotal, numDocker, *maxLocalBuilds, numTotal-numDocker) + + io.WriteString(w, "
") for _, st := range active { io.WriteString(w, st.htmlStatusLine()) } @@ -276,7 +382,7 @@ func diskFree() string { } func handleLogs(w http.ResponseWriter, r *http.Request) { - st := getStatus(builderRev{r.FormValue("name"), r.FormValue("rev")}) + st := getStatus(builderRev{r.FormValue("name"), r.FormValue("rev")}, r.FormValue("st")) if st == nil { http.NotFound(w, r) return @@ -367,6 +473,10 @@ func (conf buildConfig) dockerRunArgs(rev string) (args []string) { } func addBuilder(c buildConfig) { + if c.tool == "gccgo" { + // TODO(cmang,bradfitz,adg): fix gccgo + return + } if c.name == "" { panic("empty name") } @@ -394,8 +504,11 @@ func addBuilder(c buildConfig) { if strings.HasPrefix(c.name, "linux-") && c.image == "" { c.image = "gobuilders/linux-x86-base" } - if c.image == "" { - panic("empty image") + if c.image == "" && c.vmImage == "" { + panic("empty image and vmImage") + } + if c.image != "" && c.vmImage != "" { + panic("can't specify both image and vmImage") } builders[c.name] = c } @@ -478,21 +591,29 @@ func condUpdateImage(img string) error { return nil } -// numBuilds finds the number of go builder instances currently running. -func numBuilds() int { - out, _ := exec.Command("docker", "ps").Output() - numBuilds := 0 - ps := bytes.Split(out, []byte("\n")) - for _, p := range ps { - if bytes.HasPrefix(p, []byte("gobuilders/")) { - numBuilds++ +// numDockerBuilds finds the number of go builder instances currently running. +func numDockerBuilds() (n int, err error) { + out, err := exec.Command("docker", "ps").Output() + if err != nil { + return 0, err + } + for _, line := range strings.Split(string(out), "\n") { + if strings.Contains(line, "gobuilders/") { + n++ } } - log.Printf("num current docker builds: %d", numBuilds) - return numBuilds + return n, nil } func startBuilding(conf buildConfig, rev string) (*buildStatus, error) { + if conf.usesVM() { + return startBuildingInVM(conf, rev) + } else { + return startBuildingInDocker(conf, rev) + } +} + +func startBuildingInDocker(conf buildConfig, rev string) (*buildStatus, error) { if err := condUpdateImage(conf.image); err != nil { log.Printf("Failed to setup container for %v %v: %v", conf.name, rev, err) return nil, err @@ -505,14 +626,16 @@ func startBuilding(conf buildConfig, rev string) (*buildStatus, error) { return nil, err } container := strings.TrimSpace(string(all)) - bs := &buildStatus{ - builderRev: builderRev{ - name: conf.name, - rev: rev, - }, - container: container, - start: time.Now(), + brev := builderRev{ + name: conf.name, + rev: rev, } + st := &buildStatus{ + builderRev: brev, + container: container, + start: time.Now(), + } + log.Printf("%v now building in Docker container %v", brev, st.container) go func() { all, err := exec.Command("docker", "wait", container).CombinedOutput() output := strings.TrimSpace(string(all)) @@ -521,15 +644,15 @@ func startBuilding(conf buildConfig, rev string) (*buildStatus, error) { exit, err := strconv.Atoi(output) ok = (err == nil && exit == 0) } - bs.setDone(ok) + st.setDone(ok) log.Printf("docker wait %s/%s: %v, %s", container, rev, err, output) donec <- builderRev{conf.name, rev} exec.Command("docker", "rm", container).Run() }() go func() { cmd := exec.Command("docker", "logs", "-f", container) - cmd.Stdout = bs - cmd.Stderr = bs + cmd.Stdout = st + cmd.Stderr = st if err := cmd.Run(); err != nil { // The docker logs subcommand always returns // success, even if the underlying process @@ -537,7 +660,273 @@ func startBuilding(conf buildConfig, rev string) (*buildStatus, 
error) { log.Printf("failed to follow docker logs of %s: %v", container, err) } }() - return bs, nil + return st, nil +} + +var osArchRx = regexp.MustCompile(`^(\w+-\w+)`) + +// startBuildingInVM starts a VM on GCE running the buildlet binary to build rev. +func startBuildingInVM(conf buildConfig, rev string) (*buildStatus, error) { + brev := builderRev{ + name: conf.name, + rev: rev, + } + st := &buildStatus{ + builderRev: brev, + start: time.Now(), + } + + // name is the project-wide unique name of the GCE instance. It can't be longer + // than 61 bytes, so we only use the first 8 bytes of the rev. + name := "buildlet-" + conf.name + "-" + rev[:8] + + // buildletURL is the URL of the buildlet binary which the VMs + // are configured to download at boot and run. This lets us + // update the buildlet more easily than rebuilding the whole + // VM image. We put this URL in a well-known GCE metadata attribute. + // The value will be of the form: + // http://storage.googleapis.com/go-builder-data/buildlet.GOOS-GOARCH + m := osArchRx.FindStringSubmatch(conf.name) + if m == nil { + return nil, fmt.Errorf("invalid builder name %q", conf.name) + } + buildletURL := "http://storage.googleapis.com/go-builder-data/buildlet." + m[1] + + prefix := "https://www.googleapis.com/compute/v1/projects/" + projectID + machType := prefix + "/zones/" + projectZone + "/machineTypes/" + conf.MachineType() + + instance := &compute.Instance{ + Name: name, + Description: fmt.Sprintf("Go Builder building %s %s", conf.name, rev), + MachineType: machType, + Disks: []*compute.AttachedDisk{ + { + AutoDelete: true, + Boot: true, + Type: "PERSISTENT", + InitializeParams: &compute.AttachedDiskInitializeParams{ + DiskName: name, + SourceImage: "https://www.googleapis.com/compute/v1/projects/" + projectID + "/global/images/" + conf.vmImage, + DiskType: "https://www.googleapis.com/compute/v1/projects/" + projectID + "/zones/" + projectZone + "/diskTypes/pd-ssd", + }, + }, + }, + Tags: &compute.Tags{ + // Warning: do NOT list "http-server" or "allow-ssh" (our + // project's custom tag to allow ssh access) here; the + // buildlet provides full remote code execution. + Items: []string{}, + }, + Metadata: &compute.Metadata{ + Items: []*compute.MetadataItems{ + { + Key: "buildlet-binary-url", + Value: buildletURL, + }, + // In case the VM gets away from us (generally: if the + // coordinator dies while a build is running), then we + // set this attribute of when it should be killed so + // we can kill it later when the coordinator is + // restarted. The cleanUpOldVMs goroutine loop handles + // that killing. + { + Key: "delete-at", + Value: fmt.Sprint(time.Now().Add(30 * time.Minute).Unix()), + }, + }, + }, + NetworkInterfaces: []*compute.NetworkInterface{ + &compute.NetworkInterface{ + AccessConfigs: []*compute.AccessConfig{ + &compute.AccessConfig{ + Type: "ONE_TO_ONE_NAT", + Name: "External NAT", + }, + }, + Network: prefix + "/global/networks/default", + }, + }, + } + op, err := computeService.Instances.Insert(projectID, projectZone, instance).Do() + if err != nil { + return nil, fmt.Errorf("Failed to create instance: %v", err) + } + st.createOp = op.Name + st.instName = name + log.Printf("%v now building in VM %v", brev, st.instName) + // Start the goroutine to monitor the VM now that it's booting. This might + // take minutes for it to come up, and then even more time to do the build. 
+ go func() { + err := watchVM(st) + deleteVM(projectZone, st.instName) + st.setDone(err == nil) + if err != nil { + fmt.Fprintf(st, "\n\nError: %v\n", err) + } + donec <- builderRev{conf.name, rev} + }() + return st, nil +} + +// watchVM monitors a VM doing a build. +func watchVM(st *buildStatus) (err error) { + goodRes := func(res *http.Response, err error, what string) bool { + if err != nil { + err = fmt.Errorf("%s: %v", what, err) + return false + } + if res.StatusCode/100 != 2 { + err = fmt.Errorf("%s: %v", what, res.Status) + return false + + } + return true + } + st.logEventTime("instance_create_requested") + // Wait for instance create operation to succeed. +OpLoop: + for { + time.Sleep(2 * time.Second) + op, err := computeService.ZoneOperations.Get(projectID, projectZone, st.createOp).Do() + if err != nil { + return fmt.Errorf("Failed to get op %s: %v", st.createOp, err) + } + switch op.Status { + case "PENDING", "RUNNING": + continue + case "DONE": + if op.Error != nil { + for _, operr := range op.Error.Errors { + return fmt.Errorf("Error creating instance: %+v", operr) + } + return errors.New("Failed to start.") + } + break OpLoop + default: + log.Fatalf("Unknown status %q: %+v", op.Status, op) + } + } + st.logEventTime("instance_created") + + inst, err := computeService.Instances.Get(projectID, projectZone, st.instName).Do() + if err != nil { + return fmt.Errorf("Error getting instance %s details after creation: %v", st.instName, err) + } + st.logEventTime("got_instance_info") + + // Find its internal IP. + var ip string + for _, iface := range inst.NetworkInterfaces { + if strings.HasPrefix(iface.NetworkIP, "10.") { + ip = iface.NetworkIP + } + } + if ip == "" { + return errors.New("didn't find its internal IP address") + } + + // Wait for it to boot and its buildlet to come up on port 80. + st.logEventTime("waiting_for_buildlet") + buildletURL := "http://" + ip + const numTries = 60 + var alive bool + for i := 1; i <= numTries; i++ { + res, err := http.Get(buildletURL) + if err != nil { + time.Sleep(1 * time.Second) + continue + } + res.Body.Close() + if res.StatusCode != 200 { + return fmt.Errorf("buildlet returned HTTP status code %d on try number %d", res.StatusCode, i) + } + st.logEventTime("buildlet_up") + alive = true + break + } + if !alive { + return fmt.Errorf("buildlet didn't come up in %d seconds", numTries) + } + + // Write the VERSION file. + st.logEventTime("start_write_version_tar") + verReq, err := http.NewRequest("PUT", buildletURL+"/writetgz", versionTgz(st.rev)) + if err != nil { + return err + } + verRes, err := http.DefaultClient.Do(verReq) + if !goodRes(verRes, err, "writing VERSION tgz") { + return + } + + // Feed the buildlet a tar file for it to extract. + // TODO: cache these. 
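+	// The tarball is fetched from Gerrit's +archive endpoint for this
+	// revision and streamed straight into the buildlet's /writetgz
+	// handler, so the coordinator never buffers it on disk.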
+ st.logEventTime("start_fetch_gerrit_tgz") + tarRes, err := http.Get("https://go.googlesource.com/go/+archive/" + st.rev + ".tar.gz") + if !goodRes(tarRes, err, "fetching tarball from Gerrit") { + return + } + + st.logEventTime("start_write_tar") + putReq, err := http.NewRequest("PUT", buildletURL+"/writetgz", tarRes.Body) + if err != nil { + tarRes.Body.Close() + return err + } + putRes, err := http.DefaultClient.Do(putReq) + st.logEventTime("end_write_tar") + tarRes.Body.Close() + if !goodRes(putRes, err, "writing tarball to buildlet") { + return + } + + // Run the builder + cmd := "all.bash" + if strings.HasPrefix(st.name, "windows-") { + cmd = "all.bat" + } else if strings.HasPrefix(st.name, "plan9-") { + cmd = "all.rc" + } + execStartTime := time.Now() + st.logEventTime("start_exec") + res, err := http.PostForm(buildletURL+"/exec", url.Values{"cmd": {"src/" + cmd}}) + if !goodRes(res, err, "running "+cmd) { + return + } + defer res.Body.Close() + st.logEventTime("running_exec") + // Stream the output: + if _, err := io.Copy(st, res.Body); err != nil { + return fmt.Errorf("error copying response: %v", err) + } + st.logEventTime("done") + state := res.Trailer.Get("Process-State") + + // Don't record to the dashboard unless we heard the trailer from + // the buildlet, otherwise it was probably some unrelated error + // (like the VM being killed, or the buildlet crashing due to + // e.g. https://golang.org/issue/9309, since we require a tip + // build of the buildlet to get Trailers support) + if state != "" { + conf := builders[st.name] + var log string + if state != "ok" { + log = st.logs() + } + if err := conf.recordResult(state == "ok", st.rev, log, time.Since(execStartTime)); err != nil { + return fmt.Errorf("Status was %q but failed to report it to the dashboard: %v", state, err) + } + } + if state != "ok" { + + return fmt.Errorf("got Trailer process state %q", state) + } + return nil +} + +type eventAndTime struct { + evt string + t time.Time } // buildStatus is the status of a build. @@ -547,10 +936,15 @@ type buildStatus struct { start time.Time container string // container ID for docker, else it's a VM + // Immutable, used by VM only: + createOp string // Instances.Insert operation name + instName string + mu sync.Mutex // guards following done time.Time // finished running succeeded bool // set when done output bytes.Buffer // stdout and stderr + events []eventAndTime } func (st *buildStatus) setDone(succeeded bool) { @@ -560,6 +954,12 @@ func (st *buildStatus) setDone(succeeded bool) { st.done = time.Now() } +func (st *buildStatus) logEventTime(event string) { + st.mu.Lock() + defer st.mu.Unlock() + st.events = append(st.events, eventAndTime{event, time.Now()}) +} + // htmlStatusLine returns the HTML to show within theblock on // the main page's list of active builds. 
 func (st *buildStatus) htmlStatusLine() string {
@@ -583,8 +983,11 @@ func (st *buildStatus) htmlStatusLine() string {
 		buf.WriteString(", failed")
 	}
+	logsURL := fmt.Sprintf("/logs?name=%s&rev=%s&st=%p", st.name, st.rev, st)
 	if st.container != "" {
-		fmt.Fprintf(&buf, " in container <a href='/logs?name=%s&rev=%s'>%s</a>", st.name, st.rev, st.container)
+		fmt.Fprintf(&buf, " in container <a href='%s'>%s</a>", logsURL, st.container)
+	} else {
+		fmt.Fprintf(&buf, " in VM <a href='%s'>%s</a>", logsURL, st.instName)
 	}
 	t := st.done
@@ -592,6 +995,17 @@ func (st *buildStatus) htmlStatusLine() string {
 		t = st.start
 	}
 	fmt.Fprintf(&buf, ", %v ago\n", time.Since(t))
+	for i, evt := range st.events {
+		var elapsed string
+		if i != 0 {
+			elapsed = fmt.Sprintf("+%0.1fs", evt.t.Sub(st.events[i-1].t).Seconds())
+		}
+		msg := evt.evt
+		if msg == "running_exec" {
+			msg = fmt.Sprintf("<a href='%s'>%s</a>", logsURL, msg)
+		}
+		fmt.Fprintf(&buf, " %7s %v %s\n", elapsed, evt.t.Format(time.RFC3339), msg)
+	}
 	return buf.String()
 }
@@ -607,10 +1021,28 @@ func (st *buildStatus) Write(p []byte) (n int, err error) {
 	st.mu.Lock()
 	defer st.mu.Unlock()
 	const maxBufferSize = 2 << 20 // 2MB of output is way more than we expect.
+	plen := len(p)
 	if st.output.Len()+len(p) > maxBufferSize {
 		p = p[:maxBufferSize-st.output.Len()]
 	}
-	return st.output.Write(p)
+	st.output.Write(p) // bytes.Buffer can't fail
+	return plen, nil
+}
+
+// Stop any previous go-commit-watcher Docker tasks, so they don't
+// pile up upon restarts of the coordinator.
+func stopWatchers() {
+	out, err := exec.Command("docker", "ps").Output()
+	if err != nil {
+		return
+	}
+	for _, line := range strings.Split(string(out), "\n") {
+		if !strings.Contains(line, "go-commit-watcher:") {
+			continue
+		}
+		f := strings.Fields(line)
+		exec.Command("docker", "rm", "-f", "-v", f[0]).Run()
+	}
 }
 
 func startWatching(conf watchConfig) (err error) {
@@ -723,16 +1155,13 @@ func oldContainers() []string {
 // "delete-at" metadata attribute on them when created to some time
 // that's well beyond their expected lifetime.
 func cleanUpOldVMs() {
-	if !hasComputeScope() {
-		log.Printf("The coordinator is not running with access to read and write Compute resources. Background VM cleaning disabled.")
+	if computeService == nil {
 		return
 	}
-	ts := google.ComputeTokenSource("default")
-	computeService, _ := compute.New(oauth2.NewClient(oauth2.NoContext, ts))
 	for {
 		for _, zone := range strings.Split(*cleanZones, ",") {
 			zone = strings.TrimSpace(zone)
-			if err := cleanZoneVMs(computeService, zone); err != nil {
+			if err := cleanZoneVMs(zone); err != nil {
 				log.Printf("Error cleaning VMs in zone %q: %v", zone, err)
 			}
 		}
@@ -741,11 +1170,7 @@ func cleanUpOldVMs() {
 }
 
 // cleanZoneVMs is part of cleanUpOldVMs, operating on a single zone.
-func cleanZoneVMs(svc *compute.Service, zone string) error {
-	proj, err := metadata.ProjectID()
-	if err != nil {
-		return fmt.Errorf("failed to get current GCE ProjectID: %v", err)
-	}
+func cleanZoneVMs(zone string) error {
 	// Fetch the first 500 (default) running instances and clean
 	// thoes. We expect that we'll be running many fewer than
 	// that. Even if we have more, eventually the first 500 will
 	// be dead and get cleaned, freeing up space for the next
 	// partially-different 500.
	// TODO(bradfitz): revist this code if we ever start running
 	// thousands of VMs.
- list, err := svc.Instances.List(proj, zone).Do() + list, err := computeService.Instances.List(projectID, zone).Do() if err != nil { return fmt.Errorf("listing instances: %v", err) } @@ -770,7 +1195,7 @@ func cleanZoneVMs(svc *compute.Service, zone string) error { } if err == nil && time.Now().Unix() > unixDeadline { log.Printf("Deleting expired VM %q in zone %q ...", inst.Name, zone) - deleteVM(svc, zone, inst.Name) + deleteVM(zone, inst.Name) } } } @@ -778,13 +1203,8 @@ func cleanZoneVMs(svc *compute.Service, zone string) error { return nil } -func deleteVM(svc *compute.Service, zone, instName string) { - proj, err := metadata.ProjectID() - if err != nil { - log.Printf("failed to get project id to delete instace: %v", err) - return - } - op, err := svc.Instances.Delete(proj, zone, instName).Do() +func deleteVM(zone, instName string) { + op, err := computeService.Instances.Delete(projectID, zone, instName).Do() if err != nil { log.Printf("Failed to delete instance %q in zone %q: %v", instName, zone, err) return @@ -808,3 +1228,102 @@ func hasComputeScope() bool { } return false } + +// dash is copied from the builder binary. It runs the given method and command on the dashboard. +// +// TODO(bradfitz,adg): unify this somewhere? +// +// If args is non-nil it is encoded as the URL query string. +// If req is non-nil it is JSON-encoded and passed as the body of the HTTP POST. +// If resp is non-nil the server's response is decoded into the value pointed +// to by resp (resp must be a pointer). +func dash(meth, cmd string, args url.Values, req, resp interface{}) error { + const builderVersion = 1 // keep in sync with dashboard/app/build/handler.go + argsCopy := url.Values{"version": {fmt.Sprint(builderVersion)}} + for k, v := range args { + if k == "version" { + panic(`dash: reserved args key: "version"`) + } + argsCopy[k] = v + } + var r *http.Response + var err error + cmd = "https://build.golang.org/" + cmd + "?" + argsCopy.Encode() + switch meth { + case "GET": + if req != nil { + log.Panicf("%s to %s with req", meth, cmd) + } + r, err = http.Get(cmd) + case "POST": + var body io.Reader + if req != nil { + b, err := json.Marshal(req) + if err != nil { + return err + } + body = bytes.NewBuffer(b) + } + r, err = http.Post(cmd, "text/json", body) + default: + log.Panicf("%s: invalid method %q", cmd, meth) + panic("invalid method: " + meth) + } + if err != nil { + return err + } + defer r.Body.Close() + if r.StatusCode != http.StatusOK { + return fmt.Errorf("bad http response: %v", r.Status) + } + body := new(bytes.Buffer) + if _, err := body.ReadFrom(r.Body); err != nil { + return err + } + + // Read JSON-encoded Response into provided resp + // and return an error if present. + var result = struct { + Response interface{} + Error string + }{ + // Put the provided resp in here as it can be a pointer to + // some value we should unmarshal into. 
+ Response: resp, + } + if err = json.Unmarshal(body.Bytes(), &result); err != nil { + log.Printf("json unmarshal %#q: %s\n", body.Bytes(), err) + return err + } + if result.Error != "" { + return errors.New(result.Error) + } + + return nil +} + +func versionTgz(rev string) io.Reader { + var buf bytes.Buffer + zw := gzip.NewWriter(&buf) + tw := tar.NewWriter(zw) + + contents := fmt.Sprintf("devel " + rev) + check(tw.WriteHeader(&tar.Header{ + Name: "VERSION", + Mode: 0644, + Size: int64(len(contents)), + })) + _, err := io.WriteString(tw, contents) + check(err) + check(tw.Close()) + check(zw.Close()) + return bytes.NewReader(buf.Bytes()) +} + +// check is only for things which should be impossible (not even rare) +// to fail. +func check(err error) { + if err != nil { + panic("previously assumed to never fail: " + err.Error()) + } +}