dashboard/coordinator: refactor status handling in prep for VM builders

Currently the coordinator only handles Docker container builders. An
upcoming change will add VM builders, running VMs on GCE for OpenBSD,
Plan 9, Windows, etc.

This change refactors the handling of the coordinator's build status
in prep for VM builders. Notably, it doesn't call "docker logs"
directly to get logs. It now keeps them in memory instead. Because
they're then in memory, we might as well make it keep the details of
the last N builds too, which we kept wanting earlier.

Updates #9492

Change-Id: I6bae4a5854e7efa3f8a6186ec3670a43c98c4df2
Reviewed-on: https://go-review.googlesource.com/2262
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
Brad Fitzpatrick 2015-01-02 15:00:54 -08:00
parent 005d2be0ba
commit 0a8eb31860
1 changed files with 150 additions and 34 deletions

View File

@ -51,10 +51,13 @@ var (
watchers = map[string]watchConfig{} // populated once at startup watchers = map[string]watchConfig{} // populated once at startup
donec = make(chan builderRev) // reports of finished builders donec = make(chan builderRev) // reports of finished builders
statusMu sync.Mutex statusMu sync.Mutex // guards both status (ongoing ones) and statusDone (just finished)
status = map[builderRev]*buildStatus{} status = map[builderRev]*buildStatus{}
statusDone []*buildStatus // finished recently, capped to maxStatusDone
) )
const maxStatusDone = 30
type imageInfo struct { type imageInfo struct {
url string // of tar file url string // of tar file
@ -175,7 +178,7 @@ func main() {
} }
case done := <-donec: case done := <-donec:
log.Printf("%v done", done) log.Printf("%v done", done)
setStatus(done, nil) markDone(done)
case <-ticker.C: case <-ticker.C:
if numCurrentBuilds() == 0 && time.Now().After(startTime.Add(10*time.Minute)) { if numCurrentBuilds() == 0 && time.Now().After(startTime.Add(10*time.Minute)) {
// TODO: halt the whole machine to kill the VM or something // TODO: halt the whole machine to kill the VM or something
@ -199,17 +202,36 @@ func mayBuildRev(work builderRev) bool {
func setStatus(work builderRev, st *buildStatus) { func setStatus(work builderRev, st *buildStatus) {
statusMu.Lock() statusMu.Lock()
defer statusMu.Unlock() defer statusMu.Unlock()
if st == nil {
delete(status, work)
} else {
status[work] = st status[work] = st
}
func markDone(work builderRev) {
statusMu.Lock()
defer statusMu.Unlock()
st, ok := status[work]
if !ok {
return
} }
delete(status, work)
if len(statusDone) == maxStatusDone {
copy(statusDone, statusDone[1:])
statusDone = statusDone[:len(statusDone)-1]
}
statusDone = append(statusDone, st)
} }
func getStatus(work builderRev) *buildStatus { func getStatus(work builderRev) *buildStatus {
statusMu.Lock() statusMu.Lock()
defer statusMu.Unlock() defer statusMu.Unlock()
return status[work] if st, ok := status[work]; ok {
return st
}
for _, st := range statusDone {
if st.builderRev == work {
return st
}
}
return nil
} }
type byAge []*buildStatus type byAge []*buildStatus
@ -220,19 +242,32 @@ func (s byAge) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func handleStatus(w http.ResponseWriter, r *http.Request) { func handleStatus(w http.ResponseWriter, r *http.Request) {
var active []*buildStatus var active []*buildStatus
var recent []*buildStatus
statusMu.Lock() statusMu.Lock()
for _, st := range status { for _, st := range status {
active = append(active, st) active = append(active, st)
} }
recent = append(recent, statusDone...)
statusMu.Unlock() statusMu.Unlock()
fmt.Fprintf(w, "<html><body><h1>Go build coordinator</h1>%d of max %d builds running:<p><pre>", len(status), *maxBuilds)
sort.Sort(byAge(active)) sort.Sort(byAge(active))
sort.Sort(byAge(recent))
io.WriteString(w, "<html><body><h1>Go build coordinator</h1>")
fmt.Fprintf(w, "<h2>running</h2>%d of max %d builds running:<p><pre>", len(status), *maxBuilds)
for _, st := range active { for _, st := range active {
fmt.Fprintf(w, "%-22s hg %s in container <a href='/logs?name=%s&rev=%s'>%s</a>, %v ago\n", st.name, st.rev, st.name, st.rev, io.WriteString(w, st.htmlStatusLine())
st.container, time.Now().Sub(st.start))
} }
fmt.Fprintf(w, "</pre><h2>disk space</h2><pre>%s</pre></body></html>", html.EscapeString(diskFree())) io.WriteString(w, "</pre>")
io.WriteString(w, "<h2>recently completed</h2><pre>")
for _, st := range recent {
io.WriteString(w, st.htmlStatusLine())
}
io.WriteString(w, "</pre>")
fmt.Fprintf(w, "<h2>disk space</h2><pre>%s</pre></body></html>", html.EscapeString(diskFree()))
} }
func diskFree() string { func diskFree() string {
@ -243,19 +278,15 @@ func diskFree() string {
func handleLogs(w http.ResponseWriter, r *http.Request) { func handleLogs(w http.ResponseWriter, r *http.Request) {
st := getStatus(builderRev{r.FormValue("name"), r.FormValue("rev")}) st := getStatus(builderRev{r.FormValue("name"), r.FormValue("rev")})
if st == nil { if st == nil {
fmt.Fprintf(w, "<html><body><h1>not building</h1>") http.NotFound(w, r)
return return
} }
out, err := exec.Command("docker", "logs", st.container).CombinedOutput()
if err != nil {
log.Print(err)
http.Error(w, "Error fetching logs. Already finished?", 500)
return
}
key := builderKey(st.name)
logs := strings.Replace(string(out), key, "BUILDERKEY", -1)
w.Header().Set("Content-Type", "text/plain; charset=utf-8") w.Header().Set("Content-Type", "text/plain; charset=utf-8")
io.WriteString(w, logs) io.WriteString(w, st.logs())
// TODO: if st is still building, stream them to the user with
// http.Flusher.Flush and CloseNotifier and registering interest
// of new writes with the buildStatus. Will require moving the
// BUILDERKEY scrubbing into the Write method.
} }
func findWorkLoop(builderName, dashURL string, work chan<- builderRev) { func findWorkLoop(builderName, dashURL string, work chan<- builderRev) {
@ -296,8 +327,10 @@ func findWork(builderName, dashURL string) (rev string, err error) {
return rev, err return rev, err
} }
// builderRev is a build configuration type and a revision.
type builderRev struct { type builderRev struct {
name, rev string name string // e.g. "linux-amd64-race"
rev string // lowercase hex git hash
} }
// returns the part after "docker run" // returns the part after "docker run"
@ -472,29 +505,112 @@ func startBuilding(conf buildConfig, rev string) (*buildStatus, error) {
return nil, err return nil, err
} }
container := strings.TrimSpace(string(all)) container := strings.TrimSpace(string(all))
go func() { bs := &buildStatus{
all, err := exec.Command("docker", "wait", container).CombinedOutput()
log.Printf("docker wait %s/%s: %v, %s", container, rev, err, strings.TrimSpace(string(all)))
donec <- builderRev{conf.name, rev}
exec.Command("docker", "rm", container).Run()
}()
return &buildStatus{
builderRev: builderRev{ builderRev: builderRev{
name: conf.name, name: conf.name,
rev: rev, rev: rev,
}, },
container: container, container: container,
start: time.Now(), start: time.Now(),
}, nil }
go func() {
all, err := exec.Command("docker", "wait", container).CombinedOutput()
output := strings.TrimSpace(string(all))
var ok bool
if err == nil {
exit, err := strconv.Atoi(output)
ok = (err == nil && exit == 0)
}
bs.setDone(ok)
log.Printf("docker wait %s/%s: %v, %s", container, rev, err, output)
donec <- builderRev{conf.name, rev}
exec.Command("docker", "rm", container).Run()
}()
go func() {
cmd := exec.Command("docker", "logs", "-f", container)
cmd.Stdout = bs
cmd.Stderr = bs
if err := cmd.Run(); err != nil {
// The docker logs subcommand always returns
// success, even if the underlying process
// fails.
log.Printf("failed to follow docker logs of %s: %v", container, err)
}
}()
return bs, nil
} }
// buildStatus is the status of a build.
type buildStatus struct { type buildStatus struct {
// Immutable:
builderRev builderRev
container string
start time.Time start time.Time
container string // container ID for docker, else it's a VM
mu sync.Mutex mu sync.Mutex // guards following
// ... done time.Time // finished running
succeeded bool // set when done
output bytes.Buffer // stdout and stderr
}
func (st *buildStatus) setDone(succeeded bool) {
st.mu.Lock()
defer st.mu.Unlock()
st.succeeded = succeeded
st.done = time.Now()
}
// htmlStatusLine returns the HTML to show within the <pre> block on
// the main page's list of active builds.
func (st *buildStatus) htmlStatusLine() string {
st.mu.Lock()
defer st.mu.Unlock()
urlPrefix := "https://go-review.googlesource.com/#/q/"
if strings.Contains(st.name, "gccgo") {
urlPrefix = "https://code.google.com/p/gofrontend/source/detail?r="
}
var buf bytes.Buffer
fmt.Fprintf(&buf, "<a href='https://github.com/golang/go/wiki/DashboardBuilders'>%s</a> rev <a href='%s%s'>%s</a>",
st.name, urlPrefix, st.rev, st.rev)
if st.done.IsZero() {
buf.WriteString(", running")
} else if st.succeeded {
buf.WriteString(", succeeded")
} else {
buf.WriteString(", failed")
}
if st.container != "" {
fmt.Fprintf(&buf, " in container <a href='/logs?name=%s&rev=%s'>%s</a>", st.name, st.rev, st.container)
}
t := st.done
if t.IsZero() {
t = st.start
}
fmt.Fprintf(&buf, ", %v ago\n", time.Since(t))
return buf.String()
}
func (st *buildStatus) logs() string {
st.mu.Lock()
logs := st.output.String()
st.mu.Unlock()
key := builderKey(st.name)
return strings.Replace(string(logs), key, "BUILDERKEY", -1)
}
func (st *buildStatus) Write(p []byte) (n int, err error) {
st.mu.Lock()
defer st.mu.Unlock()
const maxBufferSize = 2 << 20 // 2MB of output is way more than we expect.
if st.output.Len()+len(p) > maxBufferSize {
p = p[:maxBufferSize-st.output.Len()]
}
return st.output.Write(p)
} }
func startWatching(conf watchConfig) (err error) { func startWatching(conf watchConfig) (err error) {