dashboard/coordinator: refactor status handling in prep for VM builders
Currently the coordinator only handles Docker container builders. An upcoming change will add VM builders, running VMs on GCE for OpenBSD, Plan 9, Windows, etc. This change refactors the handling of the coordinator's build status in prep for VM builders. Notably, it doesn't call "docker logs" directly to get logs. It now keeps them in memory instead. Because they're then in memory, we might as well make it keep the details of the last N builds too, which we kept wanting earlier. Updates #9492 Change-Id: I6bae4a5854e7efa3f8a6186ec3670a43c98c4df2 Reviewed-on: https://go-review.googlesource.com/2262 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
005d2be0ba
commit
0a8eb31860
|
@ -51,10 +51,13 @@ var (
|
||||||
watchers = map[string]watchConfig{} // populated once at startup
|
watchers = map[string]watchConfig{} // populated once at startup
|
||||||
donec = make(chan builderRev) // reports of finished builders
|
donec = make(chan builderRev) // reports of finished builders
|
||||||
|
|
||||||
statusMu sync.Mutex
|
statusMu sync.Mutex // guards both status (ongoing ones) and statusDone (just finished)
|
||||||
status = map[builderRev]*buildStatus{}
|
status = map[builderRev]*buildStatus{}
|
||||||
|
statusDone []*buildStatus // finished recently, capped to maxStatusDone
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const maxStatusDone = 30
|
||||||
|
|
||||||
type imageInfo struct {
|
type imageInfo struct {
|
||||||
url string // of tar file
|
url string // of tar file
|
||||||
|
|
||||||
|
@ -175,7 +178,7 @@ func main() {
|
||||||
}
|
}
|
||||||
case done := <-donec:
|
case done := <-donec:
|
||||||
log.Printf("%v done", done)
|
log.Printf("%v done", done)
|
||||||
setStatus(done, nil)
|
markDone(done)
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
if numCurrentBuilds() == 0 && time.Now().After(startTime.Add(10*time.Minute)) {
|
if numCurrentBuilds() == 0 && time.Now().After(startTime.Add(10*time.Minute)) {
|
||||||
// TODO: halt the whole machine to kill the VM or something
|
// TODO: halt the whole machine to kill the VM or something
|
||||||
|
@ -199,17 +202,36 @@ func mayBuildRev(work builderRev) bool {
|
||||||
func setStatus(work builderRev, st *buildStatus) {
|
func setStatus(work builderRev, st *buildStatus) {
|
||||||
statusMu.Lock()
|
statusMu.Lock()
|
||||||
defer statusMu.Unlock()
|
defer statusMu.Unlock()
|
||||||
if st == nil {
|
status[work] = st
|
||||||
delete(status, work)
|
}
|
||||||
} else {
|
|
||||||
status[work] = st
|
func markDone(work builderRev) {
|
||||||
|
statusMu.Lock()
|
||||||
|
defer statusMu.Unlock()
|
||||||
|
st, ok := status[work]
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
delete(status, work)
|
||||||
|
if len(statusDone) == maxStatusDone {
|
||||||
|
copy(statusDone, statusDone[1:])
|
||||||
|
statusDone = statusDone[:len(statusDone)-1]
|
||||||
|
}
|
||||||
|
statusDone = append(statusDone, st)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getStatus(work builderRev) *buildStatus {
|
func getStatus(work builderRev) *buildStatus {
|
||||||
statusMu.Lock()
|
statusMu.Lock()
|
||||||
defer statusMu.Unlock()
|
defer statusMu.Unlock()
|
||||||
return status[work]
|
if st, ok := status[work]; ok {
|
||||||
|
return st
|
||||||
|
}
|
||||||
|
for _, st := range statusDone {
|
||||||
|
if st.builderRev == work {
|
||||||
|
return st
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type byAge []*buildStatus
|
type byAge []*buildStatus
|
||||||
|
@ -220,19 +242,32 @@ func (s byAge) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||||
|
|
||||||
func handleStatus(w http.ResponseWriter, r *http.Request) {
|
func handleStatus(w http.ResponseWriter, r *http.Request) {
|
||||||
var active []*buildStatus
|
var active []*buildStatus
|
||||||
|
var recent []*buildStatus
|
||||||
statusMu.Lock()
|
statusMu.Lock()
|
||||||
for _, st := range status {
|
for _, st := range status {
|
||||||
active = append(active, st)
|
active = append(active, st)
|
||||||
}
|
}
|
||||||
|
recent = append(recent, statusDone...)
|
||||||
statusMu.Unlock()
|
statusMu.Unlock()
|
||||||
|
|
||||||
fmt.Fprintf(w, "<html><body><h1>Go build coordinator</h1>%d of max %d builds running:<p><pre>", len(status), *maxBuilds)
|
|
||||||
sort.Sort(byAge(active))
|
sort.Sort(byAge(active))
|
||||||
|
sort.Sort(byAge(recent))
|
||||||
|
|
||||||
|
io.WriteString(w, "<html><body><h1>Go build coordinator</h1>")
|
||||||
|
|
||||||
|
fmt.Fprintf(w, "<h2>running</h2>%d of max %d builds running:<p><pre>", len(status), *maxBuilds)
|
||||||
for _, st := range active {
|
for _, st := range active {
|
||||||
fmt.Fprintf(w, "%-22s hg %s in container <a href='/logs?name=%s&rev=%s'>%s</a>, %v ago\n", st.name, st.rev, st.name, st.rev,
|
io.WriteString(w, st.htmlStatusLine())
|
||||||
st.container, time.Now().Sub(st.start))
|
|
||||||
}
|
}
|
||||||
fmt.Fprintf(w, "</pre><h2>disk space</h2><pre>%s</pre></body></html>", html.EscapeString(diskFree()))
|
io.WriteString(w, "</pre>")
|
||||||
|
|
||||||
|
io.WriteString(w, "<h2>recently completed</h2><pre>")
|
||||||
|
for _, st := range recent {
|
||||||
|
io.WriteString(w, st.htmlStatusLine())
|
||||||
|
}
|
||||||
|
io.WriteString(w, "</pre>")
|
||||||
|
|
||||||
|
fmt.Fprintf(w, "<h2>disk space</h2><pre>%s</pre></body></html>", html.EscapeString(diskFree()))
|
||||||
}
|
}
|
||||||
|
|
||||||
func diskFree() string {
|
func diskFree() string {
|
||||||
|
@ -243,19 +278,15 @@ func diskFree() string {
|
||||||
func handleLogs(w http.ResponseWriter, r *http.Request) {
|
func handleLogs(w http.ResponseWriter, r *http.Request) {
|
||||||
st := getStatus(builderRev{r.FormValue("name"), r.FormValue("rev")})
|
st := getStatus(builderRev{r.FormValue("name"), r.FormValue("rev")})
|
||||||
if st == nil {
|
if st == nil {
|
||||||
fmt.Fprintf(w, "<html><body><h1>not building</h1>")
|
http.NotFound(w, r)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
out, err := exec.Command("docker", "logs", st.container).CombinedOutput()
|
|
||||||
if err != nil {
|
|
||||||
log.Print(err)
|
|
||||||
http.Error(w, "Error fetching logs. Already finished?", 500)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
key := builderKey(st.name)
|
|
||||||
logs := strings.Replace(string(out), key, "BUILDERKEY", -1)
|
|
||||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||||
io.WriteString(w, logs)
|
io.WriteString(w, st.logs())
|
||||||
|
// TODO: if st is still building, stream them to the user with
|
||||||
|
// http.Flusher.Flush and CloseNotifier and registering interest
|
||||||
|
// of new writes with the buildStatus. Will require moving the
|
||||||
|
// BUILDERKEY scrubbing into the Write method.
|
||||||
}
|
}
|
||||||
|
|
||||||
func findWorkLoop(builderName, dashURL string, work chan<- builderRev) {
|
func findWorkLoop(builderName, dashURL string, work chan<- builderRev) {
|
||||||
|
@ -296,8 +327,10 @@ func findWork(builderName, dashURL string) (rev string, err error) {
|
||||||
return rev, err
|
return rev, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// builderRev is a build configuration type and a revision.
|
||||||
type builderRev struct {
|
type builderRev struct {
|
||||||
name, rev string
|
name string // e.g. "linux-amd64-race"
|
||||||
|
rev string // lowercase hex git hash
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns the part after "docker run"
|
// returns the part after "docker run"
|
||||||
|
@ -472,29 +505,112 @@ func startBuilding(conf buildConfig, rev string) (*buildStatus, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
container := strings.TrimSpace(string(all))
|
container := strings.TrimSpace(string(all))
|
||||||
go func() {
|
bs := &buildStatus{
|
||||||
all, err := exec.Command("docker", "wait", container).CombinedOutput()
|
|
||||||
log.Printf("docker wait %s/%s: %v, %s", container, rev, err, strings.TrimSpace(string(all)))
|
|
||||||
donec <- builderRev{conf.name, rev}
|
|
||||||
exec.Command("docker", "rm", container).Run()
|
|
||||||
}()
|
|
||||||
return &buildStatus{
|
|
||||||
builderRev: builderRev{
|
builderRev: builderRev{
|
||||||
name: conf.name,
|
name: conf.name,
|
||||||
rev: rev,
|
rev: rev,
|
||||||
},
|
},
|
||||||
container: container,
|
container: container,
|
||||||
start: time.Now(),
|
start: time.Now(),
|
||||||
}, nil
|
}
|
||||||
|
go func() {
|
||||||
|
all, err := exec.Command("docker", "wait", container).CombinedOutput()
|
||||||
|
output := strings.TrimSpace(string(all))
|
||||||
|
var ok bool
|
||||||
|
if err == nil {
|
||||||
|
exit, err := strconv.Atoi(output)
|
||||||
|
ok = (err == nil && exit == 0)
|
||||||
|
}
|
||||||
|
bs.setDone(ok)
|
||||||
|
log.Printf("docker wait %s/%s: %v, %s", container, rev, err, output)
|
||||||
|
donec <- builderRev{conf.name, rev}
|
||||||
|
exec.Command("docker", "rm", container).Run()
|
||||||
|
}()
|
||||||
|
go func() {
|
||||||
|
cmd := exec.Command("docker", "logs", "-f", container)
|
||||||
|
cmd.Stdout = bs
|
||||||
|
cmd.Stderr = bs
|
||||||
|
if err := cmd.Run(); err != nil {
|
||||||
|
// The docker logs subcommand always returns
|
||||||
|
// success, even if the underlying process
|
||||||
|
// fails.
|
||||||
|
log.Printf("failed to follow docker logs of %s: %v", container, err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
return bs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// buildStatus is the status of a build.
|
||||||
type buildStatus struct {
|
type buildStatus struct {
|
||||||
|
// Immutable:
|
||||||
builderRev
|
builderRev
|
||||||
container string
|
|
||||||
start time.Time
|
start time.Time
|
||||||
|
container string // container ID for docker, else it's a VM
|
||||||
|
|
||||||
mu sync.Mutex
|
mu sync.Mutex // guards following
|
||||||
// ...
|
done time.Time // finished running
|
||||||
|
succeeded bool // set when done
|
||||||
|
output bytes.Buffer // stdout and stderr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (st *buildStatus) setDone(succeeded bool) {
|
||||||
|
st.mu.Lock()
|
||||||
|
defer st.mu.Unlock()
|
||||||
|
st.succeeded = succeeded
|
||||||
|
st.done = time.Now()
|
||||||
|
}
|
||||||
|
|
||||||
|
// htmlStatusLine returns the HTML to show within the <pre> block on
|
||||||
|
// the main page's list of active builds.
|
||||||
|
func (st *buildStatus) htmlStatusLine() string {
|
||||||
|
st.mu.Lock()
|
||||||
|
defer st.mu.Unlock()
|
||||||
|
|
||||||
|
urlPrefix := "https://go-review.googlesource.com/#/q/"
|
||||||
|
if strings.Contains(st.name, "gccgo") {
|
||||||
|
urlPrefix = "https://code.google.com/p/gofrontend/source/detail?r="
|
||||||
|
}
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
fmt.Fprintf(&buf, "<a href='https://github.com/golang/go/wiki/DashboardBuilders'>%s</a> rev <a href='%s%s'>%s</a>",
|
||||||
|
st.name, urlPrefix, st.rev, st.rev)
|
||||||
|
|
||||||
|
if st.done.IsZero() {
|
||||||
|
buf.WriteString(", running")
|
||||||
|
} else if st.succeeded {
|
||||||
|
buf.WriteString(", succeeded")
|
||||||
|
} else {
|
||||||
|
buf.WriteString(", failed")
|
||||||
|
}
|
||||||
|
|
||||||
|
if st.container != "" {
|
||||||
|
fmt.Fprintf(&buf, " in container <a href='/logs?name=%s&rev=%s'>%s</a>", st.name, st.rev, st.container)
|
||||||
|
}
|
||||||
|
|
||||||
|
t := st.done
|
||||||
|
if t.IsZero() {
|
||||||
|
t = st.start
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&buf, ", %v ago\n", time.Since(t))
|
||||||
|
return buf.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (st *buildStatus) logs() string {
|
||||||
|
st.mu.Lock()
|
||||||
|
logs := st.output.String()
|
||||||
|
st.mu.Unlock()
|
||||||
|
key := builderKey(st.name)
|
||||||
|
return strings.Replace(string(logs), key, "BUILDERKEY", -1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (st *buildStatus) Write(p []byte) (n int, err error) {
|
||||||
|
st.mu.Lock()
|
||||||
|
defer st.mu.Unlock()
|
||||||
|
const maxBufferSize = 2 << 20 // 2MB of output is way more than we expect.
|
||||||
|
if st.output.Len()+len(p) > maxBufferSize {
|
||||||
|
p = p[:maxBufferSize-st.output.Len()]
|
||||||
|
}
|
||||||
|
return st.output.Write(p)
|
||||||
}
|
}
|
||||||
|
|
||||||
func startWatching(conf watchConfig) (err error) {
|
func startWatching(conf watchConfig) (err error) {
|
||||||
|
|
Loading…
Reference in New Issue