diff --git a/godoc/godoc.go b/godoc/godoc.go index 8bda89ad..1063244a 100644 --- a/godoc/godoc.go +++ b/godoc/godoc.go @@ -233,24 +233,28 @@ func addStructFieldIDAttributes(buf *bytes.Buffer, name string, st *ast.StructTy if st.Fields == nil { return } - var scratch bytes.Buffer + // needsLink is a set of identifiers that still need to be + // linked, where value == key, to avoid an allocation in func + // linkedField. + needsLink := make(map[string]string) + for _, f := range st.Fields.List { if len(f.Names) == 0 { continue } fieldName := f.Names[0].Name - scratch.Reset() - var added bool - foreachLine(buf.Bytes(), func(line []byte) { - if !added && isLineForStructFieldID(line, fieldName) { - added = true - fmt.Fprintf(&scratch, ``, name, fieldName) - } - scratch.Write(line) - }) - buf.Reset() - buf.Write(scratch.Bytes()) + needsLink[fieldName] = fieldName } + var newBuf bytes.Buffer + foreachLine(buf.Bytes(), func(line []byte) { + if fieldName := linkedField(line, needsLink); fieldName != "" { + fmt.Fprintf(&newBuf, ``, name, fieldName) + delete(needsLink, fieldName) + } + newBuf.Write(line) + }) + buf.Reset() + buf.Write(newBuf.Bytes()) } // foreachLine calls fn for each line of in, where a line includes @@ -270,9 +274,12 @@ func foreachLine(in []byte, fn func(line []byte)) { // commentPrefix is the line prefix for comments after they've been HTMLified. var commentPrefix = []byte(`// `) -// isLineForStructFieldID reports whether line is a line we should -// add a to. Only the fieldName is provided. -func isLineForStructFieldID(line []byte, fieldName string) bool { +// linkedField determines whether the given line starts with an +// identifer in the provided ids map (mapping from identifier to the +// same identifier). The line can start with either an identifier or +// an identifier in a comment. If one matches, it returns the +// identifier that matched. Otherwise it returns the empty string. +func linkedField(line []byte, ids map[string]string) string { line = bytes.TrimSpace(line) // For fields with a doc string of the @@ -292,13 +299,39 @@ func isLineForStructFieldID(line []byte, fieldName string) bool { // // TODO: do this better, so it works for all // comments, including unconventional ones. - // For comments if bytes.HasPrefix(line, commentPrefix) { - if matchesIdentBoundary(line[len(commentPrefix):], fieldName) { - return true - } + line = line[len(commentPrefix):] } - return matchesIdentBoundary(line, fieldName) + id := scanIdentifier(line) + if len(id) == 0 { + // No leading identifier. Avoid map lookup for + // somewhat common case. + return "" + } + return ids[string(id)] +} + +// scanIdentifier scans a valid Go identifier off the front of v and +// either returns a subslice of v if there's a valid identifier, or +// returns a zero-length slice. +func scanIdentifier(v []byte) []byte { + var n int // number of leading bytes of v belonging to an identifier + for { + r, width := utf8.DecodeRune(v[n:]) + if !(isLetter(r) || n > 0 && isDigit(r)) { + break + } + n += width + } + return v[:n] +} + +func isLetter(ch rune) bool { + return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch) +} + +func isDigit(ch rune) bool { + return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch) } // matchesIdentBoundary reports whether line matches /^ident\b/. diff --git a/godoc/godoc_test.go b/godoc/godoc_test.go index 51d41b3a..011eca33 100644 --- a/godoc/godoc_test.go +++ b/godoc/godoc_test.go @@ -5,6 +5,8 @@ package godoc import ( + "bytes" + "fmt" "go/ast" "go/parser" "go/token" @@ -123,10 +125,7 @@ func TestSanitizeFunc(t *testing.T) { // Test that we add elements // to the HTML of struct fields. func TestStructFieldsIDAttributes(t *testing.T) { - p := &Presentation{ - DeclLinks: true, - } - src := []byte(` + got := linkifyStructFields(t, []byte(` package foo type T struct { @@ -137,8 +136,32 @@ type T struct { // Opt, if non-nil, is an option. Opt *int + + // Опция - другое поле. + Опция bool } -`) +`)) + want := `type T struct { +NoDoc string + +// Doc has a comment. +Doc string + +// Opt, if non-nil, is an option. +Opt *int + +// Опция - другое поле. +Опция bool +}` + if got != want { + t.Errorf("got: %s\n\nwant: %s\n", got, want) + } +} + +func linkifyStructFields(t *testing.T, src []byte) string { + p := &Presentation{ + DeclLinks: true, + } fset := token.NewFileSet() af, err := parser.ParseFile(fset, "foo.go", src, parser.ParseComments) if err != nil { @@ -148,17 +171,46 @@ type T struct { pi := &PageInfo{ FSet: fset, } - got := p.node_htmlFunc(pi, genDecl, true) - want := `type T struct { -NoDoc string + return p.node_htmlFunc(pi, genDecl, true) +} -// Doc has a comment. -Doc string - -// Opt, if non-nil, is an option. -Opt *int -}` - if got != want { - t.Errorf("got: %s\n\nwant: %s\n", got, want) +// Verify that scanIdentifier isn't quadratic. +// This doesn't actually measure and fail on its own, but it was previously +// very obvious when running by hand. +// +// TODO: if there's a reliable and non-flaky way to test this, do so. +// Maybe count user CPU time instead of wall time? But that's not easy +// to do portably in Go. +func TestStructField(t *testing.T) { + for _, n := range []int{10, 100, 1000, 10000} { + n := n + t.Run(fmt.Sprint(n), func(t *testing.T) { + var buf bytes.Buffer + fmt.Fprintf(&buf, "package foo\n\ntype T struct {\n") + for i := 0; i < n; i++ { + fmt.Fprintf(&buf, "\t// Field%d is foo.\n\tField%d int\n\n", i, i) + } + fmt.Fprintf(&buf, "}\n") + linkifyStructFields(t, buf.Bytes()) + }) + } +} + +func TestScanIdentifier(t *testing.T) { + tests := []struct { + in, want string + }{ + {"foo bar", "foo"}, + {"foo/bar", "foo"}, + {" foo", ""}, + {"фоо", "фоо"}, + {"f123", "f123"}, + {"123f", ""}, + } + for _, tt := range tests { + got := scanIdentifier([]byte(tt.in)) + if string(got) != tt.want { + t.Errorf("scanIdentifier(%q) = %q; want %q", tt.in, got, tt.want) + } } }