godoc: fix quadratic and ASCII-only struct field linkification

Fixes two problems with adding the #StructType.FieldName anchors for
linkified struct fields:

* the old code was quadratic

* the old code only dealt with ASCII only

Change-Id: If03a367a94d05d3d470e1326dfb573037088ff78
Reviewed-on: https://go-review.googlesource.com/35486
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
This commit is contained in:
Brad Fitzpatrick 2017-01-20 18:35:00 +00:00
parent 721f218496
commit 61efd71121
2 changed files with 121 additions and 36 deletions

View File

@ -233,24 +233,28 @@ func addStructFieldIDAttributes(buf *bytes.Buffer, name string, st *ast.StructTy
if st.Fields == nil {
return
}
var scratch bytes.Buffer
// needsLink is a set of identifiers that still need to be
// linked, where value == key, to avoid an allocation in func
// linkedField.
needsLink := make(map[string]string)
for _, f := range st.Fields.List {
if len(f.Names) == 0 {
continue
}
fieldName := f.Names[0].Name
scratch.Reset()
var added bool
foreachLine(buf.Bytes(), func(line []byte) {
if !added && isLineForStructFieldID(line, fieldName) {
added = true
fmt.Fprintf(&scratch, `<span id="%s.%s"></span>`, name, fieldName)
needsLink[fieldName] = fieldName
}
scratch.Write(line)
var newBuf bytes.Buffer
foreachLine(buf.Bytes(), func(line []byte) {
if fieldName := linkedField(line, needsLink); fieldName != "" {
fmt.Fprintf(&newBuf, `<span id="%s.%s"></span>`, name, fieldName)
delete(needsLink, fieldName)
}
newBuf.Write(line)
})
buf.Reset()
buf.Write(scratch.Bytes())
}
buf.Write(newBuf.Bytes())
}
// foreachLine calls fn for each line of in, where a line includes
@ -270,9 +274,12 @@ func foreachLine(in []byte, fn func(line []byte)) {
// commentPrefix is the line prefix for comments after they've been HTMLified.
var commentPrefix = []byte(`<span class="comment">// `)
// isLineForStructFieldID reports whether line is a line we should
// add a <span id="#StructName.FieldName"> to. Only the fieldName is provided.
func isLineForStructFieldID(line []byte, fieldName string) bool {
// linkedField determines whether the given line starts with an
// identifer in the provided ids map (mapping from identifier to the
// same identifier). The line can start with either an identifier or
// an identifier in a comment. If one matches, it returns the
// identifier that matched. Otherwise it returns the empty string.
func linkedField(line []byte, ids map[string]string) string {
line = bytes.TrimSpace(line)
// For fields with a doc string of the
@ -292,13 +299,39 @@ func isLineForStructFieldID(line []byte, fieldName string) bool {
//
// TODO: do this better, so it works for all
// comments, including unconventional ones.
// For comments
if bytes.HasPrefix(line, commentPrefix) {
if matchesIdentBoundary(line[len(commentPrefix):], fieldName) {
return true
line = line[len(commentPrefix):]
}
id := scanIdentifier(line)
if len(id) == 0 {
// No leading identifier. Avoid map lookup for
// somewhat common case.
return ""
}
return matchesIdentBoundary(line, fieldName)
return ids[string(id)]
}
// scanIdentifier scans a valid Go identifier off the front of v and
// either returns a subslice of v if there's a valid identifier, or
// returns a zero-length slice.
func scanIdentifier(v []byte) []byte {
var n int // number of leading bytes of v belonging to an identifier
for {
r, width := utf8.DecodeRune(v[n:])
if !(isLetter(r) || n > 0 && isDigit(r)) {
break
}
n += width
}
return v[:n]
}
func isLetter(ch rune) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
}
func isDigit(ch rune) bool {
return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
}
// matchesIdentBoundary reports whether line matches /^ident\b/.

View File

@ -5,6 +5,8 @@
package godoc
import (
"bytes"
"fmt"
"go/ast"
"go/parser"
"go/token"
@ -123,10 +125,7 @@ func TestSanitizeFunc(t *testing.T) {
// Test that we add <span id="StructName.FieldName"> elements
// to the HTML of struct fields.
func TestStructFieldsIDAttributes(t *testing.T) {
p := &Presentation{
DeclLinks: true,
}
src := []byte(`
got := linkifyStructFields(t, []byte(`
package foo
type T struct {
@ -137,8 +136,32 @@ type T struct {
// Opt, if non-nil, is an option.
Opt *int
// Опция - другое поле.
Опция bool
}
`)
`))
want := `type T struct {
<span id="T.NoDoc"></span>NoDoc <a href="/pkg/builtin/#string">string</a>
<span id="T.Doc"></span><span class="comment">// Doc has a comment.</span>
Doc <a href="/pkg/builtin/#string">string</a>
<span id="T.Opt"></span><span class="comment">// Opt, if non-nil, is an option.</span>
Opt *<a href="/pkg/builtin/#int">int</a>
<span id="T.Опция"></span><span class="comment">// Опция - другое поле.</span>
Опция <a href="/pkg/builtin/#bool">bool</a>
}`
if got != want {
t.Errorf("got: %s\n\nwant: %s\n", got, want)
}
}
func linkifyStructFields(t *testing.T, src []byte) string {
p := &Presentation{
DeclLinks: true,
}
fset := token.NewFileSet()
af, err := parser.ParseFile(fset, "foo.go", src, parser.ParseComments)
if err != nil {
@ -148,17 +171,46 @@ type T struct {
pi := &PageInfo{
FSet: fset,
}
got := p.node_htmlFunc(pi, genDecl, true)
want := `type T struct {
<span id="T.NoDoc"></span>NoDoc <a href="/pkg/builtin/#string">string</a>
return p.node_htmlFunc(pi, genDecl, true)
}
<span id="T.Doc"></span><span class="comment">// Doc has a comment.</span>
Doc <a href="/pkg/builtin/#string">string</a>
<span id="T.Opt"></span><span class="comment">// Opt, if non-nil, is an option.</span>
Opt *<a href="/pkg/builtin/#int">int</a>
}`
if got != want {
t.Errorf("got: %s\n\nwant: %s\n", got, want)
// Verify that scanIdentifier isn't quadratic.
// This doesn't actually measure and fail on its own, but it was previously
// very obvious when running by hand.
//
// TODO: if there's a reliable and non-flaky way to test this, do so.
// Maybe count user CPU time instead of wall time? But that's not easy
// to do portably in Go.
func TestStructField(t *testing.T) {
for _, n := range []int{10, 100, 1000, 10000} {
n := n
t.Run(fmt.Sprint(n), func(t *testing.T) {
var buf bytes.Buffer
fmt.Fprintf(&buf, "package foo\n\ntype T struct {\n")
for i := 0; i < n; i++ {
fmt.Fprintf(&buf, "\t// Field%d is foo.\n\tField%d int\n\n", i, i)
}
fmt.Fprintf(&buf, "}\n")
linkifyStructFields(t, buf.Bytes())
})
}
}
func TestScanIdentifier(t *testing.T) {
tests := []struct {
in, want string
}{
{"foo bar", "foo"},
{"foo/bar", "foo"},
{" foo", ""},
{"фоо", "фоо"},
{"f123", "f123"},
{"123f", ""},
}
for _, tt := range tests {
got := scanIdentifier([]byte(tt.in))
if string(got) != tt.want {
t.Errorf("scanIdentifier(%q) = %q; want %q", tt.in, got, tt.want)
}
}
}