godoc: fix quadratic and ASCII-only struct field linkification
Fixes two problems with adding the #StructType.FieldName anchors for linkified struct fields: * the old code was quadratic * the old code only dealt with ASCII only Change-Id: If03a367a94d05d3d470e1326dfb573037088ff78 Reviewed-on: https://go-review.googlesource.com/35486 Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Robert Griesemer <gri@golang.org>
This commit is contained in:
parent
721f218496
commit
61efd71121
|
@ -233,24 +233,28 @@ func addStructFieldIDAttributes(buf *bytes.Buffer, name string, st *ast.StructTy
|
||||||
if st.Fields == nil {
|
if st.Fields == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
var scratch bytes.Buffer
|
// needsLink is a set of identifiers that still need to be
|
||||||
|
// linked, where value == key, to avoid an allocation in func
|
||||||
|
// linkedField.
|
||||||
|
needsLink := make(map[string]string)
|
||||||
|
|
||||||
for _, f := range st.Fields.List {
|
for _, f := range st.Fields.List {
|
||||||
if len(f.Names) == 0 {
|
if len(f.Names) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
fieldName := f.Names[0].Name
|
fieldName := f.Names[0].Name
|
||||||
scratch.Reset()
|
needsLink[fieldName] = fieldName
|
||||||
var added bool
|
|
||||||
foreachLine(buf.Bytes(), func(line []byte) {
|
|
||||||
if !added && isLineForStructFieldID(line, fieldName) {
|
|
||||||
added = true
|
|
||||||
fmt.Fprintf(&scratch, `<span id="%s.%s"></span>`, name, fieldName)
|
|
||||||
}
|
|
||||||
scratch.Write(line)
|
|
||||||
})
|
|
||||||
buf.Reset()
|
|
||||||
buf.Write(scratch.Bytes())
|
|
||||||
}
|
}
|
||||||
|
var newBuf bytes.Buffer
|
||||||
|
foreachLine(buf.Bytes(), func(line []byte) {
|
||||||
|
if fieldName := linkedField(line, needsLink); fieldName != "" {
|
||||||
|
fmt.Fprintf(&newBuf, `<span id="%s.%s"></span>`, name, fieldName)
|
||||||
|
delete(needsLink, fieldName)
|
||||||
|
}
|
||||||
|
newBuf.Write(line)
|
||||||
|
})
|
||||||
|
buf.Reset()
|
||||||
|
buf.Write(newBuf.Bytes())
|
||||||
}
|
}
|
||||||
|
|
||||||
// foreachLine calls fn for each line of in, where a line includes
|
// foreachLine calls fn for each line of in, where a line includes
|
||||||
|
@ -270,9 +274,12 @@ func foreachLine(in []byte, fn func(line []byte)) {
|
||||||
// commentPrefix is the line prefix for comments after they've been HTMLified.
|
// commentPrefix is the line prefix for comments after they've been HTMLified.
|
||||||
var commentPrefix = []byte(`<span class="comment">// `)
|
var commentPrefix = []byte(`<span class="comment">// `)
|
||||||
|
|
||||||
// isLineForStructFieldID reports whether line is a line we should
|
// linkedField determines whether the given line starts with an
|
||||||
// add a <span id="#StructName.FieldName"> to. Only the fieldName is provided.
|
// identifer in the provided ids map (mapping from identifier to the
|
||||||
func isLineForStructFieldID(line []byte, fieldName string) bool {
|
// same identifier). The line can start with either an identifier or
|
||||||
|
// an identifier in a comment. If one matches, it returns the
|
||||||
|
// identifier that matched. Otherwise it returns the empty string.
|
||||||
|
func linkedField(line []byte, ids map[string]string) string {
|
||||||
line = bytes.TrimSpace(line)
|
line = bytes.TrimSpace(line)
|
||||||
|
|
||||||
// For fields with a doc string of the
|
// For fields with a doc string of the
|
||||||
|
@ -292,13 +299,39 @@ func isLineForStructFieldID(line []byte, fieldName string) bool {
|
||||||
//
|
//
|
||||||
// TODO: do this better, so it works for all
|
// TODO: do this better, so it works for all
|
||||||
// comments, including unconventional ones.
|
// comments, including unconventional ones.
|
||||||
// For comments
|
|
||||||
if bytes.HasPrefix(line, commentPrefix) {
|
if bytes.HasPrefix(line, commentPrefix) {
|
||||||
if matchesIdentBoundary(line[len(commentPrefix):], fieldName) {
|
line = line[len(commentPrefix):]
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return matchesIdentBoundary(line, fieldName)
|
id := scanIdentifier(line)
|
||||||
|
if len(id) == 0 {
|
||||||
|
// No leading identifier. Avoid map lookup for
|
||||||
|
// somewhat common case.
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return ids[string(id)]
|
||||||
|
}
|
||||||
|
|
||||||
|
// scanIdentifier scans a valid Go identifier off the front of v and
|
||||||
|
// either returns a subslice of v if there's a valid identifier, or
|
||||||
|
// returns a zero-length slice.
|
||||||
|
func scanIdentifier(v []byte) []byte {
|
||||||
|
var n int // number of leading bytes of v belonging to an identifier
|
||||||
|
for {
|
||||||
|
r, width := utf8.DecodeRune(v[n:])
|
||||||
|
if !(isLetter(r) || n > 0 && isDigit(r)) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
n += width
|
||||||
|
}
|
||||||
|
return v[:n]
|
||||||
|
}
|
||||||
|
|
||||||
|
func isLetter(ch rune) bool {
|
||||||
|
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
|
||||||
|
}
|
||||||
|
|
||||||
|
func isDigit(ch rune) bool {
|
||||||
|
return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
|
||||||
}
|
}
|
||||||
|
|
||||||
// matchesIdentBoundary reports whether line matches /^ident\b/.
|
// matchesIdentBoundary reports whether line matches /^ident\b/.
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
package godoc
|
package godoc
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
"go/ast"
|
"go/ast"
|
||||||
"go/parser"
|
"go/parser"
|
||||||
"go/token"
|
"go/token"
|
||||||
|
@ -123,10 +125,7 @@ func TestSanitizeFunc(t *testing.T) {
|
||||||
// Test that we add <span id="StructName.FieldName"> elements
|
// Test that we add <span id="StructName.FieldName"> elements
|
||||||
// to the HTML of struct fields.
|
// to the HTML of struct fields.
|
||||||
func TestStructFieldsIDAttributes(t *testing.T) {
|
func TestStructFieldsIDAttributes(t *testing.T) {
|
||||||
p := &Presentation{
|
got := linkifyStructFields(t, []byte(`
|
||||||
DeclLinks: true,
|
|
||||||
}
|
|
||||||
src := []byte(`
|
|
||||||
package foo
|
package foo
|
||||||
|
|
||||||
type T struct {
|
type T struct {
|
||||||
|
@ -137,8 +136,32 @@ type T struct {
|
||||||
|
|
||||||
// Opt, if non-nil, is an option.
|
// Opt, if non-nil, is an option.
|
||||||
Opt *int
|
Opt *int
|
||||||
|
|
||||||
|
// Опция - другое поле.
|
||||||
|
Опция bool
|
||||||
}
|
}
|
||||||
`)
|
`))
|
||||||
|
want := `type T struct {
|
||||||
|
<span id="T.NoDoc"></span>NoDoc <a href="/pkg/builtin/#string">string</a>
|
||||||
|
|
||||||
|
<span id="T.Doc"></span><span class="comment">// Doc has a comment.</span>
|
||||||
|
Doc <a href="/pkg/builtin/#string">string</a>
|
||||||
|
|
||||||
|
<span id="T.Opt"></span><span class="comment">// Opt, if non-nil, is an option.</span>
|
||||||
|
Opt *<a href="/pkg/builtin/#int">int</a>
|
||||||
|
|
||||||
|
<span id="T.Опция"></span><span class="comment">// Опция - другое поле.</span>
|
||||||
|
Опция <a href="/pkg/builtin/#bool">bool</a>
|
||||||
|
}`
|
||||||
|
if got != want {
|
||||||
|
t.Errorf("got: %s\n\nwant: %s\n", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func linkifyStructFields(t *testing.T, src []byte) string {
|
||||||
|
p := &Presentation{
|
||||||
|
DeclLinks: true,
|
||||||
|
}
|
||||||
fset := token.NewFileSet()
|
fset := token.NewFileSet()
|
||||||
af, err := parser.ParseFile(fset, "foo.go", src, parser.ParseComments)
|
af, err := parser.ParseFile(fset, "foo.go", src, parser.ParseComments)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -148,17 +171,46 @@ type T struct {
|
||||||
pi := &PageInfo{
|
pi := &PageInfo{
|
||||||
FSet: fset,
|
FSet: fset,
|
||||||
}
|
}
|
||||||
got := p.node_htmlFunc(pi, genDecl, true)
|
return p.node_htmlFunc(pi, genDecl, true)
|
||||||
want := `type T struct {
|
}
|
||||||
<span id="T.NoDoc"></span>NoDoc <a href="/pkg/builtin/#string">string</a>
|
|
||||||
|
|
||||||
<span id="T.Doc"></span><span class="comment">// Doc has a comment.</span>
|
// Verify that scanIdentifier isn't quadratic.
|
||||||
Doc <a href="/pkg/builtin/#string">string</a>
|
// This doesn't actually measure and fail on its own, but it was previously
|
||||||
|
// very obvious when running by hand.
|
||||||
<span id="T.Opt"></span><span class="comment">// Opt, if non-nil, is an option.</span>
|
//
|
||||||
Opt *<a href="/pkg/builtin/#int">int</a>
|
// TODO: if there's a reliable and non-flaky way to test this, do so.
|
||||||
}`
|
// Maybe count user CPU time instead of wall time? But that's not easy
|
||||||
if got != want {
|
// to do portably in Go.
|
||||||
t.Errorf("got: %s\n\nwant: %s\n", got, want)
|
func TestStructField(t *testing.T) {
|
||||||
|
for _, n := range []int{10, 100, 1000, 10000} {
|
||||||
|
n := n
|
||||||
|
t.Run(fmt.Sprint(n), func(t *testing.T) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
fmt.Fprintf(&buf, "package foo\n\ntype T struct {\n")
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
fmt.Fprintf(&buf, "\t// Field%d is foo.\n\tField%d int\n\n", i, i)
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&buf, "}\n")
|
||||||
|
linkifyStructFields(t, buf.Bytes())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestScanIdentifier(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
in, want string
|
||||||
|
}{
|
||||||
|
{"foo bar", "foo"},
|
||||||
|
{"foo/bar", "foo"},
|
||||||
|
{" foo", ""},
|
||||||
|
{"фоо", "фоо"},
|
||||||
|
{"f123", "f123"},
|
||||||
|
{"123f", ""},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
got := scanIdentifier([]byte(tt.in))
|
||||||
|
if string(got) != tt.want {
|
||||||
|
t.Errorf("scanIdentifier(%q) = %q; want %q", tt.in, got, tt.want)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue