internal/lsp/fuzzy: add fuzzy matching library
This change uses a fuzzy matching library to score completion results. Updates golang/go#32754 Change-Id: Ia7771b33534de393a865443e05c0fcbf1e9a969b Reviewed-on: https://go-review.googlesource.com/c/tools/+/184441 Run-TryBot: Rebecca Stambler <rstambler@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Cottrell <iancottrell@google.com>
This commit is contained in:
parent
719fbf7c21
commit
2214986f16
|
@ -30,7 +30,7 @@ func (s *Server) completion(ctx context.Context, params *protocol.CompletionPara
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
items, surrounding, err := source.Completion(ctx, view, f, rng.Start, source.CompletionOptions{
|
candidates, surrounding, err := source.Completion(ctx, view, f, rng.Start, source.CompletionOptions{
|
||||||
DeepComplete: s.useDeepCompletions,
|
DeepComplete: s.useDeepCompletions,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -58,7 +58,7 @@ func (s *Server) completion(ctx context.Context, params *protocol.CompletionPara
|
||||||
}
|
}
|
||||||
return &protocol.CompletionList{
|
return &protocol.CompletionList{
|
||||||
IsIncomplete: false,
|
IsIncomplete: false,
|
||||||
Items: toProtocolCompletionItems(items, prefix, insertionRng, s.insertTextFormat, s.usePlaceholders, s.useDeepCompletions),
|
Items: toProtocolCompletionItems(candidates, prefix, insertionRng, s.insertTextFormat, s.usePlaceholders, s.useDeepCompletions),
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,185 @@
|
||||||
|
// Copyright 2019 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package fuzzy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unicode"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Input specifies the type of the input. This influences how the runes are interpreted with
// respect to segmenting the input.
type Input int

const (
	// Text represents a text input type. Input is not segmented.
	Text Input = iota
	// Filename represents a filepath input type with '/' segment delimiters.
	Filename
	// Symbol represents a symbol input type with '.' and ':' segment delimiters.
	Symbol
)
|
||||||
|
|
||||||
|
// RuneRole specifies the role of a rune in the context of an input.
type RuneRole byte

const (
	// RNone specifies a rune without any role in the input: ASCII characters that are neither
	// letters nor digits (whitespace, '_', other punctuation) and that do not act as a segment
	// separator for the input type. Non-ASCII bytes are not RNone; RuneRoles treats them like
	// lower-case letters.
	RNone RuneRole = iota
	// RSep specifies a rune with the role of segment separator.
	RSep
	// RTail specifies a rune which is a lower-case tail in a word in the input.
	RTail
	// RUCTail specifies a rune which is an upper-case tail in a word in the input.
	RUCTail
	// RHead specifies a rune which is the first character in a word in the input.
	RHead
)
|
||||||
|
|
||||||
|
// RuneRoles detects the role of each byte in str and returns them as a slice of length
// len(str). Classification is byte-wise: ASCII bytes are looked up in the rt table, and every
// non-ASCII byte is treated as a lower-case letter. The role of '/', '.' and ':' depends on the
// input type.
//
// If reuse has capacity for at least len(str) roles, its backing array is overwritten and
// returned (resliced); otherwise a new slice is allocated. Passing nil is therefore valid.
func RuneRoles(str string, input Input, reuse []RuneRole) []RuneRole {
	var output []RuneRole
	if cap(reuse) < len(str) {
		output = make([]RuneRole, 0, len(str))
	} else {
		output = reuse[:0]
	}

	// prev and prev2 are the rune types of the previous byte and the one before it.
	prev, prev2 := rtNone, rtNone
	for i := 0; i < len(str); i++ {
		r := rune(str[i])

		role := RNone

		// Bytes above MaxASCII keep the rtLower default.
		curr := rtLower
		if str[i] <= unicode.MaxASCII {
			curr = runeType(rt[str[i]] - '0')
		}

		if curr == rtLower {
			// Lower-case letters and digits start a word only after a non-letter.
			if prev == rtNone || prev == rtPunct {
				role = RHead
			} else {
				role = RTail
			}
		} else if curr == rtUpper {
			// An upper-case letter is provisionally a head; it may be demoted to RUCTail
			// below once the following byte is known.
			role = RHead

			if prev == rtUpper {
				// This and previous characters are both upper case.

				if i+1 == len(str) {
					// This is last character, previous was also uppercase -> this is UCTail
					// i.e., (current char is C): aBC / BC / ABC
					role = RUCTail
				}
			}
		} else if curr == rtPunct {
			// Only the delimiters of the given input type become separators; any other
			// punctuation keeps RNone.
			switch {
			case input == Filename && r == '/':
				role = RSep
			case input == Symbol && r == '.':
				role = RSep
			case input == Symbol && r == ':':
				role = RSep
			}
		}
		if curr != rtLower {
			// At this point output holds exactly i roles, so i-1 and i-2 are in range when i > 1.
			if i > 1 && output[i-1] == RHead && prev2 == rtUpper && (output[i-2] == RHead || output[i-2] == RUCTail) {
				// The previous two characters were uppercase. The current one is not a lower case, so the
				// previous one can't be a HEAD. Make it a UCTail.
				// i.e., (last char is current char - B must be a UCTail): ABC / ZABC / AB.
				output[i-1] = RUCTail
			}
		}

		output = append(output, role)
		prev2 = prev
		prev = curr
	}
	return output
}
|
||||||
|
|
||||||
|
// runeType is the coarse character class of a single byte, as used by RuneRoles.
type runeType byte

const (
	rtNone  runeType = iota // any ASCII byte not covered by the classes below
	rtPunct                 // '.', '/' and ':'
	rtLower                 // 'a'-'z' and '0'-'9'
	rtUpper                 // 'A'-'Z'
)

// rt maps each of the 128 ASCII byte values to its runeType, encoded as the digit
// '0' + runeType so the table can be written as a string constant.
const rt = "00000000000000000000000000000000000000000000001122222222221000000333333333333333333333333330000002222222222222222222222222200000"
|
||||||
|
|
||||||
|
// LastSegment returns the substring representing the last segment from the input, where each
|
||||||
|
// byte has an associated RuneRole in the roles slice. This makes sense only for inputs of Symbol
|
||||||
|
// or Filename type.
|
||||||
|
func LastSegment(input string, roles []RuneRole) string {
|
||||||
|
// Exclude ending separators.
|
||||||
|
end := len(input) - 1
|
||||||
|
for end >= 0 && roles[end] == RSep {
|
||||||
|
end--
|
||||||
|
}
|
||||||
|
if end < 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
start := end - 1
|
||||||
|
for start >= 0 && roles[start] != RSep {
|
||||||
|
start--
|
||||||
|
}
|
||||||
|
|
||||||
|
return input[start+1 : end+1]
|
||||||
|
}
|
||||||
|
|
||||||
|
// ToLower returns a copy of input in which the ASCII letters 'A'-'Z' are replaced by their
// lower-case counterparts. All other bytes, including every non-ASCII byte, are copied
// unchanged.
//
// If reuse has capacity for at least len(input) bytes, its backing array is used for the
// result regardless of its current length; otherwise a new slice is allocated. Passing nil is
// valid. The returned slice always has length len(input).
func ToLower(input string, reuse []byte) []byte {
	var output []byte
	if cap(reuse) < len(input) {
		output = make([]byte, len(input))
	} else {
		// Reslice instead of indexing reuse directly: its length may be shorter than
		// len(input) even though its capacity is sufficient.
		output = reuse[:len(input)]
	}

	for i := 0; i < len(input); i++ {
		c := input[i]
		if c <= unicode.MaxASCII && 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		output[i] = c
	}
	return output
}
|
||||||
|
|
||||||
|
// WordConsumer is a callback that receives one word of an input, identified by its
// [start, end) byte offsets (start is inclusive, end is exclusive).
type WordConsumer func(start, end int)
|
||||||
|
|
||||||
|
// Words find word delimiters in an input based on its bytes' mappings to rune roles. The offset
|
||||||
|
// delimiters for each word are fed to the provided consumer function.
|
||||||
|
func Words(roles []RuneRole, consume WordConsumer) {
|
||||||
|
var wordStart int
|
||||||
|
for i, r := range roles {
|
||||||
|
switch r {
|
||||||
|
case RUCTail, RTail:
|
||||||
|
case RHead, RNone, RSep:
|
||||||
|
if i != wordStart {
|
||||||
|
consume(wordStart, i)
|
||||||
|
}
|
||||||
|
wordStart = i
|
||||||
|
if r != RHead {
|
||||||
|
// Skip this character.
|
||||||
|
wordStart = i + 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if wordStart != len(roles) {
|
||||||
|
consume(wordStart, len(roles))
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,186 @@
|
||||||
|
// Copyright 2019 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package fuzzy_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"sort"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"golang.org/x/tools/internal/lsp/fuzzy"
|
||||||
|
)
|
||||||
|
|
||||||
|
var rolesTests = []struct {
|
||||||
|
str string
|
||||||
|
input fuzzy.Input
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{str: "abc", want: "Ccc", input: fuzzy.Text},
|
||||||
|
{str: ".abc", want: " Ccc", input: fuzzy.Text},
|
||||||
|
{str: "abc def", want: "Ccc Ccc", input: fuzzy.Text},
|
||||||
|
{str: "SWT MyID", want: "Cuu CcCu", input: fuzzy.Text},
|
||||||
|
{str: "ID", want: "Cu", input: fuzzy.Text},
|
||||||
|
{str: "IDD", want: "Cuu", input: fuzzy.Text},
|
||||||
|
{str: " ID ", want: " Cu ", input: fuzzy.Text},
|
||||||
|
{str: "IDSome", want: "CuCccc", input: fuzzy.Text},
|
||||||
|
{str: "0123456789", want: "Cccccccccc", input: fuzzy.Text},
|
||||||
|
{str: "abcdefghigklmnopqrstuvwxyz", want: "Cccccccccccccccccccccccccc", input: fuzzy.Text},
|
||||||
|
{str: "ABCDEFGHIGKLMNOPQRSTUVWXYZ", want: "Cuuuuuuuuuuuuuuuuuuuuuuuuu", input: fuzzy.Text},
|
||||||
|
{str: "こんにちは", want: "Ccccccccccccccc", input: fuzzy.Text}, // We don't parse unicode
|
||||||
|
{str: ":/.", want: " ", input: fuzzy.Text},
|
||||||
|
|
||||||
|
// Filenames
|
||||||
|
{str: "abc/def", want: "Ccc/Ccc", input: fuzzy.Filename},
|
||||||
|
{str: " abc_def", want: " Ccc Ccc", input: fuzzy.Filename},
|
||||||
|
{str: " abc_DDf", want: " Ccc CCc", input: fuzzy.Filename},
|
||||||
|
{str: ":.", want: " ", input: fuzzy.Filename},
|
||||||
|
|
||||||
|
// Symbols
|
||||||
|
{str: "abc::def::goo", want: "Ccc//Ccc//Ccc", input: fuzzy.Symbol},
|
||||||
|
{str: "proto::Message", want: "Ccccc//Ccccccc", input: fuzzy.Symbol},
|
||||||
|
{str: "AbstractSWTFactory", want: "CcccccccCuuCcccccc", input: fuzzy.Symbol},
|
||||||
|
{str: "Abs012", want: "Cccccc", input: fuzzy.Symbol},
|
||||||
|
{str: "/", want: " ", input: fuzzy.Symbol},
|
||||||
|
{str: "fOO", want: "CCu", input: fuzzy.Symbol},
|
||||||
|
{str: "fo_oo.o_oo", want: "Cc Cc/C Cc", input: fuzzy.Symbol},
|
||||||
|
}
|
||||||
|
|
||||||
|
func rolesString(roles []fuzzy.RuneRole) string {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
for _, r := range roles {
|
||||||
|
buf.WriteByte(" /cuC"[int(r)])
|
||||||
|
}
|
||||||
|
return buf.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRoles(t *testing.T) {
|
||||||
|
for _, tc := range rolesTests {
|
||||||
|
gotRoles := make([]fuzzy.RuneRole, len(tc.str))
|
||||||
|
fuzzy.RuneRoles(tc.str, tc.input, gotRoles)
|
||||||
|
got := rolesString(gotRoles)
|
||||||
|
if got != tc.want {
|
||||||
|
t.Errorf("roles(%s) = %v; want %v", tc.str, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// words converts its arguments to byte slices, preserving order. It returns nil when called
// without arguments.
func words(strWords ...string) [][]byte {
	if len(strWords) == 0 {
		return nil
	}
	out := make([][]byte, len(strWords))
	for i := range strWords {
		out[i] = []byte(strWords[i])
	}
	return out
}
|
||||||
|
|
||||||
|
// wordSplitTests pairs an input with the words that fuzzy.Words is expected to report for it
// when the roles are computed for the Symbol input type (see TestWordSplit).
var wordSplitTests = []struct {
	input string
	want  []string
}{
	{
		input: "foo bar baz",
		want:  []string{"foo", "bar", "baz"},
	},
	{
		input: "fooBarBaz",
		want:  []string{"foo", "Bar", "Baz"},
	},
	{
		input: "FOOBarBAZ",
		want:  []string{"FOO", "Bar", "BAZ"},
	},
	{
		input: "foo123_bar2Baz3",
		want:  []string{"foo123", "bar2", "Baz3"},
	},
}
|
||||||
|
|
||||||
|
func TestWordSplit(t *testing.T) {
|
||||||
|
for _, tc := range wordSplitTests {
|
||||||
|
roles := fuzzy.RuneRoles(tc.input, fuzzy.Symbol, nil)
|
||||||
|
|
||||||
|
var got []string
|
||||||
|
consumer := func(i, j int) {
|
||||||
|
got = append(got, tc.input[i:j])
|
||||||
|
}
|
||||||
|
fuzzy.Words(roles, consumer)
|
||||||
|
|
||||||
|
if eq := diffStringLists(tc.want, got); !eq {
|
||||||
|
t.Errorf("input %v: (want %v -> got %v)", tc.input, tc.want, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// diffStringLists reports whether a and b contain the same strings, ignoring order. Despite
// its name it returns true when the lists are equal.
//
// The comparison works on sorted copies, so the callers' slices (for example the shared test
// tables) are never reordered.
func diffStringLists(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	sortedA := append([]string(nil), a...)
	sortedB := append([]string(nil), b...)
	sort.Strings(sortedA)
	sort.Strings(sortedB)
	for i := range sortedA {
		if sortedA[i] != sortedB[i] {
			return false
		}
	}
	return true
}
|
||||||
|
|
||||||
|
// lastSegmentSplitTests pairs a string and its input type with the last segment that
// fuzzy.LastSegment is expected to return. The final Text case has no separators, so the whole
// string is its own last segment.
var lastSegmentSplitTests = []struct {
	str   string
	input fuzzy.Input
	want  string
}{
	{
		str:   "identifier",
		input: fuzzy.Symbol,
		want:  "identifier",
	},
	{
		str:   "two_words",
		input: fuzzy.Symbol,
		want:  "two_words",
	},
	{
		str:   "first::second",
		input: fuzzy.Symbol,
		want:  "second",
	},
	{
		str:   "foo.bar.FOOBar_buz123_test",
		input: fuzzy.Symbol,
		want:  "FOOBar_buz123_test",
	},
	{
		str:   "golang.org/x/tools/internal/lsp/fuzzy_matcher.go",
		input: fuzzy.Filename,
		want:  "fuzzy_matcher.go",
	},
	{
		str:   "golang.org/x/tools/internal/lsp/fuzzy_matcher.go",
		input: fuzzy.Text,
		want:  "golang.org/x/tools/internal/lsp/fuzzy_matcher.go",
	},
}
|
||||||
|
|
||||||
|
func TestLastSegment(t *testing.T) {
|
||||||
|
for _, tc := range lastSegmentSplitTests {
|
||||||
|
roles := fuzzy.RuneRoles(tc.str, tc.input, nil)
|
||||||
|
|
||||||
|
got := fuzzy.LastSegment(tc.str, roles)
|
||||||
|
|
||||||
|
if got != tc.want {
|
||||||
|
t.Errorf("str %v: want %v; got %v", tc.str, tc.want, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkRoles(b *testing.B) {
|
||||||
|
str := "AbstractSWTFactory"
|
||||||
|
out := make([]fuzzy.RuneRole, len(str))
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
fuzzy.RuneRoles(str, fuzzy.Symbol, out)
|
||||||
|
}
|
||||||
|
b.SetBytes(int64(len(str)))
|
||||||
|
}
|
|
@ -0,0 +1,437 @@
|
||||||
|
// Copyright 2019 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package fuzzy implements a fuzzy matching algorithm.
|
||||||
|
package fuzzy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// MaxInputSize is the maximum size, in bytes, of a candidate scored against the fuzzy
	// matcher. Longer candidates are truncated to this size.
	MaxInputSize = 127
	// MaxPatternSize is the maximum size, in bytes, of the pattern used to construct the fuzzy
	// matcher. Longer patterns are truncated to this size.
	MaxPatternSize = 63
)
|
||||||
|
|
||||||
|
// scoreVal packs one cell of the score table into a single int: the score itself in the upper
// bits and, in the lowest bit, the state (0 or 1) of the cell it was derived from.
type scoreVal int

// val returns the score stored in s.
func (s scoreVal) val() int {
	// Arithmetic shift, so negative scores are recovered correctly.
	return int(s) >> 1
}

// prevK returns the predecessor state (0 or 1) stored in s.
func (s scoreVal) prevK() int {
	return int(s & 1)
}

// score packs val and prevK into a scoreVal. prevK must be 0 or 1.
func score(val int, prevK int /*0 or 1*/) scoreVal {
	return scoreVal(2*val + prevK)
}
|
||||||
|
|
||||||
|
// Matcher implements a fuzzy matching algorithm for scoring candidates against a pattern.
// The matcher does not support parallel usage: scoring a candidate overwrites per-candidate
// state (roles, scores and the scratch buffers below) stored in the Matcher itself.
type Matcher struct {
	input Input // how candidates and the pattern are segmented

	pattern       string // the pattern, truncated to MaxPatternSize bytes
	patternLower  []byte // lower-case version of the pattern
	patternShort  []byte // first characters of the pattern (at most 3 bytes of patternLower)
	caseSensitive bool   // set if the pattern is mix-cased, i.e. differs from patternLower

	patternRoles []RuneRole // the role of each character in the pattern
	roles        []RuneRole // the role of each character in the tested string

	// scores[i][j][k] is the best score for aligning the first j pattern bytes with the
	// first i candidate bytes; k is 1 when candidate byte i-1 is matched to pattern byte j-1
	// and 0 when it is skipped.
	scores [MaxInputSize + 1][MaxPatternSize + 1][2]scoreVal

	// scoreScale converts a raw score into the normalized result returned by Score.
	scoreScale float32

	lastCandidateLen     int // in bytes
	lastCandidateMatched bool

	// Here we save the last candidate in lower-case. This is basically a byte slice we reuse for
	// performance reasons, so the slice is not reallocated for every candidate.
	lowerBuf [MaxInputSize]byte
	// rolesBuf is the reusable backing storage for roles.
	rolesBuf [MaxInputSize]RuneRole
}
|
||||||
|
|
||||||
|
func (m *Matcher) bestK(i, j int) int {
|
||||||
|
if m.scores[i][j][0].val() < m.scores[i][j][1].val() {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewMatcher returns a new fuzzy matcher for scoring candidates against the provided pattern.
|
||||||
|
func NewMatcher(pattern string, input Input) *Matcher {
|
||||||
|
if len(pattern) > MaxPatternSize {
|
||||||
|
pattern = pattern[:MaxPatternSize]
|
||||||
|
}
|
||||||
|
|
||||||
|
m := &Matcher{
|
||||||
|
input: input,
|
||||||
|
pattern: pattern,
|
||||||
|
patternLower: ToLower(pattern, nil),
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, c := range m.patternLower {
|
||||||
|
if pattern[i] != c {
|
||||||
|
m.caseSensitive = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(pattern) > 3 {
|
||||||
|
m.patternShort = m.patternLower[:3]
|
||||||
|
} else {
|
||||||
|
m.patternShort = m.patternLower
|
||||||
|
}
|
||||||
|
|
||||||
|
m.patternRoles = RuneRoles(pattern, input, nil)
|
||||||
|
|
||||||
|
if len(pattern) > 0 {
|
||||||
|
maxCharScore := 4
|
||||||
|
if input == Text {
|
||||||
|
maxCharScore = 6
|
||||||
|
}
|
||||||
|
m.scoreScale = 1 / float32(maxCharScore*len(pattern))
|
||||||
|
}
|
||||||
|
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetInput updates the input type for subsequent scoring attempts.
|
||||||
|
func (m *Matcher) SetInput(input Input) {
|
||||||
|
if m.input == input {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
m.input = input
|
||||||
|
m.patternRoles = RuneRoles(m.pattern, input, m.patternRoles)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Score returns the score returned by matching the candidate to the pattern.
|
||||||
|
// This is not designed for parallel use. Multiple candidates must be scored sequentally.
|
||||||
|
// Returns a score between 0 and 1 (0 - no match, 1 - perfect match).
|
||||||
|
func (m *Matcher) Score(candidate string) float32 {
|
||||||
|
if len(candidate) > MaxInputSize {
|
||||||
|
candidate = candidate[:MaxInputSize]
|
||||||
|
}
|
||||||
|
lower := ToLower(candidate, m.lowerBuf[:])
|
||||||
|
m.lastCandidateLen = len(candidate)
|
||||||
|
|
||||||
|
if len(m.pattern) == 0 {
|
||||||
|
// Empty patterns perfectly match candidates.
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if m.match(candidate, lower) {
|
||||||
|
sc := m.computeScore(candidate, lower)
|
||||||
|
if sc > minScore/2 && !m.poorMatch() {
|
||||||
|
m.lastCandidateMatched = true
|
||||||
|
if len(m.pattern) == len(candidate) {
|
||||||
|
// Perfect match.
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if sc < 0 {
|
||||||
|
sc = 0
|
||||||
|
}
|
||||||
|
normalizedScore := float32(sc) * m.scoreScale
|
||||||
|
if normalizedScore > 1 {
|
||||||
|
normalizedScore = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
return normalizedScore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m.lastCandidateMatched = false
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
||||||
|
// minScore marks score-table cells that cannot be reached. Code in this file treats any score
// below minScore/2 as "no match", which leaves room for bonuses and penalties applied on top
// of an unreachable cell.
const minScore = -10000
|
||||||
|
|
||||||
|
// MatchedRanges returns matches ranges for the last scored string as a flattened array of
|
||||||
|
// [begin, end) byte offset pairs.
|
||||||
|
func (m *Matcher) MatchedRanges() []int {
|
||||||
|
if len(m.pattern) == 0 || !m.lastCandidateMatched {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
i, j := m.lastCandidateLen, len(m.pattern)
|
||||||
|
if m.scores[i][j][0].val() < minScore/2 && m.scores[i][j][1].val() < minScore/2 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var ret []int
|
||||||
|
k := m.bestK(i, j)
|
||||||
|
for i > 0 {
|
||||||
|
take := (k == 1)
|
||||||
|
k = m.scores[i][j][k].prevK()
|
||||||
|
if take {
|
||||||
|
if len(ret) == 0 || ret[len(ret)-1] != i {
|
||||||
|
ret = append(ret, i)
|
||||||
|
ret = append(ret, i-1)
|
||||||
|
} else {
|
||||||
|
ret[len(ret)-1] = i - 1
|
||||||
|
}
|
||||||
|
j--
|
||||||
|
}
|
||||||
|
i--
|
||||||
|
}
|
||||||
|
// Reverse slice.
|
||||||
|
for i := 0; i < len(ret)/2; i++ {
|
||||||
|
ret[i], ret[len(ret)-1-i] = ret[len(ret)-1-i], ret[i]
|
||||||
|
}
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Matcher) match(candidate string, candidateLower []byte) bool {
|
||||||
|
i, j := 0, 0
|
||||||
|
for ; i < len(candidateLower) && j < len(m.patternLower); i++ {
|
||||||
|
if candidateLower[i] == m.patternLower[j] {
|
||||||
|
j++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if j != len(m.patternLower) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// The input passes the simple test against pattern, so it is time to classify its characters.
|
||||||
|
// Character roles are used below to find the last segment.
|
||||||
|
m.roles = RuneRoles(candidate, m.input, m.rolesBuf[:])
|
||||||
|
if m.input != Text {
|
||||||
|
sep := len(candidateLower) - 1
|
||||||
|
for sep >= i && m.roles[sep] != RSep {
|
||||||
|
sep--
|
||||||
|
}
|
||||||
|
if sep >= i {
|
||||||
|
// We are not in the last segment, check that we have at least one character match in the last
|
||||||
|
// segment of the candidate.
|
||||||
|
return bytes.IndexByte(candidateLower[sep:], m.patternLower[len(m.pattern)-1]) != -1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// computeScore fills the score table for candidate and returns the best raw (unnormalized)
// score for aligning the whole pattern with the whole candidate. candidateLower must be the
// lower-cased candidate, and m.roles must already hold the candidate's rune roles (match sets
// them).
//
// m.scores[i][j][k] holds the best score for aligning the first j pattern bytes with the first
// i candidate bytes, where k == 1 means candidate byte i-1 is matched to pattern byte j-1 and
// k == 0 means it is skipped. Each cell also records the state of the cell it was derived
// from, which MatchedRanges and poorMatch use to trace the alignment back. Cells that cannot
// be reached carry minScore.
func (m *Matcher) computeScore(candidate string, candidateLower []byte) int {
	pattLen, candLen := len(m.pattern), len(candidate)

	// Row 0: with no candidate bytes consumed, only the empty pattern prefix is reachable.
	for j := 0; j <= len(m.pattern); j++ {
		m.scores[0][j][0] = minScore << 1
		m.scores[0][j][1] = minScore << 1
	}
	m.scores[0][0][0] = score(0, 0) // Start with 0.

	// Count the segments and find where the last one starts.
	segmentsLeft, lastSegStart := 1, 0
	for i := 0; i < candLen; i++ {
		if m.roles[i] == RSep {
			segmentsLeft++
			lastSegStart = i + 1
		}
	}

	// A per-character bonus for a consecutive match.
	consecutiveBonus := 2
	if m.input == Text {
		// Consecutive matches for text are more important.
		consecutiveBonus = 4
	}
	// NOTE(review): wordIdx is maintained below but never read in this function.
	wordIdx := 0 // Word count within segment.
	for i := 1; i <= candLen; i++ {
		role := m.roles[i-1]
		isHead := role == RHead

		if isHead {
			wordIdx++
		} else if role == RSep && segmentsLeft > 1 {
			wordIdx = 0
			segmentsLeft--
		}

		// skipPenalty is what it costs to leave candidate byte i-1 unmatched.
		var skipPenalty int
		if segmentsLeft == 1 && isHead && m.input != Text {
			// Skipping a word.
			skipPenalty++
		}
		if i-1 == lastSegStart {
			// Skipping the start of the last segment.
			skipPenalty += 3
		}

		for j := 0; j <= pattLen; j++ {
			// By default, we don't have a match. Fill in the skip data.
			m.scores[i][j][1] = minScore << 1

			if segmentsLeft > 1 && j == pattLen {
				// The very last pattern character can only be matched in the last segment.
				m.scores[i][j][0] = minScore << 1
				continue
			}

			// Compute the skip score: carry over the better state of the cell above.
			k := 0
			if m.scores[i-1][j][0].val() < m.scores[i-1][j][1].val() {
				k = 1
			}

			skipScore := m.scores[i-1][j][k].val()
			// Do not penalize missing characters after the last matched segment.
			if j != pattLen {
				skipScore -= skipPenalty
			}
			m.scores[i][j][0] = score(skipScore, k)

			if j == 0 || candidateLower[i-1] != m.patternLower[j-1] {
				// Not a match.
				continue
			}
			pRole := m.patternRoles[j-1]

			if role == RTail && pRole == RHead {
				if j > 1 {
					// Not a match: a head in the pattern matches a tail character in the candidate.
					continue
				}
				// Special treatment for the first character of the pattern. We allow
				// matches in the middle of a word if they are long enough, at least
				// min(3, pattern.length) characters.
				if !bytes.HasPrefix(candidateLower[i-1:], m.patternShort) {
					continue
				}
			}

			// Compute the char score.
			var charScore int
			// Bonus 1: the char is in the candidate's last segment.
			if segmentsLeft <= 1 {
				charScore++
			}
			// Bonus 2: Case match or a Head in the pattern aligns with one in the word.
			// Single-case patterns lack segmentation signals and we assume any character
			// can be a head of a segment.
			if candidate[i-1] == m.pattern[j-1] || role == RHead && (!m.caseSensitive || pRole == RHead) {
				charScore++
			}

			// Penalty 1: pattern char is Head, candidate char is Tail.
			if role == RTail && pRole == RHead {
				charScore--
			}
			// Penalty 2: first pattern character matched in the middle of a word.
			if j == 1 && role == RTail {
				charScore -= 4
			}

			// Third dimension encodes whether there is a gap between the previous match and the current
			// one.
			for k := 0; k < 2; k++ {
				sc := m.scores[i-1][j-1][k].val() + charScore

				// A match at the very start of the candidate or of its last segment counts as
				// consecutive even though no character was matched before it.
				isConsecutive := k == 1 || i-1 == 0 || i-1 == lastSegStart
				if isConsecutive || (m.input == Text && j-1 == 0) {
					// Bonus 3: a consecutive match. First character match also gets a bonus to
					// ensure prefix final match score normalizes to 1.0.
					// Logically, this is a part of charScore, but we have to compute it here because it
					// only applies for consecutive matches (k == 1).
					sc += consecutiveBonus
				}
				if k == 0 {
					// Penalty 3: Matching inside a segment (and previous char wasn't matched). Penalize for the lack
					// of alignment.
					if role == RTail || role == RUCTail {
						sc -= 3
					}
				}

				// Keep the better of the two predecessors, remembering which one it was.
				if sc > m.scores[i][j][1].val() {
					m.scores[i][j][1] = score(sc, k)
				}
			}
		}
	}

	result := m.scores[len(candidate)][len(m.pattern)][m.bestK(len(candidate), len(m.pattern))].val()

	return result
}
|
||||||
|
|
||||||
|
// ScoreTable returns the score table computed for the provided candidate. Used only for debugging.
|
||||||
|
func (m *Matcher) ScoreTable(candidate string) string {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
|
||||||
|
var line1, line2, separator bytes.Buffer
|
||||||
|
line1.WriteString("\t")
|
||||||
|
line2.WriteString("\t")
|
||||||
|
for j := 0; j < len(m.pattern); j++ {
|
||||||
|
line1.WriteString(fmt.Sprintf("%c\t\t", m.pattern[j]))
|
||||||
|
separator.WriteString("----------------")
|
||||||
|
}
|
||||||
|
|
||||||
|
buf.WriteString(line1.String())
|
||||||
|
buf.WriteString("\n")
|
||||||
|
buf.WriteString(separator.String())
|
||||||
|
buf.WriteString("\n")
|
||||||
|
|
||||||
|
for i := 1; i <= len(candidate); i++ {
|
||||||
|
line1.Reset()
|
||||||
|
line2.Reset()
|
||||||
|
|
||||||
|
line1.WriteString(fmt.Sprintf("%c\t", candidate[i-1]))
|
||||||
|
line2.WriteString("\t")
|
||||||
|
|
||||||
|
for j := 1; j <= len(m.pattern); j++ {
|
||||||
|
line1.WriteString(fmt.Sprintf("M%6d(%c)\t", m.scores[i][j][0].val(), dir(m.scores[i][j][0].prevK())))
|
||||||
|
line2.WriteString(fmt.Sprintf("H%6d(%c)\t", m.scores[i][j][1].val(), dir(m.scores[i][j][1].prevK())))
|
||||||
|
}
|
||||||
|
buf.WriteString(line1.String())
|
||||||
|
buf.WriteString("\n")
|
||||||
|
buf.WriteString(line2.String())
|
||||||
|
buf.WriteString("\n")
|
||||||
|
buf.WriteString(separator.String())
|
||||||
|
buf.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// dir returns the letter ScoreTable prints for a predecessor state: 'M' for state 0 and 'H'
// for any other value.
func dir(prevK int) rune {
	switch prevK {
	case 0:
		return 'M'
	default:
		return 'H'
	}
}
|
||||||
|
|
||||||
|
func (m *Matcher) poorMatch() bool {
|
||||||
|
if len(m.pattern) < 2 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
i, j := m.lastCandidateLen, len(m.pattern)
|
||||||
|
k := m.bestK(i, j)
|
||||||
|
|
||||||
|
var counter, len int
|
||||||
|
for i > 0 {
|
||||||
|
take := (k == 1)
|
||||||
|
k = m.scores[i][j][k].prevK()
|
||||||
|
if take {
|
||||||
|
len++
|
||||||
|
if k == 0 && len < 3 && m.roles[i-1] == RTail {
|
||||||
|
// Short match in the middle of a word
|
||||||
|
counter++
|
||||||
|
if counter > 1 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
j--
|
||||||
|
} else {
|
||||||
|
len = 0
|
||||||
|
}
|
||||||
|
i--
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
|
@ -0,0 +1,352 @@
|
||||||
|
// Copyright 2019 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Benchmark results:
|
||||||
|
//
|
||||||
|
// BenchmarkMatcher-12 1000000 1615 ns/op 30.95 MB/s 0 B/op 0 allocs/op
|
||||||
|
//
|
||||||
|
package fuzzy_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"golang.org/x/tools/internal/lsp/fuzzy"
|
||||||
|
)
|
||||||
|
|
||||||
|
func ExampleFuzzyMatcher() {
|
||||||
|
pattern := "TEdit"
|
||||||
|
candidates := []string{"fuzzy.TextEdit", "ArtEdit", "TED talks about IT"}
|
||||||
|
|
||||||
|
// Create a fuzzy matcher for the pattern.
|
||||||
|
matcher := fuzzy.NewMatcher(pattern, fuzzy.Text)
|
||||||
|
|
||||||
|
for _, candidate := range candidates {
|
||||||
|
// Compute candidate's score against the matcher.
|
||||||
|
score := matcher.Score(candidate)
|
||||||
|
|
||||||
|
if score > -1 {
|
||||||
|
// Get the substrings in the candidate matching the pattern.
|
||||||
|
ranges := matcher.MatchedRanges()
|
||||||
|
|
||||||
|
fmt.Println(ranges) // Do something with the ranges.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// comparator is a named binary predicate over scores, used to state expectations such as
// "score == ref" or "score >= ref" in test tables.
type comparator struct {
	f     func(val, ref float32) bool // the predicate itself
	descr string                      // operator symbol used in failure messages
}

// eq holds when the value equals the reference.
var eq = comparator{
	descr: "==",
	f:     func(got, want float32) bool { return got == want },
}

// ge holds when the value is greater than or equal to the reference.
var ge = comparator{
	descr: ">=",
	f:     func(got, floor float32) bool { return got >= floor },
}

// eval reports whether the comparison holds for val against ref.
func (c comparator) eval(val, ref float32) bool {
	return c.f(val, ref)
}

// String returns the operator symbol of the comparator.
func (c comparator) String() string {
	return c.descr
}
|
||||||
|
|
||||||
|
type scoreTest struct {
|
||||||
|
candidate string
|
||||||
|
comparator
|
||||||
|
ref float32
|
||||||
|
}
|
||||||
|
|
||||||
|
var matcherTests = []struct {
|
||||||
|
pattern string
|
||||||
|
input fuzzy.Input
|
||||||
|
tests []scoreTest
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "",
|
||||||
|
input: fuzzy.Text,
|
||||||
|
tests: []scoreTest{
|
||||||
|
{"def", eq, 1},
|
||||||
|
{"Ab stuff c", eq, 1},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "abc",
|
||||||
|
input: fuzzy.Text,
|
||||||
|
tests: []scoreTest{
|
||||||
|
{"def", eq, -1},
|
||||||
|
{"abd", eq, -1},
|
||||||
|
{"abc", ge, 0},
|
||||||
|
{"Abc", ge, 0},
|
||||||
|
{"Ab stuff c", ge, 0},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "Abc",
|
||||||
|
input: fuzzy.Text,
|
||||||
|
tests: []scoreTest{
|
||||||
|
{"def", eq, -1},
|
||||||
|
{"abd", eq, -1},
|
||||||
|
{"abc", ge, 0},
|
||||||
|
{"Abc", ge, 0},
|
||||||
|
{"Ab stuff c", ge, 0},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "subs",
|
||||||
|
input: fuzzy.Filename,
|
||||||
|
tests: []scoreTest{
|
||||||
|
{"sub/seq", ge, 0},
|
||||||
|
{"sub/seq/end", eq, -1},
|
||||||
|
{"sub/seq/base", ge, 0},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "subs",
|
||||||
|
input: fuzzy.Filename,
|
||||||
|
tests: []scoreTest{
|
||||||
|
{"//sub/seq", ge, 0},
|
||||||
|
{"//sub/seq/end", eq, -1},
|
||||||
|
{"//sub/seq/base", ge, 0},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestScore(t *testing.T) {
|
||||||
|
for _, tc := range matcherTests {
|
||||||
|
m := fuzzy.NewMatcher(tc.pattern, tc.input)
|
||||||
|
for _, sct := range tc.tests {
|
||||||
|
score := m.Score(sct.candidate)
|
||||||
|
if !sct.comparator.eval(score, sct.ref) {
|
||||||
|
t.Errorf("not true that m.Score(%s)[=%v] %s %v", sct.candidate, score, sct.comparator, sct.ref)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type candidateCompTest struct {
|
||||||
|
c1 string
|
||||||
|
comparator comparator
|
||||||
|
c2 string
|
||||||
|
}
|
||||||
|
|
||||||
|
var compareCandidatesTestCases = []struct {
|
||||||
|
pattern string
|
||||||
|
input fuzzy.Input
|
||||||
|
orderedCandidates []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
pattern: "aa",
|
||||||
|
input: fuzzy.Filename,
|
||||||
|
orderedCandidates: []string{
|
||||||
|
"baab",
|
||||||
|
"bb_aa",
|
||||||
|
"a/a/a",
|
||||||
|
"aa_bb",
|
||||||
|
"aa_b",
|
||||||
|
"aabb",
|
||||||
|
"aab",
|
||||||
|
"b/aa",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
pattern: "Foo",
|
||||||
|
input: fuzzy.Text,
|
||||||
|
orderedCandidates: []string{
|
||||||
|
"Barfoo",
|
||||||
|
"F_o_o",
|
||||||
|
"Faoo",
|
||||||
|
"F__oo",
|
||||||
|
"F_oo",
|
||||||
|
"FaoFooa",
|
||||||
|
"BarFoo",
|
||||||
|
"FooA",
|
||||||
|
"FooBar",
|
||||||
|
"Foo",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCompareCandidateScores(t *testing.T) {
|
||||||
|
for _, tc := range compareCandidatesTestCases {
|
||||||
|
m := fuzzy.NewMatcher(tc.pattern, tc.input)
|
||||||
|
|
||||||
|
var prevScore float32
|
||||||
|
prevCand := "MIN_SCORE"
|
||||||
|
for _, cand := range tc.orderedCandidates {
|
||||||
|
score := m.Score(cand)
|
||||||
|
if prevScore > score {
|
||||||
|
t.Errorf("%s[=%v] is scored lower than %s[=%v]", cand, score, prevCand, prevScore)
|
||||||
|
}
|
||||||
|
if score < -1 || score > 1 {
|
||||||
|
t.Errorf("%s score is %v; want value between [-1, 1]", cand, score)
|
||||||
|
}
|
||||||
|
prevScore = score
|
||||||
|
prevCand = cand
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var fuzzyMatcherTestCases = []struct {
|
||||||
|
p string
|
||||||
|
str string
|
||||||
|
want string
|
||||||
|
input fuzzy.Input
|
||||||
|
}{
|
||||||
|
// fuzzy.Filename
|
||||||
|
{p: "aa", str: "a_a/a_a", want: "[a]_a/[a]_a", input: fuzzy.Filename},
|
||||||
|
{p: "aaaa", str: "a_a/a_a", want: "[a]_[a]/[a]_[a]", input: fuzzy.Filename},
|
||||||
|
{p: "aaaa", str: "aaaa", want: "[aaaa]", input: fuzzy.Filename},
|
||||||
|
{p: "aaaa", str: "a_a/a_aaaa", want: "a_a/[a]_[aaa]a", input: fuzzy.Filename},
|
||||||
|
{p: "aaaa", str: "a_a/aaaaa", want: "a_a/[aaaa]a", input: fuzzy.Filename},
|
||||||
|
{p: "aaaa", str: "aabaaa", want: "[aa]b[aa]a", input: fuzzy.Filename},
|
||||||
|
{p: "aaaa", str: "a/baaa", want: "[a]/b[aaa]", input: fuzzy.Filename},
|
||||||
|
{p: "abcxz", str: "d/abc/abcd/oxz", want: "d/[abc]/abcd/o[xz]", input: fuzzy.Filename},
|
||||||
|
{p: "abcxz", str: "d/abcd/abc/oxz", want: "d/[abc]d/abc/o[xz]", input: fuzzy.Filename},
|
||||||
|
|
||||||
|
// fuzzy.Symbol
|
||||||
|
{p: "foo", str: "abc::foo", want: "abc::[foo]", input: fuzzy.Symbol},
|
||||||
|
{p: "foo", str: "foo.foo", want: "foo.[foo]", input: fuzzy.Symbol},
|
||||||
|
{p: "foo", str: "fo_oo.o_oo", want: "[fo]_oo.[o]_oo", input: fuzzy.Symbol},
|
||||||
|
{p: "foo", str: "fo_oo.fo_oo", want: "fo_oo.[fo]_[o]o", input: fuzzy.Symbol},
|
||||||
|
{p: "fo_o", str: "fo_oo.o_oo", want: "[f]o_oo.[o_o]o", input: fuzzy.Symbol},
|
||||||
|
{p: "fOO", str: "fo_oo.o_oo", want: "[f]o_oo.[o]_[o]o", input: fuzzy.Symbol},
|
||||||
|
{p: "tedit", str: "foo.TextEdit", want: "foo.[T]ext[Edit]", input: fuzzy.Symbol},
|
||||||
|
{p: "TEdit", str: "foo.TextEdit", want: "foo.[T]ext[Edit]", input: fuzzy.Symbol},
|
||||||
|
{p: "Tedit", str: "foo.TextEdit", want: "foo.[T]ext[Edit]", input: fuzzy.Symbol},
|
||||||
|
{p: "Tedit", str: "foo.Textedit", want: "foo.[Te]xte[dit]", input: fuzzy.Symbol},
|
||||||
|
{p: "TEdit", str: "foo.Textedit", want: "", input: fuzzy.Symbol},
|
||||||
|
{p: "te", str: "foo.Textedit", want: "foo.[Te]xtedit", input: fuzzy.Symbol},
|
||||||
|
{p: "ee", str: "foo.Textedit", want: "", input: fuzzy.Symbol}, // short middle of the word match
|
||||||
|
{p: "ex", str: "foo.Textedit", want: "foo.T[ex]tedit", input: fuzzy.Symbol},
|
||||||
|
{p: "exdi", str: "foo.Textedit", want: "", input: fuzzy.Symbol}, // short middle of the word match
|
||||||
|
{p: "exdit", str: "foo.Textedit", want: "", input: fuzzy.Symbol}, // short middle of the word match
|
||||||
|
{p: "extdit", str: "foo.Textedit", want: "foo.T[ext]e[dit]", input: fuzzy.Symbol},
|
||||||
|
{p: "e", str: "foo.Textedit", want: "foo.T[e]xtedit", input: fuzzy.Symbol},
|
||||||
|
{p: "E", str: "foo.Textedit", want: "foo.T[e]xtedit", input: fuzzy.Symbol},
|
||||||
|
{p: "ed", str: "foo.Textedit", want: "foo.Text[ed]it", input: fuzzy.Symbol},
|
||||||
|
{p: "edt", str: "foo.Textedit", want: "", input: fuzzy.Symbol}, // short middle of the word match
|
||||||
|
{p: "edit", str: "foo.Textedit", want: "foo.Text[edit]", input: fuzzy.Symbol},
|
||||||
|
{p: "edin", str: "foo.TexteditNum", want: "foo.Text[edi]t[N]um", input: fuzzy.Symbol},
|
||||||
|
{p: "n", str: "node.GoNodeMax", want: "node.Go[N]odeMax", input: fuzzy.Symbol},
|
||||||
|
{p: "N", str: "node.GoNodeMax", want: "node.Go[N]odeMax", input: fuzzy.Symbol},
|
||||||
|
{p: "completio", str: "completion", want: "[completio]n", input: fuzzy.Symbol},
|
||||||
|
{p: "completio", str: "completion.None", want: "[completi]on.N[o]ne", input: fuzzy.Symbol},
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFuzzyMatcherRanges(t *testing.T) {
|
||||||
|
for _, tc := range fuzzyMatcherTestCases {
|
||||||
|
matcher := fuzzy.NewMatcher(tc.p, tc.input)
|
||||||
|
score := matcher.Score(tc.str)
|
||||||
|
if tc.want == "" {
|
||||||
|
if score >= 0 {
|
||||||
|
t.Errorf("Score(%s, %s) = %v; want: <= 0", tc.p, tc.str, score)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if score < 0 {
|
||||||
|
t.Errorf("Score(%s, %s) = %v, want: > 0", tc.p, tc.str, score)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
got := highlightMatches(tc.str, matcher)
|
||||||
|
if tc.want != got {
|
||||||
|
t.Errorf("highlightMatches(%s, %s) = %v, want: %v", tc.p, tc.str, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// scoreTestCases lists the score expected for a pattern/candidate pair,
// rounded to five decimals.
//
// Columns: p (pattern), str (candidate), want (expected rounded score).
var scoreTestCases = []struct {
	p    string
	str  string
	want float64
}{
	// Score precision up to five digits. Modify if changing the score, but make sure the new values
	// are reasonable.
	{"abc", "abc", 1},
	{"abc", "Abc", 1},
	{"abc", "Abcdef", 1},
	{"strc", "StrCat", 1},
	{"abc_def", "abc_def_xyz", 1},
	{"abcdef", "abc_def_xyz", 0.91667},
	{"abcxyz", "abc_def_xyz", 0.875},
	{"sc", "StrCat", 0.75},
	{"abc", "AbstrBasicCtor", 0.75},
	{"foo", "abc::foo", 1},
	{"afoo", "abc::foo", 0.9375},
	{"abr", "abc::bar", 0.5},
	{"br", "abc::bar", 0.375},
	{"aar", "abc::bar", 0.16667},
	{"edin", "foo.TexteditNum", 0},
	{"ediu", "foo.TexteditNum", 0},
	// We want the next two items to have roughly similar scores.
	{"up", "unique_ptr", 0.75},
	{"up", "upper_bound", 1},
}
|
||||||
|
|
||||||
|
func TestScores(t *testing.T) {
|
||||||
|
for _, tc := range scoreTestCases {
|
||||||
|
matcher := fuzzy.NewMatcher(tc.p, fuzzy.Symbol)
|
||||||
|
got := math.Round(float64(matcher.Score(tc.str))*1e5) / 1e5
|
||||||
|
if got != tc.want {
|
||||||
|
t.Errorf("Score(%s, %s) = %v, want: %v", tc.p, tc.str, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func highlightMatches(str string, matcher *fuzzy.Matcher) string {
|
||||||
|
matches := matcher.MatchedRanges()
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
index := 0
|
||||||
|
for i := 0; i < len(matches)-1; i += 2 {
|
||||||
|
s, e := matches[i], matches[i+1]
|
||||||
|
fmt.Fprintf(&buf, "%s[%s]", str[index:s], str[s:e])
|
||||||
|
index = e
|
||||||
|
}
|
||||||
|
buf.WriteString(str[index:])
|
||||||
|
return buf.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkMatcher(b *testing.B) {
|
||||||
|
pattern := "Foo"
|
||||||
|
candidates := []string{
|
||||||
|
"F_o_o",
|
||||||
|
"Barfoo",
|
||||||
|
"Faoo",
|
||||||
|
"F__oo",
|
||||||
|
"F_oo",
|
||||||
|
"FaoFooa",
|
||||||
|
"BarFoo",
|
||||||
|
"FooA",
|
||||||
|
"FooBar",
|
||||||
|
"Foo",
|
||||||
|
}
|
||||||
|
|
||||||
|
matcher := fuzzy.NewMatcher(pattern, fuzzy.Text)
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
for _, c := range candidates {
|
||||||
|
matcher.Score(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var numBytes int
|
||||||
|
for _, c := range candidates {
|
||||||
|
numBytes += len(c)
|
||||||
|
}
|
||||||
|
b.SetBytes(int64(numBytes))
|
||||||
|
}
|
|
@ -12,6 +12,7 @@ import (
|
||||||
"go/types"
|
"go/types"
|
||||||
|
|
||||||
"golang.org/x/tools/go/ast/astutil"
|
"golang.org/x/tools/go/ast/astutil"
|
||||||
|
"golang.org/x/tools/internal/lsp/fuzzy"
|
||||||
"golang.org/x/tools/internal/lsp/snippet"
|
"golang.org/x/tools/internal/lsp/snippet"
|
||||||
"golang.org/x/tools/internal/lsp/telemetry/trace"
|
"golang.org/x/tools/internal/lsp/telemetry/trace"
|
||||||
"golang.org/x/tools/internal/span"
|
"golang.org/x/tools/internal/span"
|
||||||
|
@ -149,6 +150,9 @@ type completer struct {
|
||||||
|
|
||||||
// deepState contains the current state of our deep completion search.
|
// deepState contains the current state of our deep completion search.
|
||||||
deepState deepCompletionState
|
deepState deepCompletionState
|
||||||
|
|
||||||
|
// matcher does fuzzy matching of the candidates for the surrounding prefix.
|
||||||
|
matcher *fuzzy.Matcher
|
||||||
}
|
}
|
||||||
|
|
||||||
type compLitInfo struct {
|
type compLitInfo struct {
|
||||||
|
@ -187,16 +191,17 @@ func (c *completer) setSurrounding(ident *ast.Ident) {
|
||||||
if c.surrounding != nil {
|
if c.surrounding != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if !(ident.Pos() <= c.pos && c.pos <= ident.End()) {
|
if !(ident.Pos() <= c.pos && c.pos <= ident.End()) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
c.surrounding = &Selection{
|
c.surrounding = &Selection{
|
||||||
Content: ident.Name,
|
Content: ident.Name,
|
||||||
Range: span.NewRange(c.view.Session().Cache().FileSet(), ident.Pos(), ident.End()),
|
Range: span.NewRange(c.view.Session().Cache().FileSet(), ident.Pos(), ident.End()),
|
||||||
Cursor: c.pos,
|
Cursor: c.pos,
|
||||||
}
|
}
|
||||||
|
if c.surrounding.Prefix() != "" {
|
||||||
|
c.matcher = fuzzy.NewMatcher(c.surrounding.Prefix(), fuzzy.Symbol)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// found adds a candidate completion. We will also search through the object's
|
// found adds a candidate completion. We will also search through the object's
|
||||||
|
|
Loading…
Reference in New Issue