Implement tokenize().
internal/tokenize/tokenize.go (new file, 14 lines)
@@ -0,0 +1,14 @@
package tokenize

import (
	"slices"

	"github.com/gslin/go-ir-playground/internal/ngram"
)

// Tokenize returns the sorted, de-duplicated set of unigram and bigram
// tokens extracted from s.
func Tokenize(s string) []string {
	bag := append(ngram.Unigram(s), ngram.Bigram(s)...)
	slices.Sort(bag)
	// slices.Compact removes adjacent duplicates and returns the shortened
	// slice, so its result must be kept.
	bag = slices.Compact(bag)
	return bag
}
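For context, a minimal caller might look like the sketch below. It is not part of the commit; it assumes a main package elsewhere in the same module (so the internal import is permitted) and that ngram.Unigram and ngram.Bigram emit character-level n-grams of the input string.

package main

import (
	"fmt"

	"github.com/gslin/go-ir-playground/internal/tokenize"
)

func main() {
	// If the ngram helpers work on characters, "abc" would tokenize to
	// something like [a ab b bc c]: the sorted, de-duplicated union of
	// single characters and adjacent pairs.
	fmt.Println(tokenize.Tokenize("abc"))
}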