Implement tokenize().

This commit is contained in:
Gea-Suan Lin
2024-02-09 11:46:19 +08:00
parent b133273065
commit ce79d2b245
3 changed files with 21 additions and 3 deletions

View File

@@ -0,0 +1,14 @@
package tokenize
import (
"slices"
"github.com/gslin/go-ir-playground/internal/ngram"
)
// Tokenize splits s into a bag of unigram and bigram tokens,
// sorted lexicographically with duplicates removed.
func Tokenize(s string) []string {
	bag := append(ngram.Unigram(s), ngram.Bigram(s)...)
	slices.Sort(bag)
	// slices.Compact dedups adjacent equal elements in place and returns
	// the shortened slice. The previous code discarded that return value
	// and returned the full-length bag, leaving stale duplicates at the
	// tail — the result must come from Compact itself.
	return slices.Compact(bag)
}