Files
Gea-Suan Lin 3dcd171227 Rename.
2024-02-16 21:02:40 +08:00

15 lines
253 B
Go

package tokenizer
import (
"slices"
"github.com/gslin/go-ir-playground/internal/ngram"
)
// Tokenize returns the tokens that ngram.Unigram and ngram.Bigram produce
// for s, merged into one slice, sorted in ascending order, and with
// duplicate tokens removed.
func Tokenize(s string) []string {
	bag := append(ngram.Unigram(s), ngram.Bigram(s)...)

	// Sorting places equal tokens next to each other, which Compact requires
	// in order to drop every duplicate.
	slices.Sort(bag)

	// Compact returns the shortened slice; the result must be kept, because
	// the original slice header still has the old length and would expose
	// stale (or zeroed) elements past the unique prefix.
	bag = slices.Compact(bag)

	return bag
}