// Source: internal/ngram/ngram.go (package ngram, imports "regexp"), added by
// commit 9e455bb15acfb48a77f76a78fe310226a98653e6, "Implement gram-related
// functions." (Gea-Suan Lin, Wed, 31 Jan 2024 09:43:04 +0800).

// Patterns used by split. They are compiled once, when the package is
// initialized, and never reassigned afterwards.
var (
	// re1 matches a run of one or more non-letter characters (anything
	// outside Unicode category L). split uses it as the word separator.
	re1 = regexp.MustCompile(`\PL+`)

	// re2 is the empty pattern. Splitting on it breaks a string into its
	// individual characters, and yields nothing for an empty string.
	re2 = regexp.MustCompile("")
)

// Bigram returns the bigrams of s: the concatenation of every pair of
// adjacent letters, in order of appearance.
//
// Non-letter characters are discarded before pairing, so letters that are
// separated only by non-letters still count as adjacent; for example
// Bigram("ab cd") yields "ab", "bc", "cd".
//
// The result is never nil. It is empty when s holds fewer than two letters.
func Bigram(s string) []string {
	bag := split(s)
	if len(bag) < 2 {
		return []string{}
	}

	// n letters produce exactly n-1 adjacent pairs, so size the result once.
	r := make([]string, 0, len(bag)-1)
	for i := 0; i < len(bag)-1; i++ {
		r = append(r, bag[i]+bag[i+1])
	}

	return r
}

// split returns the letters of s, in order, each as a one-character string.
// Non-letter characters only act as separators and never appear in the
// result. The returned slice is never nil, even when it is empty.
func split(s string) []string {
	bag := make([]string, 0)
	for _, w := range re1.Split(s, -1) {
		// A leading or trailing separator makes re1.Split produce an empty
		// word; re2.Split returns no elements for it, so nothing is added.
		bag = append(bag, re2.Split(w, -1)...)
	}
	return bag
}