Compare commits

..

10 Commits

Author SHA1 Message Date
Gea-Suan Lin
4a1e2b9c5e Fix test function naming and the actual test. 2024-02-19 10:42:23 +08:00
Gea-Suan Lin
14e54f2393 Fix test function naming. 2024-02-19 10:41:38 +08:00
Gea-Suan Lin
2d1c6f161a Check arguments. 2024-02-18 22:00:43 +08:00
Gea-Suan Lin
64a2507631 Add link so that I can validate quickly. 2024-02-18 21:45:29 +08:00
Gea-Suan Lin
de47a7fab3 Import my own blog. 2024-02-18 05:06:29 +08:00
Gea-Suan Lin
3dcd171227 Rename. 2024-02-16 21:02:40 +08:00
Gea-Suan Lin
1de46569e8 Add a simple test case for tokenizer. 2024-02-16 20:59:15 +08:00
Gea-Suan Lin
57c153a6c3 Add more test about bigram. 2024-02-16 20:55:23 +08:00
Gea-Suan Lin
55ad14e790 Add test cases for bigram. 2024-02-16 20:54:28 +08:00
Gea-Suan Lin
2b1e514431 Add "make test". 2024-02-16 20:52:49 +08:00
6 changed files with 37925 additions and 13 deletions

View File

@@ -1,6 +1,6 @@
# #
.DEFAULT: all .DEFAULT: all
.PHONY: all clean .PHONY: all clean test
# #
DIST?= dist/ DIST?= dist/
@@ -18,3 +18,6 @@ dist/ir-tfidf:: cmd/ir-tfidf/* internal/**
clean:: clean::
rm -rf "${DIST}" rm -rf "${DIST}"
test::
go test ./...

View File

@@ -11,6 +11,11 @@ import (
) )
func main() { func main() {
if len(os.Args) < 2 {
fmt.Printf("You need to specify a keyword to search.\n")
os.Exit(1)
}
articles := artifact.Read("data/articles.json") articles := artifact.Read("data/articles.json")
tokens := make(map[string][]string) tokens := make(map[string][]string)
@@ -49,7 +54,7 @@ func main() {
} }
if score > 0 { if score > 0 {
fmt.Printf("Article %v: %v\n", article.Id, score) fmt.Printf("Article %v (https://blog.gslin.org/?p=%v): %v\n", article.Id, article.Id, score)
} }
} }
} }

File diff suppressed because one or more lines are too long

View File

@@ -7,6 +7,21 @@ import (
"github.com/gslin/go-ir-playground/internal/ngram" "github.com/gslin/go-ir-playground/internal/ngram"
) )
// TestBigram exercises ngram.Bigram with an ASCII word, a two-character
// CJK string, and a four-character CJK string, checking both the number of
// bigrams returned and their contents.
func TestBigram(t *testing.T) {
	// The ASCII word "test" is expected to produce no bigrams.
	a := ngram.Bigram("test")
	assert.Len(t, a, 0)

	// Two characters form exactly one bigram.
	a = ngram.Bigram("測試")
	// Only index into the result when the length check passed; assert is
	// non-fatal, so an unguarded a[0] would panic on a short result.
	if assert.Len(t, a, 1) {
		assert.Equal(t, "測試", a[0])
	}

	// Four characters form three overlapping bigrams, in input order.
	a = ngram.Bigram("中文測試")
	if assert.Len(t, a, 3) {
		assert.Equal(t, "中文", a[0])
		assert.Equal(t, "文測", a[1])
		assert.Equal(t, "測試", a[2])
	}
}
func TestUnigram(t *testing.T) { func TestUnigram(t *testing.T) {
a := ngram.Unigram("test") a := ngram.Unigram("test")
assert.Equal(t, len(a), 1) assert.Equal(t, len(a), 1)

View File

@@ -0,0 +1,14 @@
package tokenizer_test
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/gslin/go-ir-playground/internal/tokenizer"
)
// TestTokenize checks that tokenizer.Tokenize returns exactly one token,
// "test", for the input "test".
func TestTokenize(t *testing.T) {
	a := tokenizer.Tokenize("test")
	// Only index into the result when the length check passed; assert is
	// non-fatal, so an unguarded a[0] would panic on an empty result.
	if assert.Len(t, a, 1) {
		assert.Equal(t, "test", a[0])
	}
}