Implement TF & DF in tf-idf.

This commit is contained in:
Gea-Suan Lin
2024-02-09 14:20:08 +08:00
parent 18fbfa7292
commit ade2049093

View File

@@ -2,6 +2,7 @@ package main
import ( import (
"fmt" "fmt"
"strings"
"github.com/gslin/go-ir-playground/internal/artifact" "github.com/gslin/go-ir-playground/internal/artifact"
"github.com/gslin/go-ir-playground/internal/tokenizer" "github.com/gslin/go-ir-playground/internal/tokenizer"
@@ -10,11 +11,27 @@ import (
func main() { func main() {
articles := artifact.Read("data/articles.json") articles := artifact.Read("data/articles.json")
for _, article := range articles { tokens := make(map[string][]string)
title_bag := tokenizer.Tokenize(article.Title) tf := make(map[string]map[string]int)
body_bag := tokenizer.Tokenize(article.Body) df := make(map[string]int)
fmt.Printf("title_bag = %v\n", title_bag) for _, article := range articles {
fmt.Printf("body_bag = %v\n", body_bag) str := article.Title + "\n" + article.Body
bag := tokenizer.Tokenize(str)
tokens[article.Id] = bag
// Init TF:
tf[article.Id] = make(map[string]int)
for _, w := range bag {
// Handle TF:
tf[article.Id][w] = strings.Count(str, w)
// Handle DF:
df[w] += 1
} }
} }
fmt.Println("TF & DF Built")
}