Implement TF & DF in tf-idf.

This commit is contained in:
Gea-Suan Lin
2024-02-09 14:20:08 +08:00
parent 18fbfa7292
commit ade2049093

View File

@@ -2,6 +2,7 @@ package main
import (
"fmt"
"strings"
"github.com/gslin/go-ir-playground/internal/artifact"
"github.com/gslin/go-ir-playground/internal/tokenizer"
@@ -10,11 +11,27 @@ import (
func main() {
articles := artifact.Read("data/articles.json")
for _, article := range articles {
title_bag := tokenizer.Tokenize(article.Title)
body_bag := tokenizer.Tokenize(article.Body)
tokens := make(map[string][]string)
tf := make(map[string]map[string]int)
df := make(map[string]int)
fmt.Printf("title_bag = %v\n", title_bag)
fmt.Printf("body_bag = %v\n", body_bag)
for _, article := range articles {
str := article.Title + "\n" + article.Body
bag := tokenizer.Tokenize(str)
tokens[article.Id] = bag
// Init TF:
tf[article.Id] = make(map[string]int)
for _, w := range bag {
// Handle TF:
tf[article.Id][w] = strings.Count(str, w)
// Handle DF:
df[w] += 1
}
}
fmt.Println("TF & DF Built")
}