Implement term frequency (TF) and document frequency (DF) for TF-IDF.
This commit is contained in:
@@ -2,6 +2,7 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/gslin/go-ir-playground/internal/artifact"
|
"github.com/gslin/go-ir-playground/internal/artifact"
|
||||||
"github.com/gslin/go-ir-playground/internal/tokenizer"
|
"github.com/gslin/go-ir-playground/internal/tokenizer"
|
||||||
@@ -10,11 +11,27 @@ import (
|
|||||||
func main() {
|
func main() {
|
||||||
articles := artifact.Read("data/articles.json")
|
articles := artifact.Read("data/articles.json")
|
||||||
|
|
||||||
for _, article := range articles {
|
tokens := make(map[string][]string)
|
||||||
title_bag := tokenizer.Tokenize(article.Title)
|
tf := make(map[string]map[string]int)
|
||||||
body_bag := tokenizer.Tokenize(article.Body)
|
df := make(map[string]int)
|
||||||
|
|
||||||
fmt.Printf("title_bag = %v\n", title_bag)
|
for _, article := range articles {
|
||||||
fmt.Printf("body_bag = %v\n", body_bag)
|
str := article.Title + "\n" + article.Body
|
||||||
|
|
||||||
|
bag := tokenizer.Tokenize(str)
|
||||||
|
tokens[article.Id] = bag
|
||||||
|
|
||||||
|
// Init TF:
|
||||||
|
tf[article.Id] = make(map[string]int)
|
||||||
|
|
||||||
|
for _, w := range bag {
|
||||||
|
// Handle TF:
|
||||||
|
tf[article.Id][w] = strings.Count(str, w)
|
||||||
|
|
||||||
|
// Handle DF:
|
||||||
|
df[w] += 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fmt.Println("TF & DF Built")
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user