From 36c17a10b56513c36302374415574ce0cdd8e643 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Sat, 3 Dec 2022 10:54:06 +0800 Subject: [PATCH] fs.File -> io.Reader --- README.md | 2 +- analyse/idf.go | 4 ++-- analyse/stopwords.go | 4 ++-- analyse/tag_extracker.go | 8 ++++---- analyse/textrank.go | 4 ++-- dictionary.go | 4 ++-- dictionary/dictionary.go | 6 +++--- jieba.go | 6 +++--- posseg/dictionary.go | 4 ++-- posseg/posseg.go | 6 +++--- tokenizers/tokenizer.go | 8 ++++---- 11 files changed, 28 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 75e7f0a..b1ca54e 100755 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![GoDoc](https://godoc.org/github.com/fumiama/jieba?status.svg)](https://godoc.org/github.com/fumiama/jieba) -[结巴分词](https://github.com/fxsjy/jieba) 是由 [@fxsjy](https://github.com/fxsjy) 使用 Python 编写的中文分词组件,本仓库是结巴分词的 Golang 语言实现,修改于[jiebago](https://github.com/wangbin/jiebago),大幅优化了速度与性能,增加了从`fs.File`加载字典等功能。 +[结巴分词](https://github.com/fxsjy/jieba) 是由 [@fxsjy](https://github.com/fxsjy) 使用 Python 编写的中文分词组件,本仓库是结巴分词的 Golang 语言实现,修改于[jiebago](https://github.com/wangbin/jiebago),大幅优化了速度与性能,增加了从`io.Reader`加载字典等功能。 ## 使用 diff --git a/analyse/idf.go b/analyse/idf.go index 4f55a45..987ef05 100755 --- a/analyse/idf.go +++ b/analyse/idf.go @@ -1,7 +1,7 @@ package analyse import ( - "io/fs" + "io" "sort" "sync" @@ -39,7 +39,7 @@ func (i *Idf) Load(tokens ...dictionary.Token) { i.Unlock() } -func (i *Idf) loadDictionary(file fs.File) error { +func (i *Idf) loadDictionary(file io.Reader) error { return dictionary.LoadDictionary(i, file) } diff --git a/analyse/stopwords.go b/analyse/stopwords.go index 5a39391..dc2382e 100755 --- a/analyse/stopwords.go +++ b/analyse/stopwords.go @@ -1,7 +1,7 @@ package analyse import ( - "io/fs" + "io" "sync" "github.com/fumiama/jieba/dictionary" @@ -83,7 +83,7 @@ func (s *StopWord) Load(tokens ...dictionary.Token) { s.Unlock() } -func (s *StopWord) loadDictionary(file fs.File) error { +func (s *StopWord) loadDictionary(file io.Reader) error { return dictionary.LoadDictionary(s, file) } diff --git a/analyse/tag_extracker.go b/analyse/tag_extracker.go index be71f1b..30d9a97 100755 --- a/analyse/tag_extracker.go +++ b/analyse/tag_extracker.go @@ -2,7 +2,7 @@ package analyse import ( - "io/fs" + "io" "sort" "strings" "unicode/utf8" @@ -53,7 +53,7 @@ type TagExtracter struct { } // LoadDictionary reads the given filename and create a new dictionary. -func (t *TagExtracter) LoadDictionary(file fs.File) (err error) { +func (t *TagExtracter) LoadDictionary(file io.Reader) (err error) { t.stopWord = NewStopWord() t.seg, err = jieba.LoadDictionary(file) return @@ -67,7 +67,7 @@ func (t *TagExtracter) LoadDictionaryAt(file string) (err error) { } // LoadIdf reads the given file and create a new Idf dictionary. -func (t *TagExtracter) LoadIdf(file fs.File) error { +func (t *TagExtracter) LoadIdf(file io.Reader) error { t.idf = NewIdf() return t.idf.loadDictionary(file) } @@ -79,7 +79,7 @@ func (t *TagExtracter) LoadIdfAt(fileName string) error { } // LoadStopWords reads the given file and create a new StopWord dictionary. -func (t *TagExtracter) LoadStopWords(file fs.File) error { +func (t *TagExtracter) LoadStopWords(file io.Reader) error { t.stopWord = NewStopWord() return t.stopWord.loadDictionary(file) } diff --git a/analyse/textrank.go b/analyse/textrank.go index 4b2007e..e07d140 100755 --- a/analyse/textrank.go +++ b/analyse/textrank.go @@ -2,7 +2,7 @@ package analyse import ( "hash/crc64" - "io/fs" + "io" "math" "sort" @@ -171,7 +171,7 @@ func (t *TextRanker) TextRank(sentence string, topK int) Segments { type TextRanker posseg.Segmenter // NewTextRanker reads a given file and create a new dictionary file for Textranker. -func NewTextRanker(file fs.File) (*TextRanker, error) { +func NewTextRanker(file io.Reader) (*TextRanker, error) { seg, err := posseg.LoadDictionary(file) return (*TextRanker)(seg), err } diff --git a/dictionary.go b/dictionary.go index d38eaaa..cb1f72b 100755 --- a/dictionary.go +++ b/dictionary.go @@ -1,7 +1,7 @@ package jieba import ( - "io/fs" + "io" "math" "sync" @@ -58,7 +58,7 @@ func (d *Dictionary) Frequency(key string) (float64, bool) { return freq, ok } -func (d *Dictionary) loadDictionary(file fs.File) error { +func (d *Dictionary) loadDictionary(file io.Reader) error { return dictionary.LoadDictionary(d, file) } diff --git a/dictionary/dictionary.go b/dictionary/dictionary.go index 7514f34..f630b05 100755 --- a/dictionary/dictionary.go +++ b/dictionary/dictionary.go @@ -4,7 +4,7 @@ package dictionary import ( "bufio" - "io/fs" + "io" "os" "strconv" "strings" @@ -16,7 +16,7 @@ type DictLoader interface { AddToken(Token) } -func loadDictionary(file fs.File) (tokens []Token, err error) { +func loadDictionary(file io.Reader) (tokens []Token, err error) { scanner := bufio.NewScanner(file) var token Token var line string @@ -44,7 +44,7 @@ func loadDictionary(file fs.File) (tokens []Token, err error) { } // LoadDictionary reads the given file and passes all tokens to a DictLoader. -func LoadDictionary(dl DictLoader, file fs.File) error { +func LoadDictionary(dl DictLoader, file io.Reader) error { tokens, err := loadDictionary(file) if err != nil { return err diff --git a/jieba.go b/jieba.go index 669f3c6..55bd1eb 100755 --- a/jieba.go +++ b/jieba.go @@ -2,7 +2,7 @@ package jieba import ( - "io/fs" + "io" "math" "regexp" "strings" @@ -92,7 +92,7 @@ func (seg *Segmenter) SuggestFrequency(words ...string) float64 { // LoadDictionary loads dictionary from given file name. Everytime // LoadDictionary is called, previously loaded dictionary will be cleard. -func LoadDictionary(file fs.File) (*Segmenter, error) { +func LoadDictionary(file io.Reader) (*Segmenter, error) { d := &Dictionary{freqMap: make(map[string]float64)} err := d.loadDictionary(file) return (*Segmenter)(d), err @@ -109,7 +109,7 @@ func LoadDictionaryAt(file string) (*Segmenter, error) { // LoadUserDictionary loads a user specified dictionary, it must be called // after LoadDictionary, and it will not clear any previous loaded dictionary, // instead it will override exist entries. -func (seg *Segmenter) LoadUserDictionary(file fs.File) error { +func (seg *Segmenter) LoadUserDictionary(file io.Reader) error { return (*Dictionary)(seg).loadDictionary(file) } diff --git a/posseg/dictionary.go b/posseg/dictionary.go index a2cb3a6..eb43c72 100755 --- a/posseg/dictionary.go +++ b/posseg/dictionary.go @@ -1,7 +1,7 @@ package posseg import ( - "io/fs" + "io" "math" "sync" @@ -70,7 +70,7 @@ func (d *Dictionary) Pos(key string) (string, bool) { return pos, ok } -func (d *Dictionary) loadDictionary(file fs.File) error { +func (d *Dictionary) loadDictionary(file io.Reader) error { return dictionary.LoadDictionary(d, file) } diff --git a/posseg/posseg.go b/posseg/posseg.go index 67c3423..bbdb6ad 100755 --- a/posseg/posseg.go +++ b/posseg/posseg.go @@ -2,7 +2,7 @@ package posseg import ( - "io/fs" + "io" "math" "regexp" @@ -39,7 +39,7 @@ type Segmenter Dictionary // LoadDictionary loads dictionary from given file name. // Everytime LoadDictionaryAt is called, previously loaded dictionary will be cleard. -func LoadDictionary(file fs.File) (*Segmenter, error) { +func LoadDictionary(file io.Reader) (*Segmenter, error) { dict := &Dictionary{freqMap: make(map[string]float64), posMap: make(map[string]string)} err := dict.loadDictionary(file) if err != nil { @@ -62,7 +62,7 @@ func LoadDictionaryAt(file string) (*Segmenter, error) { // LoadUserDictionary loads a user specified dictionary, it must be called // after LoadDictionary, and it will not clear any previous loaded dictionary, // instead it will override exist entries. -func (seg *Segmenter) LoadUserDictionary(file fs.File) error { +func (seg *Segmenter) LoadUserDictionary(file io.Reader) error { return (*Dictionary)(seg).loadDictionary(file) } diff --git a/tokenizers/tokenizer.go b/tokenizers/tokenizer.go index 9ae5056..a5bc0c6 100755 --- a/tokenizers/tokenizer.go +++ b/tokenizers/tokenizer.go @@ -1,7 +1,7 @@ package tokenizers import ( - "io/fs" + "io" "regexp" "strconv" @@ -42,7 +42,7 @@ Parameters: "交换机" as a single word. If searchMode is true, it will further split this word into "交换", "换机", which are valid Chinese words. */ -func NewJiebaTokenizer(dictFile fs.File, hmm, searchMode bool) (analysis.Tokenizer, error) { +func NewJiebaTokenizer(dictFile io.Reader, hmm, searchMode bool) (analysis.Tokenizer, error) { seg, err := jieba.LoadDictionary(dictFile) return &JiebaTokenizer{ seg: seg, @@ -131,7 +131,7 @@ JiebaTokenizerConstructor creates a JiebaTokenizer. Parameter config should contains at least one parameter: - file: the path of the dictionary file or fs.File. + file: the path of the dictionary file or io.Reader. hmm: optional, specify whether to use Hidden Markov Model, see NewJiebaTokenizer for details. @@ -150,7 +150,7 @@ func JiebaTokenizerConstructor(config map[string]interface{}, cache *registry.Ca if ok { return NewJiebaTokenizerAt(dictFilePath, hmm, searchMode) } - dictFile := config["file"].(fs.File) + dictFile := config["file"].(io.Reader) return NewJiebaTokenizer(dictFile, hmm, searchMode) }