Method processTokens

tok/tok.go:447–464 · view source on GitHub ↗

processTokens performs the preprocessing shared by QueryTokens and Tokens: it analyzes the input string into tokens, removes stopwords, and applies stemming.

(v interface{})

Source from the content-addressed store, hash-verified

445
446	// processTokens handles the common preprocessing steps for both QueryTokens and Tokens
447	func (t NGramTokenizer) processTokens(v interface{}) (analysis.TokenStream, error) {
448	str, ok := v.(string)
449	if !ok \|\| str == "" {
450	return nil, nil
451	}
452	lang := LangBase(t.lang)
453
454	// Step 1: Lowercase, normalize, basic tokenization
455	tokens := fulltextAnalyzer.Analyze([]byte(str))
456
457	// Step 2: Remove stopwords
458	tokens = filterStopwords(lang, tokens)
459
460	// Step 3: Apply stemming
461	tokens = filterStemmers(lang, tokens)
462
463	return tokens, nil
464	}
465
466	// generateNGrams creates n-grams from tokens and returns deduplicated results
467	func (t NGramTokenizer) generateNGrams(tokens analysis.TokenStream, ngramFunc func(analysis.TokenStream, func(string))) []string {

QueryTokensMethod · 0.95

TokensMethod · 0.95

LangBaseFunction · 0.85

filterStopwordsFunction · 0.85

filterStemmersFunction · 0.85

no test coverage detected