[Python-checkins] r66576 - doctools/trunk/sphinx/search.py

georg.brandl python-checkins at python.org
Wed Sep 24 11:06:32 CEST 2008


Author: georg.brandl
Date: Wed Sep 24 11:06:31 2008
New Revision: 66576
Log:
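Add a stopword list; when indexing, skip stemmed terms that are stopwords, shorter than three characters, or purely numeric.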
Modified:
 doctools/trunk/sphinx/search.py
Modified: doctools/trunk/sphinx/search.py
==============================================================================
--- doctools/trunk/sphinx/search.py	(original)
+++ doctools/trunk/sphinx/search.py	Wed Sep 24 11:06:31 2008
@@ -20,6 +20,17 @@
 
 word_re = re.compile(r'\w+(?u)')
 
+stopwords = set("""
+a and are as at
+be but by
+for
+if in into is it
+near no not
+of on or
+such
+that the their then there these they this to
+was will with
+""".split())
 
 class _JavaScriptIndex(object):
 """
@@ -165,8 +176,10 @@
 visitor = WordCollector(doctree)
 doctree.walk(visitor)
 
- def add_term(word, prefix=''):
- word = self._stemmer.stem(word)
+ def add_term(word, prefix='', stem=self._stemmer.stem):
+ word = stem(word)
+ if len(word) < 3 or word in stopwords or word.isdigit():
+ return
 self._mapping.setdefault(prefix + word, set()).add(filename)
 
 for word in word_re.findall(title):


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /