[Python-checkins] r66576 - doctools/trunk/sphinx/search.py
georg.brandl
python-checkins at python.org
Wed Sep 24 11:06:32 CEST 2008
Author: georg.brandl
Date: Wed Sep 24 11:06:31 2008
New Revision: 66576
Log:
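Add a stopword list to the search indexer; stemmed terms that are stopwords, shorter than three characters, or all digits are no longer indexed.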
Modified:
doctools/trunk/sphinx/search.py
Modified: doctools/trunk/sphinx/search.py
==============================================================================
--- doctools/trunk/sphinx/search.py (original)
+++ doctools/trunk/sphinx/search.py Wed Sep 24 11:06:31 2008
@@ -20,6 +20,17 @@
word_re = re.compile(r'\w+(?u)')
+stopwords = set("""
+a and are as at
+be but by
+for
+if in into is it
+near no not
+of on or
+such
+that the their then there these they this to
+was will with
+""".split())
class _JavaScriptIndex(object):
"""
@@ -165,8 +176,10 @@
visitor = WordCollector(doctree)
doctree.walk(visitor)
- def add_term(word, prefix=''):
- word = self._stemmer.stem(word)
+ def add_term(word, prefix='', stem=self._stemmer.stem):
+ word = stem(word)
+ if len(word) < 3 or word in stopwords or word.isdigit():
+ return
self._mapping.setdefault(prefix + word, set()).add(filename)
for word in word_re.findall(title):
More information about the Python-checkins mailing list.