Changeset 670
- Timestamp: 04/19/06 02:44:46 (3 years ago)
- Files:
  - reposearchplugin/0.9/setup.py (modified) (1 diff)
  - reposearchplugin/0.9/tracreposearch/indexer.py (modified) (7 diffs)
  - reposearchplugin/0.9/tracreposearch/search.py (modified) (5 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
reposearchplugin/0.9/setup.py
r363 r670 4 4 VERSION = '0.1' 5 5 6 setup(name=PACKAGE, version=VERSION, packages=['tracreposearch']) 6 setup(name='tracreposearch', 7 version='0.2', 8 packages=['tracreposearch'], 9 author='Alec Thomas', 10 mail='alec@swapoff.org', 11 url="http://trac-hacks.org/wiki/TracRepoSearch", 12 license='BSD') reposearchplugin/0.9/tracreposearch/indexer.py
r509 r670 12 12 from tracreposearch.lock import lock, unlock, LOCK_EX 13 13 from trac.versioncontrol.api import Node 14 from trac.mimeview.api import Mimeview 15 import time 14 16 import anydbm 15 17 import re … … 26 28 27 29 def __init__(self, file, mode): 30 self._cache = {} 28 31 self.dbm = anydbm.open(file, mode) 29 32 30 33 def __contains__(self, key): 31 return str(key) in self.dbm 34 key = key.encode('utf-8') 35 return key in self._cache or key in self.dbm 32 36 33 37 def __getitem__(self, key): 34 return set(self.dbm[str(key)].split(pathsep)) 38 key = key.encode('utf-8') 39 if key in self._cache: 40 return self._cache[key] 41 return self._cache.setdefault(key, set(self.dbm[key].split(pathsep))) 35 42 36 43 def __setitem__(self, key, value): 37 self.dbm[str(key)] = pathsep.join(value) 44 key = key.encode('utf-8') 45 value = pathsep.join(value) 46 self._cache[key] = value 38 47 39 48 def __delitem__(self, key): 40 del self.dbm[str(key)] 49 key = key.encode('utf-8') 50 try: 51 del self._cache[key] 52 except KeyError: 53 pass 54 del self.dbm[key] 41 55 42 56 def keys(self): 43 return self.dbm.keys()57 return [k.decode('utf-8') for k in self.dbm.keys()] 44 58 45 59 def sync(self): 60 for key, value in self._cache.iteritems(): 61 self.dbm[key] = value 46 62 self.dbm.sync() 63 64 def __del__(self): 65 self.sync() 47 66 48 67 index_lock = None … … 66 85 def synchronized(f): 67 86 """ Synchronization decorator. 
""" 68 69 87 def wrap(*args, **kw): 70 88 acquire_lock() … … 123 141 124 142 def sync(self): 125 self.meta['last-repo-rev'] = str(self.repo.youngest_rev)143 self.meta['last-repo-rev'] = unicode(self.repo.youngest_rev) 126 144 self.meta['index-include'] = self.env.config.get('repo-search', 'include', '') 127 145 self.meta['index-exclude'] = self.env.config.get('repo-search', 'exclude', '') … … 160 178 161 179 def _reindex_node(self, node): 180 to_unicode = Mimeview(self.env).to_unicode 181 162 182 def node_tokens(): 163 for token in self._strip.finditer(node.get_content().read()): 183 content = to_unicode(node.get_content().read(), node.get_content_type()) 184 for token in self._strip.finditer(content): 164 185 yield token.group().lower() 165 186 for token in self._strip.finditer(node.path): … … 187 208 self.words[word] = [node.path] 188 209 node_words.add(word) 189 self.files[str(node.path)] = node_words 190 self.revs[str(node.path)] = str(node.rev) 210 self.files[node.path] = node_words 211 self.revs[node.path.encode('utf-8')] = unicode(node.rev) 212 _reindex_node = _reindex_node 191 213 192 214 def _invalidate_file(self, file): … … 200 222 def reindex(self): 201 223 """ Reindex the repository if necessary. """ 202 if self.need_reindex(): 203 self.env.log.debug('Indexing repository (either repository or indexing criteria have changed)') 204 self._open_storage('c') 205 new_files = set() 206 for node in TracRepoSearchPlugin(self.env).walk_repo(self.repo): 207 if node.kind != Node.DIRECTORY: 208 # Node has changed? 
209 if int(self.revs.get(str(node.path), -1)) != node.rev: 210 self.env.log.debug("Reindexing %s" % node.path) 211 self._invalidate_file(node.path) 212 self._reindex_node(node) 213 new_files.add(node.path) 214 215 # All files that don't match the new filter criteria must be purged 216 # from the index 217 invalidated_files = set(self.files.keys()) 218 invalidated_files.difference_update(new_files) 219 for invalid in invalidated_files: 220 self._invalidate_file(invalid) 221 222 self.sync() 223 self._open_storage('r') 224 self.env.log.debug('Index finished') 224 if not self.need_reindex(): 225 return 226 start = time.time() 227 self.env.log.debug('Indexing repository (either repository or indexing criteria have changed)') 228 self._open_storage('c') 229 new_files = set() 230 for node in TracRepoSearchPlugin(self.env).walk_repo(self.repo): 231 if node.kind != Node.DIRECTORY: 232 # Node has changed? 233 if int(self.revs.get(node.path.encode('utf-8'), -1)) != node.rev: 234 self.env.log.debug("Reindexing %s" % node.path) 235 self._invalidate_file(node.path) 236 self._reindex_node(node) 237 new_files.add(node.path) 238 239 # All files that don't match the new filter criteria must be purged 240 # from the index 241 invalidated_files = set(self.files.keys()) 242 invalidated_files.difference_update(new_files) 243 for invalid in invalidated_files: 244 self._invalidate_file(invalid) 245 246 self.sync() 247 self._open_storage('r') 248 self.env.log.debug('Index finished in %.2f seconds' % (time.time() - start)) 225 249 reindex = synchronized(reindex) 226 250 reposearchplugin/0.9/tracreposearch/search.py
r476 r670 4 4 from trac.perm import IPermissionRequestor 5 5 from trac.util import Markup, escape 6 from trac.mimeview.api import Mimeview 6 7 import re 7 8 import posixpath … … 71 72 include, excludes = self._get_filters() 72 73 74 to_unicode = Mimeview(self.env).to_unicode 75 73 76 # Use indexer if possible, otherwise fall back on brute force search. 74 77 try: … … 78 81 walker = lambda repo, query: [repo.get_node(filename) for filename 79 82 in self.indexer.find_words(query)] 80 except TracError: 83 except TracError, e: 84 self.env.log.warning(e) 85 self.env.log.warning('Falling back on full repository walk') 81 86 def full_walker(repo, query): 82 87 for node in self.walk_repo(repo): 83 88 # Search content 84 89 matched = 1 85 content = node.get_content().read().lower() 90 content = node.get_content() 91 if not content: 92 continue 93 content = to_unicode(content.read().lower(), node.get_content_type()) 86 94 for term in query: 87 95 if term not in content: … … 110 118 else: 111 119 found = 0 112 for n, line in enumerate(node.get_content().read().splitlines()): 120 content = to_unicode(node.get_content().read(), node.get_content_type()) 121 for n, line in enumerate(content.splitlines()): 113 122 line = line.lower() 114 123 for q in query: … … 122 131 yield (self.env.href.browser(node.path) + (found and '#L%i' % found or ''), 123 132 node.path, change.date, change.author, 124 shorten_result( node.get_content().read(), query))133 shorten_result(content, query))
