Package trac ::
Package versioncontrol ::
Module svn_fs
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2005-2009 Edgewall Software
4 # Copyright (C) 2005 Christopher Lenz <cmlenz@gmx.de>
5 # Copyright (C) 2005-2007 Christian Boos <cboos@edgewall.org>
6 # All rights reserved.
7 #
8 # This software is licensed as described in the file COPYING, which
9 # you should have received as part of this distribution. The terms
10 # are also available at http://trac.edgewall.org/wiki/TracLicense.
11 #
12 # This software consists of voluntary contributions made by many
13 # individuals. For the exact contribution history, see the revision
14 # history and logs, available at http://trac.edgewall.org/log/.
15 #
16 # Author: Christopher Lenz <cmlenz@gmx.de>
17 # Christian Boos <cboos@edgewall.org>
18
19 """Filesystem access to Subversion repositories.
20
21 '''Note about Unicode:'''
22
23 The Subversion bindings are not unicode-aware and they expect to
24 receive UTF-8 encoded `string` parameters.
25
26 On the other hand, all paths manipulated by Trac are `unicode` objects.
27
28 Therefore:
29
30 * before being handed out to SVN, the Trac paths have to be encoded to
31 UTF-8, using `_to_svn()`
32 * before being handed out to Trac, a SVN path has to be decoded from
33 UTF-8, using `_from_svn()`
34
35 Whenever a value has to be stored as utf8, we explicitly mark the
36 variable name with "_utf8", in order to avoid any possible confusion.
37
38 Warning:
39 `SubversionNode.get_content()` returns an object from which one can read
40 a stream of bytes. NO guarantees can be given about what that stream of
41 bytes represents. It might be some text, encoded in some way or another.
42 SVN properties __might__ give some hints about the content, but they
43 actually only reflect the beliefs of whomever set those properties...
44 """
45
46 import os.path
47 import weakref
48 import posixpath
49 from urllib import quote
50
51 from trac .config import ListOption
52 from trac .core import *
53 from trac .env import ISystemInfoProvider
54 from trac .versioncontrol import Changeset , Node , Repository , \
55 IRepositoryConnector , \
56 NoSuchChangeset , NoSuchNode
57 from trac .versioncontrol .cache import CachedRepository
58 from trac .util import embedded_numbers
59 from trac .util .concurrency import threading
60 from trac .util .text import exception_to_unicode , to_unicode
61 from trac .util .translation import _
62 from trac .util .datefmt import from_utimestamp
63
64
65 application_pool = None
66 application_pool_lock = threading.Lock()
67
68
70 global fs, repos, core , delta, _kindmap , _svn_uri_canonicalize
71 from svn import fs, repos, core , delta
72 _kindmap = {core .svn_node_dir: Node .DIRECTORY ,
73 core .svn_node_file: Node .FILE }
74 try:
75 _svn_uri_canonicalize = core .svn_uri_canonicalize # Subversion 1.7+
76 except AttributeError:
77 _svn_uri_canonicalize = lambda v: v
78 # Protect svn.core methods from GC
79 Pool .apr_pool_clear = staticmethod(core .apr_pool_clear)
80 Pool .apr_pool_destroy = staticmethod(core .apr_pool_destroy)
81
83 """Expect a pool and a list of `unicode` path components.
84
85 Returns an UTF-8 encoded string suitable for the Subversion python
86 bindings (the returned path never starts with a leading "/")
87 """
88 return core .svn_path_canonicalize('/'.join (args ).lstrip('/')
89 .encode('utf-8'),
90 pool )
91
93 """Expect an UTF-8 encoded string and transform it to an `unicode` object
94
95 But Subversion repositories built from conversion utilities can have
96 non-UTF-8 byte strings, so we have to convert using `to_unicode`.
97 """
98 return path and to_unicode (path , 'utf-8')
99
100 # The following 3 helpers deal with unicode paths
101
103 """Remove leading and trailing "/", except for the root."""
104 return path and path .strip('/') or '/'
105
107 """Remove the leading scope from repository paths.
108
109 Return `None` if the path is not in scope.
110 """
111 if fullpath is not None:
112 fullpath = fullpath.lstrip('/')
113 if scope == '/':
114 return _normalize_path(fullpath)
115 scope = scope .strip('/')
116 if (fullpath + '/').startswith(scope + '/'):
117 return fullpath[len(scope ) + 1:] or '/'
118
120 """Check whether the given `fullpath` is within the given `scope`"""
121 if scope == '/':
122 return fullpath is not None
123 fullpath = fullpath and fullpath.lstrip('/') or ''
124 scope = scope .strip('/')
125 return (fullpath + '/').startswith(scope + '/')
126
127 # svn_opt_revision_t helpers
128
130 value = core .svn_opt_revision_value_t()
131 value.number = num
132 revision = core .svn_opt_revision_t()
133 revision.kind = core .svn_opt_revision_number
134 revision.value = value
135 return revision
136
138 revision = core .svn_opt_revision_t()
139 revision.kind = core .svn_opt_revision_head
140 return revision
141
142 # apr_pool_t helpers
143
145 if weakpool():
146 weakpool()._mark_invalid()
147
148
149 - class Pool (object):
150 """A Pythonic memory pool object"""
151
153 """Create a new memory pool"""
154
155 global application_pool
156
157 application_pool_lock .acquire()
158 try:
159 self._parent_pool = parent_pool or application_pool
160
161 # Create pool
162 if self._parent_pool:
163 self._pool = core .svn_pool_create(self._parent_pool())
164 else:
165 # If we are an application-level pool,
166 # then initialize APR and set this pool
167 # to be the application-level pool
168 core .apr_initialize()
169 self._pool = core .svn_pool_create(None)
170 application_pool = self
171 finally:
172 application_pool_lock .release()
173
174 self._mark_valid()
175
177 return self._pool
178
180 """Check whether this memory pool and its parents
181 are still valid"""
182 return hasattr(self,"_is_valid")
183
185 """Assert that this memory_pool is still valid."""
186 assert self.valid ()
187
189 """Clear embedded memory pool. Invalidate all subpools."""
190 self.apr_pool_clear(self._pool)
191 self._mark_valid()
192
194 """Destroy embedded memory pool. If you do not destroy
195 the memory pool manually, Python will destroy it
196 automatically."""
197
198 global application_pool
199
200 self.assert_valid ()
201
202 # Destroy pool
203 self.apr_pool_destroy(self._pool)
204
205 # Clear application pool and terminate APR if necessary
206 if not self._parent_pool:
207 application_pool = None
208
209 self._mark_invalid()
210
212 """Automatically destroy memory pools, if necessary"""
213 if self.valid ():
214 self.destroy ()
215
217 """Mark pool as valid"""
218 if self._parent_pool:
219 # Refer to self using a weakreference so that we don't
220 # create a reference cycle
221 weakself = weakref.ref(self)
222
223 # Set up callbacks to mark pool as invalid when parents
224 # are destroyed
225 self._weakref = weakref.ref(self._parent_pool._is_valid,
226 lambda x : \
227 _mark_weakpool_invalid(weakself))
228
229 # mark pool as valid
230 self._is_valid = lambda: 1
231
233 """Mark pool as invalid"""
234 if self.valid ():
235 # Mark invalid
236 del self._is_valid
237
238 # Free up memory
239 del self._parent_pool
240 if hasattr(self, "_weakref"):
241 del self._weakref
242
243
245 """Subversion-specific cached repository, zero-pads revision numbers
246 in the cache tables.
247 """
249 return '%010d' % rev
250
252 return int(rev or 0)
253
254
256
257 implements (ISystemInfoProvider, IRepositoryConnector )
258
259 branches = ListOption ('svn', 'branches', 'trunk,branches/*', doc=
260 """Comma separated list of paths categorized as branches.
261 If a path ends with '*', then all the directory entries found below
262 that path will be included.
263 Example: `/trunk, /branches/*, /projectAlpha/trunk, /sandbox/*`
264 """)
265
266 tags = ListOption ('svn', 'tags', 'tags/*', doc=
267 """Comma separated list of paths categorized as tags.
268
269 If a path ends with '*', then all the directory entries found below
270 that path will be included.
271 Example: `/tags/*, /projectAlpha/tags/A-1.0, /projectAlpha/tags/A-v1.1`
272 """)
273
274 error = None
275
277 self._version = None
278 try:
279 _import_svn()
280 self.log .debug('Subversion bindings imported')
281 except ImportError, e :
282 self.error = e
283 self.log .info('Failed to load Subversion bindings', exc_info=True)
284 else:
285 version = (core .SVN_VER_MAJOR, core .SVN_VER_MINOR,
286 core .SVN_VER_MICRO)
287 self._version = '%d.%d.%d' % version + core .SVN_VER_TAG
288 if version [0] < 1:
289 self.error = _ ("Subversion >= 1.0 required, found %(version)s",
290 version =self._version)
291 Pool ()
292
293 # ISystemInfoProvider methods
294
296 if self._version is not None:
297 yield 'Subversion', self._version
298
299 # IRepositoryConnector methods
300
302 prio = 1
303 if self.error :
304 prio = -1
305 yield ("direct-svnfs", prio*4)
306 yield ("svnfs", prio*4)
307 yield ("svn", prio*2)
308
323
324
326 """Repository implementation based on the svn.fs API."""
327
328 - def __init__ (self, path, params, log):
329 self.log = log
330 self.pool = Pool ()
331
332 # Remove any trailing slash or else subversion might abort
333 if isinstance(path , unicode):
334 path_utf8 = path .encode('utf-8')
335 else: # note that this should usually not happen (unicode arg expected)
336 path_utf8 = to_unicode (path ).encode('utf-8')
337
338 path_utf8 = core .svn_path_canonicalize(
339 os.path .normpath(path_utf8).replace ('\\', '/'))
340 self.path = path_utf8.decode('utf-8')
341
342 root_path_utf8 = repos.svn_repos_find_root_path(path_utf8, self.pool ())
343 if root_path_utf8 is None:
344 raise TracError (_ ("%(path)s does not appear to be a Subversion "
345 "repository.", path =to_unicode (path_utf8)))
346
347 try:
348 self.repos = repos.svn_repos_open(root_path_utf8, self.pool ())
349 except core .SubversionException, e :
350 raise TracError (_ ("Couldn't open Subversion repository %(path)s: "
351 "%(svn_error)s", path =to_unicode (path_utf8),
352 svn_error=exception_to_unicode (e )))
353 self.fs_ptr = repos.svn_repos_fs(self.repos)
354
355 self.uuid = fs.get_uuid(self.fs_ptr, self.pool ())
356 self.base = 'svn:%s:%s' % (self.uuid, _from_svn(root_path_utf8))
357 name = 'svn:%s:%s' % (self.uuid, self.path )
358
359 Repository .__init__ (self, name , params , log )
360
361 # if root_path_utf8 is shorter than the path_utf8, the difference is
362 # this scope (which always starts with a '/')
363 if root_path_utf8 != path_utf8:
364 self.scope = path_utf8[len(root_path_utf8):].decode('utf-8')
365 if not self.scope [-1] == '/':
366 self.scope += '/'
367 else:
368 self.scope = '/'
369 assert self.scope [0] == '/'
370 # we keep root_path_utf8 for RA
371 ra_prefix = os.name == 'nt' and 'file:///' or 'file://'
372 self.ra_url_utf8 = _svn_uri_canonicalize(ra_prefix +
373 quote (root_path_utf8))
374 self.clear ()
375
376 - def clear (self, youngest_rev=None):
381
384
385 - def has_node (self, path, rev=None, pool=None):
393
395 return _normalize_path(path )
396
398 if rev is None or isinstance(rev, basestring) and \
399 rev.lower() in ('', 'head', 'latest', 'youngest'):
400 return self.youngest_rev
401 else:
402 try:
403 rev = int(rev)
404 if rev <= self.youngest_rev :
405 return rev
406 except (ValueError, TypeError):
407 pass
408 raise NoSuchChangeset (rev)
409
414
416 return self.base
417
436
438 """Retrieve known branches, as (name, id) pairs.
439
440 Purposely ignores `rev` and always takes the last revision.
441 """
442 for n in self._get_tags_or_branches('branches'):
443 yield 'branches', n.path , n.path , None
444 for n in self._get_tags_or_branches('tags'):
445 yield 'tags', n.path , n.created_path , n.created_rev
446
453
457
459 return (self.uuid, rev)
460
469
471 """Return the revisions affecting `path` between `first` and `last`
472 revs. If `first` is not given, it goes down to the revision in which
473 the branch was created.
474 """
475 node = self.get_node (path , last)
476 revs = []
477 for (p, r, chg) in node.get_history ():
478 if p != path or (first and r < first):
479 break
480 revs.append(r)
481 return revs
482
484 path_revs = {}
485 for node, first in node_infos:
486 path = node.path
487 revs = []
488 for p, r, chg in node.get_history ():
489 if p != path or r < first:
490 break
491 revs.append(r)
492 path_revs[path ] = revs
493 return path_revs
494
def _history(self, path, start, end, pool):
    """`path` is a unicode path in the scope.

    Generator yielding `(path, rev)` pairs, where `path` is an `unicode`
    object.
    Must start with `(path, created rev)`.

    `start` and `end` are revision numbers which can be given in either
    order: they are swapped if needed, so that the history is always
    walked from the higher revision down to the lower one.

    `pool` is a `Pool`: it is called to get the pool handed to the
    bindings and it is the parent of the two temporary pools used while
    walking the history.

    The yielded paths are the ones reported by `fs.history_location`,
    decoded with `_from_svn()`. Nothing is yielded if `path` doesn't
    exist in the starting revision.
    """
    path_utf8 = _to_svn(pool(), self.scope, path)
    # always go from the newest revision to the oldest one
    if start < end:
        start, end = end, start
    if (start, end) == (1, 0): # only happens for empty repos
        return
    root = fs.revision_root(self.fs_ptr, start, pool())
    # fs.node_history leaks when path doesn't exist (#6588)
    if fs.check_path(root, path_utf8, pool()) == core.svn_node_none:
        return
    # Two temporary pools are used alternately: the current history
    # object is allocated in one of them, the next one in the other
    tmp1 = Pool(pool)
    tmp2 = Pool(pool)
    history_ptr = fs.node_history(root, path_utf8, tmp1())
    # presumably tells `fs.history_prev` to follow copies -- see how
    # `get_path_history` detects path changes in what is yielded here
    cross_copies = 1
    while history_ptr:
        # the next history object is allocated in tmp2 ...
        history_ptr = fs.history_prev(history_ptr, cross_copies, tmp2())
        # ... after which the pool of the previous one can be cleared
        tmp1.clear()
        # swap: tmp1 now holds the current history object and tmp2 is
        # the pool which has just been cleared
        tmp1, tmp2 = tmp2, tmp1
        if history_ptr:
            # tmp2 only serves as scratch pool for this call
            path_utf8, rev = fs.history_location(history_ptr, tmp2())
            tmp2.clear()
            # stop once we get below the lower bound
            if rev < end:
                break
            path = _from_svn(path_utf8)
            yield path, rev
    # drop our references to the temporary pools
    del tmp1
    del tmp2
528
530 if rev > 1: # don't use oldest here, as it's too expensive
531 for _ , prev in self._history(path , 1, rev-1, pool or self.pool ):
532 return prev
533 return None
534
535
537 if self.oldest is None:
538 self.oldest = 1
539 # trying to figure out the oldest rev for scoped repository
540 # is too expensive and uncovers a big memory leak (#5213)
541 # if self.scope != '/':
542 # self.oldest = self.next_rev(0, find_initial_rev=True)
543 return self.oldest
544
546 if not self.youngest:
547 self.youngest = fs.youngest_rev (self.fs_ptr, self.pool ())
548 if self.scope != '/':
549 for path , rev in self._history('', 1, self.youngest, self.pool ):
550 self.youngest = rev
551 break
552 return self.youngest
553
557
558 - def next_rev (self, rev, path='', find_initial_rev=False):
573
576
578 """Get the latest stored revision by sorting the revision strings
579 numerically
580
581 (deprecated, only used for transparent migration to the new caching
582 scheme).
583 """
584 cursor = db .cursor ()
585 cursor .execute ("SELECT rev FROM revision "
586 "ORDER BY -LENGTH(rev), rev DESC LIMIT 1")
587 row = cursor .fetchone ()
588 return row and row[0] or None
589
def get_path_history(self, path, rev=None, limit=None):
    """Generate the history of `path`, going back in time from `rev`.

    `path` and `rev` are first normalized using `normalize_path()` and
    `normalize_rev()`.

    Yields `(path, rev, change)` tuples, where `change` is one of
    `Changeset.DELETE`, `Changeset.EDIT`, `Changeset.COPY`,
    `Changeset.ADD`, or the string `'unknown'` for the entry following
    a copy (the source of that copy).

    If `limit` is given, no new walk through the history is started
    once `limit` entries have been counted. Note that `limit` is only
    checked between two such walks, so more than `limit` entries can
    be produced.
    """
    path = self.normalize_path(path)
    rev = self.normalize_rev(rev)
    # set when the path was found missing in the revision last examined
    expect_deletion = False
    # cleared at the start of each iteration of the outer loop
    subpool = Pool(self.pool)
    # number of entries counted so far, compared against `limit`
    numrevs = 0
    while rev and (not limit or numrevs < limit):
        subpool.clear()
        if self.has_node(path, rev, subpool):
            if expect_deletion:
                # it was missing, now it's there again:
                # rev+1 must be a delete
                numrevs += 1
                yield path, rev+1, Changeset.DELETE
            newer = None # 'newer' is the previously seen history tuple
            older = None # 'older' is the currently examined history tuple
            for p, r in self._history(path, 1, rev, subpool):
                # tentatively an ADD: this is what gets yielded after
                # the loop if no older history entry follows
                older = (_path_within_scope(self.scope, p), r,
                         Changeset.ADD)
                # where the outer loop resumes if the walk ends here
                rev = self._previous_rev(r, pool=subpool)
                if newer:
                    numrevs += 1
                    if older[0] == path:
                        # still on the path: 'newer' was an edit
                        yield newer[0], newer[1], Changeset.EDIT
                    else:
                        # the path changed: 'newer' was a copy
                        rev = self._previous_rev(newer[1], pool=subpool)
                        # restart before the copy op
                        yield newer[0], newer[1], Changeset.COPY
                        older = (older[0], older[1], 'unknown')
                        break
                newer = older
            if older:
                # either a real ADD or the source of a COPY
                numrevs += 1
                yield older
        else:
            # the path doesn't exist in `rev`: remember it and look at
            # the previous revision
            expect_deletion = True
            rev = self._previous_rev(rev, pool=subpool)
630
def get_changes(self, old_path, old_rev, new_path, new_rev,
                ignore_ancestry=0):
    """Generate the changes between two nodes of the repository.

    The base is `old_path` at `old_rev`, the target is `new_path` at
    `new_rev` (both revisions are normalized first).

    Yields `(old_node, new_node, kind, change)` tuples:

     * for directories, one tuple for each delta reported by
       `repos.svn_repos_dir_delta`, in path order
     * for files, a single `Changeset.EDIT` tuple, and only if
       `fs.contents_changed` reports a difference

    Raises `NoSuchNode` if the base or the target doesn't exist, and
    `TracError` if they are not of the same kind.
    """
    old_rev = self.normalize_rev(old_rev)
    new_rev = self.normalize_rev(new_rev)

    # Both ends of the diff must exist ...
    if not self.has_node(old_path, old_rev):
        raise NoSuchNode(old_path, old_rev, 'The Base for Diff is invalid')
    old_node = self.get_node(old_path, old_rev)
    if not self.has_node(new_path, new_rev):
        raise NoSuchNode(new_path, new_rev,
                         'The Target for Diff is invalid')
    new_node = self.get_node(new_path, new_rev)

    # ... and be of the same kind
    if new_node.kind != old_node.kind:
        raise TracError(_('Diff mismatch: Base is a %(oldnode)s '
                          '(%(oldpath)s in revision %(oldrev)s) and '
                          'Target is a %(newnode)s (%(newpath)s in '
                          'revision %(newrev)s).', oldnode=old_node.kind,
                          oldpath=old_path, oldrev=old_rev,
                          newnode=new_node.kind, newpath=new_path,
                          newrev=new_rev))

    subpool = Pool(self.pool)

    if not new_node.isdir:
        # File diff: a single EDIT, if the contents differ at all
        old_root = fs.revision_root(self.fs_ptr, old_rev, subpool())
        new_root = fs.revision_root(self.fs_ptr, new_rev, subpool())
        old_file_utf8 = _to_svn(subpool(), self.scope, old_path)
        new_file_utf8 = _to_svn(subpool(), self.scope, new_path)
        if fs.contents_changed(old_root, old_file_utf8,
                               new_root, new_file_utf8, subpool()):
            yield (old_node, new_node, Node.FILE, Changeset.EDIT)
        return

    # Directory diff: let the editor collect the deltas
    editor = DiffChangeEditor()
    e_ptr, e_baton = delta.make_editor(editor, subpool())
    old_root = fs.revision_root(self.fs_ptr, old_rev, subpool())
    new_root = fs.revision_root(self.fs_ptr, new_rev, subpool())

    def authz_cb(root, path, pool):
        return 1

    text_deltas = 0 # as this is anyway re-done in Diff.py...
    entry_props = 0 # "... typically used only for working copy updates"
    old_dir_utf8 = _to_svn(subpool(), self.scope, old_path)
    new_dir_utf8 = _to_svn(subpool(), self.scope, new_path)
    repos.svn_repos_dir_delta(old_root, old_dir_utf8, '',
                              new_root, new_dir_utf8,
                              e_ptr, e_baton, authz_cb,
                              text_deltas,
                              1, # directory
                              entry_props,
                              ignore_ancestry,
                              subpool())

    # sort deltas by path before creating `SubversionNode`s to reduce
    # memory usage (#10978)
    deltas = [(_from_svn(p), k, c) for p, k, c in editor.deltas]
    deltas.sort(key=lambda item: item[0])

    for relpath, kind, change in deltas:
        old_node = new_node = None
        if change != Changeset.ADD:
            old_node = self.get_node(posixpath.join(old_path, relpath),
                                     old_rev)
        if change != Changeset.DELETE:
            new_node = self.get_node(posixpath.join(new_path, relpath),
                                     new_rev)
        else:
            # for a deletion, the kind is looked up in the old root
            deleted_utf8 = _to_svn(subpool(), self.scope, old_node.path)
            svn_kind = fs.check_path(old_root, deleted_utf8, subpool())
            kind = _kindmap[svn_kind]
        yield (old_node, new_node, kind, change)