[Python-checkins] python/nondist/sandbox/setuptools pkg_resources.py, 1.51, 1.52 setuptools.txt, 1.21, 1.22

Sun Jul 24 19:59:28 CEST 2005

Update of /cvsroot/python/python/nondist/sandbox/setuptools
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8768
Modified Files:
	pkg_resources.py setuptools.txt 
Log Message:
Fix eager resource extraction. Add eager_resources setup() argument. Add
support for obtaining project-level resources by making get_provider()
accept Requirement objects.
Index: pkg_resources.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/pkg_resources.py,v
retrieving revision 1.51
retrieving revision 1.52
diff -u -d -r1.51 -r1.52

--- pkg_resources.py	21 Jul 2005 16:11:34 -0000	1.51
+++ pkg_resources.py	24 Jul 2005 17:59:26 -0000	1.52
@@ -25,6 +25,7 @@
 'safe_name', 'safe_version', 'run_main', 'BINARY_DIST', 'run_script',
 'get_default_cache', 'EmptyProvider', 'empty_provider', 'normalize_path',
 'WorkingSet', 'working_set', 'add_activation_listener', 'CHECKOUT_DIST',
+ 'list_resources', 'resource_exists', 'resource_isdir',
 ]
 
 import sys, os, zipimport, time, re, imp
@@ -38,7 +39,6 @@
 
 
 
-
 class ResolutionError(Exception):
 """Abstract base for dependency resolution errors"""
 
@@ -68,18 +68,18 @@
 """
 _provider_factories[loader_type] = provider_factory
 
-def get_provider(moduleName):
- """Return an IResourceProvider for the named module"""
+def get_provider(moduleOrReq):
+ """Return an IResourceProvider for the named module or requirement"""
+ if isinstance(moduleOrReq,Requirement):
+ return working_set.find(moduleOrReq) or require(str(moduleOrReq))[0]
 try:
- module = sys.modules[moduleName]
+ module = sys.modules[moduleOrReq]
 except KeyError:
- __import__(moduleName)
- module = sys.modules[moduleName]
+ __import__(moduleOrReq)
+ module = sys.modules[moduleOrReq]
 loader = getattr(module, '__loader__', None)
 return _find_adapter(_provider_factories, loader)(module)
 
-
-
 def _macosx_vers(_cache=[]):
 if not _cache:
 info = os.popen('/usr/bin/sw_vers').read().splitlines()
@@ -627,7 +627,7 @@
 
 def resource_isdir(self, package_name, resource_name):
 """Does the named resource exist in the named package?"""
- return get_provider(package_name).resource_isdir(self, resource_name)
+ return get_provider(package_name).resource_isdir(resource_name)
 
 def resource_filename(self, package_name, resource_name):
 """Return a true filesystem path for specified resource"""
@@ -648,7 +648,7 @@
 )
 
 def list_resources(self, package_name, resource_name):
- return get_provider(package_name).resource_listdir(self, resource_name)
+ return get_provider(package_name).resource_listdir(resource_name)
 
 
 
@@ -913,8 +913,8 @@
 register_loader_type(object, NullProvider)
 
 
-class DefaultProvider(NullProvider):
- """Provides access to package resources in the filesystem"""
+class EggProvider(NullProvider):
+ """Provider based on a virtual filesystem"""
 
 def __init__(self,module):
 NullProvider.__init__(self,module)
@@ -925,22 +925,28 @@
 # of multiple eggs; that's why we use module_path instead of .archive
 path = self.module_path
 old = None
- self.prefix = []
 while path!=old:
 if path.lower().endswith('.egg'):
 self.egg_name = os.path.basename(path)
 self.egg_info = os.path.join(path, 'EGG-INFO')
+ self.egg_root = path
 break
 old = path
 path, base = os.path.split(path)
- self.prefix.append(base)
- self.prefix.reverse()
 
- def _has(self, path):
- return os.path.exists(path)
 
 
 
+
+
+
+
+class DefaultProvider(EggProvider):
+ """Provides access to package resources in the filesystem"""
+
+ def _has(self, path):
+ return os.path.exists(path)
+
 def _isdir(self,path):
 return os.path.isdir(path)
 
@@ -976,67 +982,63 @@
 
 
 
-
-
-
-
-
-
-class ZipProvider(DefaultProvider):
+class ZipProvider(EggProvider):
 """Resource support for zips and eggs"""
 
 eagers = None
 
 def __init__(self, module):
- DefaultProvider.__init__(self,module)
+ EggProvider.__init__(self,module)
 self.zipinfo = zipimport._zip_directory_cache[self.loader.archive]
 self.zip_pre = self.loader.archive+os.sep
 
- def _short_name(self, path):
- if path.startswith(self.zip_pre):
- return path[len(self.zip_pre):]
- return path
+ def _zipinfo_name(self, fspath):
+ # Convert a virtual filename (full path to file) into a zipfile subpath
+ # usable with the zipimport directory cache for our target archive
+ if fspath.startswith(self.zip_pre):
+ return fspath[len(self.zip_pre):]
+ raise AssertionError(
+ "%s is not a subpath of %s" % (fspath,self.zip_pre)
+ )
 
- def get_resource_stream(self, manager, resource_name):
- return StringIO(self.get_resource_string(manager, resource_name))
+ def _parts(self,zip_path):
+ # Convert a zipfile subpath into an egg-relative path part list
+ fspath = self.zip_pre+zip_path # pseudo-fs path
+ if fspath.startswith(self.egg_root+os.sep):
+ return fspath[len(self.egg_root)+1:].split(os.sep)
+ raise AssertionError(
+ "%s is not a subpath of %s" % (fspath,self.egg_root)
+ ) 
 
- def get_resource_filename(self, manager, resource_name):
+ def get_resource_filename(self, manager, resource_name): 
 if not self.egg_name:
 raise NotImplementedError(
 "resource_filename() only supported for .egg, not .zip"
 )
-
 # no need to lock for extraction, since we use temp names
+ zip_path = self._resource_to_zip(resource_name)
 eagers = self._get_eager_resources()
- if resource_name in eagers:
+ if '/'.join(self._parts(zip_path)) in eagers:
 for name in eagers:
- self._extract_resource(manager, name)
-
- return self._extract_resource(manager, resource_name)
-
- def _extract_directory(self, manager, resource_name):
- if resource_name.endswith('/'):
- resource_name = resource_name[:-1]
- for resource in self.resource_listdir(resource_name):
- last = self._extract_resource(manager, resource_name+'/'+resource)
- return os.path.dirname(last) # return the directory path
-
-
+ self._extract_resource(manager, self._eager_to_zip(name))
+ return self._extract_resource(manager, zip_path)
 
- def _extract_resource(self, manager, resource_name):
- if self.resource_isdir(resource_name):
- return self._extract_directory(manager, resource_name)
+ def _extract_resource(self, manager, zip_path):
+ if zip_path in self._index():
+ for name in self._index()[zip_path]:
+ last = self._extract_resource(
+ manager, os.path.join(zip_path, name)
+ )
+ return os.path.dirname(last) # return the extracted directory name
 
- parts = resource_name.split('/')
- zip_path = os.path.join(self.module_path, *parts)
- zip_stat = self.zipinfo[os.path.join(*self.prefix+parts)]
+ zip_stat = self.zipinfo[zip_path]
 t,d,size = zip_stat[5], zip_stat[6], zip_stat[3]
 date_time = (
 (d>>9)+1980, (d>>5)&0xF, d&0x1F, # ymd
 (t&0xFFFF)>>11, (t>>5)&0x3F, (t&0x1F) * 2, 0, 0, -1 # hms, etc.
 )
 timestamp = time.mktime(date_time)
- real_path = manager.get_cache_path(self.egg_name, self.prefix+parts)
+ real_path = manager.get_cache_path(self.egg_name, self._parts(zip_path))
 
 if os.path.isfile(real_path):
 stat = os.stat(real_path)
@@ -1060,10 +1062,8 @@
 # so we're done
 return real_path
 raise
-
 return real_path
 
-
 def _get_eager_resources(self):
 if self.eagers is None:
 eagers = []
@@ -1077,12 +1077,9 @@
 try:
 return self._dirindex
 except AttributeError:
- ind = {}; skip = len(self.prefix)
+ ind = {}
 for path in self.zipinfo:
 parts = path.split(os.sep)
- if parts[:skip] != self.prefix:
- continue # only include items under our prefix
- parts = parts[skip:] # but don't include prefix in paths
 while parts:
 parent = '/'.join(parts[:-1])
 if parent in ind:
@@ -1093,26 +1090,26 @@
 self._dirindex = ind
 return ind
 
- def _has(self, path):
- return self._short_name(path) in self.zipinfo or self._isdir(path)
+ def _has(self, fspath):
+ zip_path = self._zipinfo_name(fspath)
+ return zip_path in self.zipinfo or zip_path in self._index()
 
- def _isdir(self,path):
- return self._dir_name(path) in self._index()
+ def _isdir(self,fspath):
+ return self._zipinfo_name(fspath) in self._index()
 
- def _listdir(self,path):
- return list(self._index().get(self._dir_name(path), ()))
+ def _listdir(self,fspath):
+ return list(self._index().get(self._zipinfo_name(fspath), ()))
 
 
 
 
- def _dir_name(self,path):
- if path.startswith(self.module_path+os.sep):
- path = path[len(self.module_path+os.sep):]
- path = path.replace(os.sep,'/')
- if path.endswith('/'): path = path[:-1]
- return path
 
- _get = NullProvider._get
+
+ def _eager_to_zip(self,resource_name):
+ return self._zipinfo_name(self._fn(self.egg_root,resource_name))
+
+ def _resource_to_zip(self,resource_name):
+ return self._zipinfo_name(self._fn(self.module_path,resource_name))
 
 register_loader_type(zipimport.zipimporter, ZipProvider)
 
@@ -1146,6 +1143,9 @@
 
 
 
+
+
+
 class PathMetadata(DefaultProvider):
 """Metadata provider for egg directories
 
Index: setuptools.txt
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setuptools.txt,v
retrieving revision 1.21
retrieving revision 1.22
diff -u -d -r1.21 -r1.22
--- setuptools.txt	24 Jul 2005 02:41:43 -0000	1.21
+++ setuptools.txt	24 Jul 2005 17:59:26 -0000	1.22
@@ -180,6 +180,22 @@
 does not contain any code. See the section below on `Namespace Packages`_
 for more information.
 
+``eager_resources``
+ A list of strings naming resources that should be extracted together, if
+ any of them is needed, or if any C extensions included in the project are
+ imported. This argument is only useful if the project will be installed as
+ a zipfile, and there is a need to have all of the listed resources be
+ extracted to the filesystem *as a unit*. Resources listed here
+ should be '/'-separated paths, relative to the source root, so to list a
+ resource ``foo.png`` in package ``bar.baz``, you would include the string
+ ``bar/baz/foo.png`` in this argument.
+
+ If you only need to obtain resources one at a time, or you don't have any C
+ extensions that access other files in the project (such as data files or
+ shared libraries), you probably do NOT need this argument and shouldn't
+ mess with it. For more details on how this argument works, see the section
+ below on `Automatic Resource Extraction`_.
+
 
 Using ``find_packages()``
 -------------------------
@@ -414,6 +430,7 @@
 __ http://docs.python.org/dist/node11.html 
 
 
+
 Accessing Data Files at Runtime
 -------------------------------
 
@@ -432,6 +449,76 @@
 .. _Accessing Package Resources: http://peak.telecommunity.com/DevCenter/PythonEggs#accessing-package-resources
 
 
+Non-Package Data Files
+----------------------
+
+The ``distutils`` normally install general "data files" to a platform-specific
+location (e.g. ``/usr/share``). This feature is intended to be used for things
+like documentation, example configuration files, and the like. ``setuptools``
+does not install these data files in a separate location, however. They are
+bundled inside the egg file or directory, alongside the Python modules and
+packages. The data files can also be accessed using the `Resource Management
+API`_, by specifying a ``Requirement`` instead of a package name::
+
+ from pkg_resources import Requirement, resource_filename
+ filename = resource_filename(Requirement.parse("MyProject"),"sample.conf")
+
+The above code will obtain the filename of the "sample.conf" file in the data
+root of the "MyProject" distribution.
+
+Note, by the way, that this encapsulation of data files means that you can't
+actually install data files to some arbitrary location on a user's machine;
+this is a feature, not a bug. You can always include a script in your
+distribution that extracts and copies your documentation or data files to
+a user-specified location, at their discretion. If you put related data files
+in a single directory, you can use ``resource_filename()`` with the directory
+name to get a filesystem directory that then can be copied with the ``shutil``
+module. (Even if your package is installed as a zipfile, calling
+``resource_filename()`` on a directory will return an actual filesystem
+directory, whose contents will be that entire subtree of your distribution.)
+
+(Of course, if you're writing a new package, you can just as easily place your
+data files or directories inside one of your packages, rather than using the
+distutils' approach. However, if you're updating an existing application, it
+may be simpler not to change the way it currently specifies these data files.)
+
+
+Automatic Resource Extraction
+-----------------------------
+
+If you are using tools that expect your resources to be "real" files, or your
+project includes non-extension native libraries or other files that your C
+extensions expect to be able to access, you may need to list those files in
+the ``eager_resources`` argument to ``setup()``, so that the files will be
+extracted together, whenever a C extension in the project is imported. This
+is especially important if your project includes shared libraries *other* than
+distutils-built C extensions. Those shared libraries should be listed as
+``eager_resources``, because they need to be present in the filesystem when the
+C extensions that link to them are used.
+
+The ``pkg_resources`` runtime for compressed packages will automatically
+extract *all* C extensions and ``eager_resources`` at the same time, whenever
+*any* C extension or eager resource is requested via the ``resource_filename()``
+API. (C extensions are imported using ``resource_filename()`` internally.)
+This ensures that C extensions will see all of the "real" files that they
+expect to see.
+
+Note also that you can list directory resource names in ``eager_resources`` as
+well, in which case the directory's contents (including subdirectories) will be
+extracted whenever any C extension or eager resource is requested.
+
+Please note that if you're not sure whether you need to use this argument, you
+don't! It's really intended to support projects with lots of non-Python
+dependencies and as a last resort for crufty projects that can't otherwise
+handle being compressed. If your package is pure Python, Python plus data
+files, or Python plus C, you really don't need this. You've got to be using
+either C or an external program that needs "real" files in your project before
+there's any possibility of ``eager_resources`` being relevant to your project.
+
+
+
+
+
 "Development Mode"
 ==================
 
@@ -1396,14 +1483,32 @@
 * Fixed the ``--tag-svn-revision`` option of ``egg_info`` not finding the
 latest revision number; it was using the revision number of the directory
 containing ``setup.py``, not the highest revision number in the project.
+
+ * Added ``eager_resources`` setup argument
 
 * Fixed some problems using ``pkg_resources`` w/PEP 302 loaders other than
- ``zipimport``.
- 
- * Fixed ``pkg_resources.resource_exists()`` not working correctly.
+ ``zipimport``, and the previously-broken "eager resource" support.
+
+ * Fixed ``pkg_resources.resource_exists()`` not working correctly, along with
+ some other resource API bugs.
+
 
 * Many ``pkg_resources`` API changes and enhancements:
 
+ * Resource API functions like ``resource_string()`` that accepted a package
+ name and resource name, will now also accept a ``Requirement`` object in
+ place of the package name (to allow access to non-package data files in
+ an egg).
+
+ * ``get_provider()`` will now accept a ``Requirement`` instance or a module
+ name. If it is given a ``Requirement``, it will return a corresponding
+ ``Distribution`` (by calling ``require()`` if a suitable distribution
+ isn't already in the working set), rather than returning a metadata and
+ resource provider for a specific module. (The difference is in how
+ resource paths are interpreted; supplying a module name means resource
+ paths will be module-relative, rather than relative to the distribution's
+ root.)
+
 * ``Distribution`` objects now implement the ``IResourceProvider`` and
 ``IMetadataProvider`` interfaces, so you don't need to reference the (no
 longer available) ``metadata`` attribute to get at these interfaces.