# Source code for PyOpenWorm.datasource_loader
# -*- coding: utf-8 -*-
'''
DataSourceLoaders take a data source identifier and retrieve the primary data (e.g., CSV files, electrode recordings)
from some location (e.g., a file store, via a bittorrent tracker).
Each loader can treat the base_directory given as its own namespace and place directories in there however it wants.
'''
from yarom.utils import FCN
from os.path import exists, isdir, join as pth_join, isabs, realpath
import six
class DataSourceDirLoaderMeta(type):
    '''
    Metaclass that gives loader classes a class-level ``dirkey`` property.

    Rationale: the fully-qualified class name (as produced by ``FCN``) is a sensible default key, but implementers
    who "just want to use a class variable" can set ``directory_key`` on their class instead. It is also possible
    to subclass this metaclass and define ``dirkey`` there, then override it in individual classes created from
    that metaclass by setting ``dirkey`` explicitly.
    '''

    @property
    def dirkey(self):
        '''
        The key for this loader class: ``directory_key`` when it is set to a truthy value, otherwise ``FCN(self)``
        '''
        explicit_key = getattr(self, 'directory_key', None)
        if explicit_key:
            return explicit_key
        # No (or empty) class-level override: fall back to the name computed by FCN
        return FCN(self)
class DataSourceDirLoader(six.with_metaclass(DataSourceDirLoaderMeta, object)):
    '''
    Base class for loaders that retrieve the data for a data source into a directory.

    Sub-classes implement `load`; callers invoke the loader instance itself, which runs `load` and validates the
    path it returns against the loader's base directory.
    '''

    def __init__(self, base_directory):
        '''
        Parameters
        ----------
        base_directory : str
            The directory that loaded data must reside in. It is stored after being resolved with ``realpath``
        '''
        self._basedir = realpath(base_directory)

    def __call__(self, ident):
        '''
        Load the data source

        Parameters
        ----------
        ident : str
            The identifier of the data source to load data for

        Returns
        -------
        A path to the loaded resource. The path is absolute, resolved with ``realpath``, and refers to an existing
        directory that is the base directory or is located beneath it

        Raises
        ------
        LoadFailed
            If `load` returns an empty value, or a path that is outside of the base directory, does not exist, or
            is not a directory
        '''
        # Call str() to give a more uniform interface to the sub-class ``load``
        # Conventionally, types that tag or "enhance" a string have the base string representation as their __str__
        s = self.load(str(ident))
        if not s:
            raise LoadFailed(ident, self, 'Loader returned an empty string')

        # N.B.: This logic is NOT intended as a security measure against directory traversal: it is only to make the
        # interface both flexible and unambiguous for implementers

        # Relative paths are allowed
        if not isabs(s):
            s = pth_join(self._basedir, s)

        # Make sure the loader isn't doing some nonsense with symlinks or non-portable paths
        rpath = realpath(s)

        # Compare against the base directory *with a trailing separator* rather than doing a bare string-prefix
        # test. Otherwise a sibling like "/data/base_other" would be accepted for the base "/data/base". Joining
        # with '' appends the separator only when one is not already there (e.g., when the base is the root).
        basedir_prefix = pth_join(self._basedir, '')
        if rpath != self._basedir and not rpath.startswith(basedir_prefix):
            msg = 'Loader returned a file path outside of the base directory, {}'.format(self._basedir)
            raise LoadFailed(ident, self, msg)

        if not exists(rpath):
            msg = 'Loader returned a non-existant file {}'.format(rpath)
            raise LoadFailed(ident, self, msg)

        if not isdir(rpath):
            msg = 'Loader did not return a directory, but returned {}'.format(rpath)
            raise LoadFailed(ident, self, msg)

        return rpath

    def load(self, ident):
        '''
        Retrieve the data for `ident`. Must be overridden by sub-classes; this base implementation always raises.

        Parameters
        ----------
        ident : str
            The identifier of the data source (`__call__` passes ``str(ident)``)

        Returns
        -------
        A path to a directory, either absolute or relative to the base directory (see `__call__` for how the
        result is validated)

        Raises
        ------
        NotImplementedError
        '''
        raise NotImplementedError()

    def can_load(self, ident):
        '''
        Default implementation: always returns False.

        NOTE(review): presumably sub-classes override this to report whether they can load `ident` -- confirm
        against callers.
        '''
        return False

    def __str__(self):
        # The result of FCN for this instance's class, followed by "()"
        return FCN(type(self)) + '()'
class LoadFailed(Exception):
    '''
    Raised when a loader fails to load the data for a data source

    Parameters
    ----------
    ident : object
        The identifier of the data source that could not be loaded
    loader : object
        The loader that failed. Its string form is included in the message
    *args
        Optional. The first item, if present and non-empty, is a string that is appended to the message as a
        detail; any remaining items are passed through to `Exception` after the composed message
    '''

    def __init__(self, ident, loader, *args):
        # The detail message is optional: indexing args[0] unconditionally would raise IndexError for
        # LoadFailed(ident, loader), masking the failure that was being reported
        msg = args[0] if args else ''
        detail = ': ' + msg if msg else ''
        mmsg = 'Failed to load {} data with loader {}{}'.format(ident, loader, detail)
        super(LoadFailed, self).__init__(mmsg, *args[1:])