Source code for PyOpenWorm.dataObject

from __future__ import print_function
from functools import partial
import rdflib as R
from rdflib.term import URIRef
import logging
from itertools import groupby
import six
import hashlib

import PyOpenWorm
from PyOpenWorm.contextualize import (Contextualizable,
                                      ContextualizableClass,
                                      contextualize_metaclass,
                                      contextualize_helper,
                                      decontextualize_helper)

from yarom.graphObject import (GraphObject,
                               ComponentTripler,
                               GraphObjectQuerier)
from yarom.rdfUtils import triples_to_bgp, deserialize_rdflib_term
from yarom.rdfTypeResolver import RDFTypeResolver
from yarom.mappedClass import MappedClass
from yarom.mapper import FCN
from .data import DataUser
from .context import Contexts
from .identifier_mixin import IdMixin
from .inverse_property import InversePropertyMixin, InverseProperty

import PyOpenWorm.simpleProperty as SP

# Names exported by ``from PyOpenWorm.dataObject import *``.
__all__ = [
    "BaseDataObject",
    "ContextMappedClass",
    "DataObject",
    "values",
    "DataObjectTypes",
    "RDFTypeTable",
    "DataObjectsParents"]

# Module-level logger, named after this module.
L = logging.getLogger(__name__)

# Module-level registries. All four dictionaries are emptied by ``disconnect()``.
DataObjectTypes = dict()
# Cache of generated property classes keyed by ``(owner class, linkName)``;
# read and filled by ``BaseDataObject._create_property_class``.
PropertyTypes = dict()
RDFTypeTable = dict()
DataObjectsParents = dict()

# Sentinel object; property declarations compare against it by identity
# (``value_type is This``) and substitute the class being defined.
This = object()
""" A reference to be used in class-level property declarations to denote the
    class currently being defined. For example::

        >>> class Person(DataObject):
        ...     parent = ObjectProperty(value_type=This,
        ...                             inverse_of=(This, 'child'))
        ...     child = ObjectProperty(value_type=This)
"""


def mp(c, k):
    """
    Build a read-only ``property`` exposing the property instance stored on an
    object under the attribute ``'_pow_' + k``.

    Parameters
    ----------
    c : class
        The property class. Its ``lazy`` attribute is read once, when ``mp``
        is called, to decide which getter is installed.
    k : str
        The declared name of the property; the backing attribute name is
        ``'_pow_' + k``.

    Returns
    -------
    property
        When ``c.lazy`` is true, the getter attaches the property on first
        access by calling ``target.attach_property(c, name='_pow_' + k)``
        whenever the backing attribute is missing or ``None``. Otherwise the
        getter is a plain ``getattr`` that raises ``AttributeError`` if the
        backing attribute does not exist.
    """
    backing_name = '_pow_' + k

    def lazy_getter(target):
        existing = getattr(target, backing_name, None)
        if existing is not None:
            return existing
        # Not created yet: build it on demand and hand back the new instance.
        return target.attach_property(c, name=backing_name)

    def eager_getter(target):
        return getattr(target, backing_name)

    return property(lazy_getter if c.lazy else eager_getter)


class ContextMappedClass(MappedClass, ContextualizableClass):
    """
    Metaclass for data object classes.

    On class creation it resolves the class's definition context and base
    namespace (from the class body or, failing that, from the bases), collects
    the property classes declared through `PThunk` attributes into
    ``_property_classes``, and replaces each such attribute with the accessor
    produced by `mp`.
    """

    def __init__(self, name, bases, dct):
        super(ContextMappedClass, self).__init__(name, bases, dct)

        ctx_uri = ContextMappedClass._find_class_context(dct, bases)
        if ctx_uri is None:
            self.__context = None
        else:
            # Plain strings are wrapped in a URIRef before the Contexts lookup
            is_plain_text = isinstance(ctx_uri, (str, six.text_type))
            if is_plain_text and not isinstance(ctx_uri, URIRef):
                ctx_uri = URIRef(ctx_uri)
            self.__context = Contexts[ctx_uri]

        if getattr(self, 'base_namespace', None) is None:
            self.base_namespace = ContextMappedClass._find_base_namespace(dct, bases)

        # Start from the property classes of the bases, then add our own
        self._property_classes = dict()
        for base in bases:
            inherited = getattr(base, '_property_classes', None)
            if inherited:
                self._property_classes.update(inherited)

        for attr_name, attr_value in dct.items():
            if not isinstance(attr_value, PThunk):
                continue
            property_class = attr_value(self, attr_name)
            self._property_classes[attr_name] = property_class
            setattr(self, attr_name, mp(property_class, attr_name))

    @classmethod
    def _find_class_context(cls, dct, bases):
        """
        Return the ``class_context`` entry of `dct` or, when that is absent or
        ``None``, the identifier of the first non-``None``
        ``definition_context`` found on `bases`. Returns ``None`` when neither
        is available.
        """
        declared = dct.get('class_context', None)
        if declared is not None:
            return declared
        for base in bases:
            parent_context = getattr(base, 'definition_context', None)
            if parent_context is not None:
                return parent_context.identifier
        return None

    @classmethod
    def _find_base_namespace(cls, dct, bases):
        """
        Return the ``base_namespace`` entry of `dct` or, when that is absent or
        ``None``, the first non-``None`` ``base_namespace`` found on `bases`.
        Returns ``None`` when neither is available.
        """
        declared = dct.get('base_namespace', None)
        if declared is not None:
            return declared
        for base in bases:
            inherited = getattr(base, 'base_namespace', None)
            if inherited is not None:
                return inherited
        return None

    def contextualize_class_augment(self, context):
        """
        Return a subclass of this class whose ``class_context`` is the
        identifier of `context`, or this class itself when `context` is
        ``None``. The subclass keeps this class's name, ``rdf_namespace`` and
        ``rdf_type``.
        """
        if context is None:
            return self
        ctxd_meta = contextualize_metaclass(context, self)
        class_body = dict(rdf_namespace=self.rdf_namespace,
                          rdf_type=self.rdf_type,
                          class_context=context.identifier)
        return ctxd_meta(self.__name__, (self,), class_body)

    def after_mapper_module_load(self, mapper):
        """
        Set ``rdf_type_object`` for this class: ``None`` for `TypeDataObject`
        itself, otherwise a `TypeDataObject` identified by this class's
        ``rdf_type`` and contextualized with the class's definition context.

        Raises
        ------
        Exception
            If the class is not `TypeDataObject` and has no definition context
        """
        if self is TypeDataObject:
            self.rdf_type_object = None
            return

        if self.definition_context is None:
            raise Exception("The class {0} has no context for TypeDataObject(ident={1})".format(
                self, self.rdf_type))
        L.debug('Creating rdf_type_object for {} in {}'.format(self, self.definition_context))
        contextualized_type = TypeDataObject.contextualize(self.definition_context)
        self.rdf_type_object = contextualized_type(ident=self.rdf_type)

    def __call__(self, *args, **kwargs):
        """
        Create an instance and fill in its ``rdf_type_property`` according to
        the kind of object that was created.
        """
        instance = super(ContextMappedClass, self).__call__(*args, **kwargs)

        if isinstance(instance, PropertyDataObject):
            instance.rdf_type_property(RDFProperty.get_instance())
        elif isinstance(instance, RDFProperty):
            instance.rdf_type_property(RDFSClass.get_instance())
        elif isinstance(instance, RDFSClass):
            # rdfs:Class is its own type
            instance.rdf_type_property.set(instance)
        elif isinstance(instance, TypeDataObject):
            instance.rdf_type_property(RDFSClass.get_instance())
        else:
            instance.rdf_type_property.set(self.rdf_type_object)
        return instance

    @property
    def context(self):
        return None

    @property
    def definition_context(self):
        """ Unlike self.context, definition_context isn't meant to be overriden """
        return self.__context
class _partial_property(partial):
    """
    Marker subclass of `functools.partial`.

    `BaseDataObject.__setattr__` recognizes instances of this class and calls
    them with ``owner`` and ``linkName`` instead of storing them.
    """
    pass


def contextualized_data_object(context, obj):
    """
    Contextualize `obj` with `context`.

    When `contextualize_helper` returns a different object that has a
    ``properties`` attribute, the properties are contextualized as well and
    installed on the result as attribute overrides, once as ``properties`` and
    once under each property's ``linkName``.
    """
    res = contextualize_helper(context, obj)
    if obj is not res and hasattr(res, 'properties'):
        cprop = res.properties.contextualize(context)
        res.add_attr_override('properties', cprop)
        for p in cprop:
            res.add_attr_override(p.linkName, p)
    return res


class ContextualizableList(Contextualizable, list):
    """ A list whose members can be contextualized or decontextualized together """

    def __init__(self, context):
        self._context = context

    def contextualize(self, context):
        """ Return a new list holding each member contextualized with `context` """
        res = type(self)(context=context)
        res += list(x.contextualize(context) for x in self)
        return res

    def decontextualize(self):
        """ Return a new list, with no context, holding each member decontextualized """
        res = type(self)(None)
        res += list(x.decontextualize() for x in self)
        return res


class PThunk(object):
    """
    Base class for deferred property declarations placed in a class body.
    `ContextMappedClass` calls each one with the class being defined and the
    attribute name to obtain the property class.
    """

    def __call__(self, *args, **kwargs):
        raise NotImplementedError()


class CPThunk(PThunk):
    """ A thunk that always yields the property class it was constructed with """

    def __init__(self, c):
        self.c = c

    def __call__(self, *args, **kwargs):
        return self.c


class APThunk(PThunk):
    """
    A thunk that creates the property class on demand through the owner
    class's ``_create_property_class``.

    Parameters
    ----------
    t : str
        The property type name, passed on as ``property_type``
    args : tuple
        Positional arguments forwarded after the link name
    kwargs : dict
        Keyword arguments forwarded to ``_create_property_class``
    """

    def __init__(self, t, args, kwargs):
        self.t = t
        self.args = args
        self.kwargs = kwargs

    def __call__(self, cls, linkName):
        return cls._create_property_class(linkName,
                                          *self.args,
                                          property_type=self.t,
                                          **self.kwargs)

    def __repr__(self):
        # Positional arguments are not guaranteed to be strings, so each one is
        # converted before joining; a bare join raises TypeError on, e.g., a
        # class or a bool. Keyword values were already converted with str().
        return '{}({}, {})'.format(self.t,
                                   ',\n'.join(str(a) for a in self.args),
                                   ',\n'.join(k + '=' + str(v) for k, v in self.kwargs.items()))


def DatatypeProperty(*args, **kwargs):
    """ Declare a 'DatatypeProperty' in a class body; returns an `APThunk` """
    return APThunk('DatatypeProperty', args, kwargs)


def ObjectProperty(*args, **kwargs):
    """ Declare an 'ObjectProperty' in a class body; returns an `APThunk` """
    return APThunk('ObjectProperty', args, kwargs)


def UnionProperty(*args, **kwargs):
    """ Declare a 'UnionProperty' in a class body; returns an `APThunk` """
    return APThunk('UnionProperty', args, kwargs)
class BaseDataObject(six.with_metaclass(ContextMappedClass,
                                        IdMixin(hashfunc=hashlib.md5),
                                        GraphObject,
                                        DataUser,
                                        Contextualizable)):
    """
    An object backed by the database

    Attributes
    -----------
    rdf_type : rdflib.term.URIRef
        The RDF type URI for objects of this type
    rdf_namespace : rdflib.namespace.Namespace
        The rdflib namespace (prefix for URIs) for objects from this class
    properties : list of Property
        Properties belonging to this object
    owner_properties : list of Property
        Properties belonging to parents of this object
    """
    rdf_type = R.RDFS['Resource']
    class_context = 'http://openworm.org/schema'
    base_namespace = R.Namespace("http://openworm.org/entities/")

    _next_variable_int = 0

    properties_are_init_args = True
    ''' If true, then properties defined in the class body can be passed as
        keyword arguments to __init__. For example::

            >>> class A(DataObject):
            ...     p = DatatypeProperty()

            >>> A(p=5)

        If the arguments are written explicitly into the __init__, then no
        special processing is done.
    '''

    def __new__(cls, *args, **kwargs):
        """
        This is defined so that the __init__ method gets a contextualized
        instance, allowing for statements made in __init__ to be contextualized.
        """
        res = super(BaseDataObject, cls).__new__(cls)
        class_ctx = cls.context
        res.context = class_ctx
        if class_ctx is not None:
            res.add_contextualization(class_ctx, res)
        return res

    def __init__(self, **kwargs):
        """
        Initialize the object, attach its non-lazy properties and apply any
        property values given as keyword arguments.

        Parameters
        ----------
        **kwargs
            When ``properties_are_init_args`` is true, keys naming a declared
            property are removed and, if their value is not ``None``, used to
            set that property. Everything else is passed to the parent
            ``__init__``.
        """
        ot = type(self)
        pc = ot._property_classes
        paia = ot.properties_are_init_args

        property_args = []
        if paia:
            # Pop every declared property name so the parent __init__ never
            # sees it; only the non-None values are applied below.
            for k in pc:
                val = kwargs.pop(k, None)
                if val is not None:
                    property_args.append((k, val))

        super(BaseDataObject, self).__init__(**kwargs)
        self.properties = ContextualizableList(self.context)
        self.owner_properties = []

        # A cache of property URIs and values. Used by RealSimpleProperty
        self.po_cache = None

        self._variable = None

        for k, v in pc.items():
            if not v.lazy:
                self.attach_property(v, name='_pow_' + k)

        for k, v in property_args:
            getattr(self, k)(v)

        self.attach_property(RDFTypeProperty)

    @property
    def conf(self):
        """ The configuration: the context's when there is a context, else the inherited one """
        if self.context is None:
            return super(BaseDataObject, self).conf
        else:
            return self.context.conf

    @conf.setter
    def conf(self, conf):
        # A ``super()`` proxy forwards attribute reads but not assignments, so
        # ``super(...).conf = conf`` always raises AttributeError. Fetch the
        # inherited descriptor through the class and call its setter directly.
        # NOTE(review): assumes the inherited ``conf`` is a data descriptor
        # (e.g. a property with a setter); confirm against the parent classes.
        super(BaseDataObject, type(self)).conf.__set__(self, conf)

    @property
    def rdf(self):
        """ The context's RDF graph, or the configured ``'rdf.graph'`` without a context """
        if self.context is not None:
            return self.context.rdf_graph()
        else:
            return self.conf.get('rdf.graph', None)

    @classmethod
    def next_variable(cls):
        """ Return a new `rdflib.Variable` named after this class and a running counter """
        cls._next_variable_int += 1
        return R.Variable('a' + cls.__name__ + '_' + str(cls._next_variable_int))

    @property
    def context(self):
        return self.__context

    @context.setter
    def context(self, value):
        self.__context = value

    def clear_po_cache(self):
        """ Clear the property-object cache for this object.

        This cache is maintained by and shared by the properties of this
        object. It isn't necessary to clear this cache manually unless you
        modify the RDFLib graph indirectly (e.g., through the store) at
        runtime.
        """
        self.po_cache = None

    def __repr__(self):
        return self.__class__.__name__ + "(" + 'ident=' + repr(self.idl) + ")"

    def id_is_variable(self):
        """ Is the identifier a variable? """
        return not self.defined

    def triples(self, *args, **kwargs):
        """ Return the triples produced by a `ComponentTripler` for this object

        Positional arguments are accepted but ignored; keyword arguments are
        passed to the `ComponentTripler`.
        """
        return ComponentTripler(self, **kwargs)()

    def __str__(self):
        k = self.idl
        if self.namespace_manager is not None:
            k = self.namespace_manager.normalizeUri(k)
        return self.__class__.__name__ + "(" + str(k) + ")"

    def __eq__(self, other):
        """ This method should not be overridden by subclasses """
        return (isinstance(other, BaseDataObject) and
                self.defined and
                other.defined and
                (self.identifier == other.identifier))

    def __setattr__(self, name, val):
        # A deferred property (see the *Property classmethods) is completed
        # here, using this object as the owner and the attribute name as the
        # link name, rather than being stored as-is.
        if isinstance(val, _partial_property):
            val(owner=self, linkName=name)
        else:
            super(BaseDataObject, self).__setattr__(name, val)

    def count(self):
        """ Return the number of results of querying the graph for this object """
        return len(GraphObjectQuerier(self, self.rdf, parallel=False)())

    def load(self):
        """
        Query the graph for objects matching this one and yield an object for
        each identifier found, typed by the most specific of its RDF types and
        created in this object's context.
        """
        idents = GraphObjectQuerier(self, self.rdf, parallel=False)()
        if not idents:
            return

        choices = self.rdf.triples_choices((list(idents),
                                            R.RDF['type'],
                                            None))
        # NOTE(review): groupby only merges *adjacent* triples, so this relies
        # on triples_choices returning each subject's triples contiguously.
        for ident, type_triples in groupby(choices, lambda x: x[0]):
            types = set(rdf_type for __, __, rdf_type in type_triples)
            the_type = get_most_specific_rdf_type(types)
            yield oid(ident, the_type, self.context)

    def variable(self):
        """ Return the query variable for this object, creating it on first use """
        if self._variable is None:
            self._variable = self.next_variable()
        return self._variable

    def __hash__(self):
        """ This method should not be overridden by subclasses """
        return hash(self.idl)

    def __getitem__(self, x):
        try:
            return DataUser.__getitem__(self, x)
        except KeyError:
            raise Exception(
                "You attempted to get the value `%s' from `%s'. It isn't here."
                " Perhaps you misspelled the name of a Property?" %
                (x, self))

    def get_owners(self, property_class_name):
        """ Return the owners along a property pointing to this object """
        wanted = str(property_class_name)
        return [x.owner for x in self.owner_properties
                if str(x.__class__.__name__) == wanted]

    @classmethod
    def _attach_or_defer(cls, property_type, args, kwargs):
        """
        Shared body of the ``*Property`` classmethods: create the property and
        attach it to its owner, or return a `_partial_property` to be
        completed on attribute assignment when that raises ``TypeError``
        (which `_create_property` raises when no owner was given).
        """
        try:
            return cls._create_property(*args, property_type=property_type, **kwargs)
        except TypeError:
            return _partial_property(cls._create_property, *args,
                                     property_type=property_type, **kwargs)

    @classmethod
    def DatatypeProperty(cls, *args, **kwargs):
        """ Attach a, possibly new, property to this class that has a simple
        type (string,number,etc) for its values

        Parameters
        ----------
        linkName : string
            The name of this property.
        owner : PyOpenWorm.dataObject.BaseDataObject
            The object the property is attached to. If omitted, a deferred
            property is returned instead.
        """
        return cls._attach_or_defer('DatatypeProperty', args, kwargs)

    @classmethod
    def ObjectProperty(cls, *args, **kwargs):
        """ Attach a, possibly new, property to this class that has a complex
        BaseDataObject for its values

        Parameters
        ----------
        linkName : string
            The name of this property.
        owner : PyOpenWorm.dataObject.BaseDataObject
            The object the property is attached to. If omitted, a deferred
            property is returned instead.
        value_type : type
            The type of BaseDataObject for values of this property
        """
        return cls._attach_or_defer('ObjectProperty', args, kwargs)

    @classmethod
    def UnionProperty(cls, *args, **kwargs):
        """ Attach a, possibly new, property to this class that has a simple
        type (string,number,etc) or BaseDataObject for its values

        Parameters
        ----------
        linkName : string
            The name of this property.
        owner : PyOpenWorm.dataObject.BaseDataObject
            The object the property is attached to. If omitted, a deferred
            property is returned instead.
        """
        return cls._attach_or_defer('UnionProperty', args, kwargs)

    @classmethod
    def _create_property_class(
            cls,
            linkName,
            property_type,
            value_type=None,
            multiple=False,
            link=None,
            lazy=True,
            inverse_of=None,
            **kwargs):
        """
        Return the property class for `linkName` on this class, creating,
        registering with the mapper and caching it in ``PropertyTypes`` on the
        first request.

        Parameters
        ----------
        linkName : string
            The name of the property
        property_type : string
            One of 'ObjectProperty', 'DatatypeProperty' or 'UnionProperty'
        value_type : type
            Type of the property's values. `This` stands for the owner class;
            ``None`` is replaced with `BaseDataObject`
        multiple : bool
            Stored on the generated class as ``multiple``
        link : rdflib.term.URIRef
            The property URI. Defaults to ``rdf_namespace[linkName]`` of the
            owner class
        lazy : bool
            Stored on the generated class as ``lazy``
        inverse_of : tuple
            ``(class, property name)`` of the inverse property. `This` may be
            given for the class
        **kwargs
            Additional attributes for the generated class

        Raises
        ------
        Exception
            If `link` is not given and the owner class has no ``rdf_namespace``
        """
        # XXX This should actually get called for all of the properties when
        #     their owner classes are defined. The initialization, however,
        #     must happen with the owner object's creation
        owner_class = cls
        owner_class_name = owner_class.__name__
        property_class_name = str(owner_class_name + "_" + linkName)
        _PropertyTypes_key = (cls, linkName)

        if value_type is This:
            value_type = owner_class

        if value_type is None:
            value_type = BaseDataObject

        c = None
        if _PropertyTypes_key in PropertyTypes:
            c = PropertyTypes[_PropertyTypes_key]
        else:
            klass = None
            if property_type == 'ObjectProperty':
                value_rdf_type = value_type.rdf_type
                klass = SP.ObjectProperty
            elif property_type == 'DatatypeProperty':
                value_rdf_type = False
                klass = SP.DatatypeProperty
            elif property_type == 'UnionProperty':
                value_rdf_type = False
                klass = SP.UnionProperty
            else:
                value_rdf_type = False

            if link is None:
                if owner_class.rdf_namespace is None:
                    raise Exception("{}.rdf_namespace is None".format(FCN(owner_class)))
                link = owner_class.rdf_namespace[linkName]
            classes = [klass]
            props = dict(linkName=linkName,
                         link=link,
                         property_type=property_type,
                         value_rdf_type=value_rdf_type,
                         value_type=value_type,
                         owner_type=owner_class,
                         rdf_object=PropertyDataObject.contextualize(owner_class.definition_context)(ident=link),
                         lazy=lazy,
                         multiple=multiple,
                         inverse_of=inverse_of,
                         **kwargs)

            if inverse_of is not None:
                invc = inverse_of[0]
                if invc is This:
                    invc = owner_class
                InverseProperty(owner_class, linkName, invc, inverse_of[1])

            c = type(property_class_name, tuple(classes), props)
            c.__module__ = owner_class.__module__
            owner_class.mapper.add_class(c)
            PropertyTypes[_PropertyTypes_key] = c
        return c

    @classmethod
    def _create_property(cls, *args, **kwargs):
        """
        Create (or look up) a property class and attach an instance of it to
        its owner.

        The owner is the second positional argument when exactly two are
        given; otherwise it is the ``owner`` keyword argument.

        Raises
        ------
        TypeError
            If no owner was given
        """
        owner = None
        if len(args) == 2:
            owner = args[1]
            args = (args[0],)
        else:
            owner = kwargs.get('owner', None)
            if owner is not None:
                del kwargs['owner']
        if owner is None:
            raise TypeError('No owner')
        return owner.attach_property(cls._create_property_class(*args, **kwargs))

    def attach_property(self, c, name=None):
        """
        Instantiate the property class `c` for this object, contextualized
        with this object's context, append it to ``properties`` and store it
        as an attribute.

        Parameters
        ----------
        c : class
            The property class
        name : string
            The attribute name to store the property under. Defaults to
            ``c.linkName``

        Returns
        -------
        The new property instance
        """
        ctxd_pclass = c.contextualize_class(self.context)
        res = ctxd_pclass(owner=self,
                          conf=self.conf,
                          resolver=_Resolver.get_instance())
        self.properties.append(res)

        if name is None:
            name = c.linkName
        setattr(self, name, res)

        return res

    def graph_pattern(self, shorten=False, show_namespaces=True, **kwargs):
        """ Get the graph pattern for this object.

        It should be as simple as converting the result of triples() into a BGP

        Parameters
        ----------
        shorten : bool
            Indicates whether to shorten the URLs with the namespace manager
            attached to the ``self``
        show_namespaces : bool
            Passed through to `triples_to_bgp`
        **kwargs
            Passed through to `triples`
        """

        nm = None
        if shorten:
            nm = self.namespace_manager
        return triples_to_bgp(self.triples(**kwargs), namespace_manager=nm,
                              show_namespaces=show_namespaces)

    def retract(self):
        """ Remove this object from the data store. """
        self.retract_statements(self.graph_pattern(query=True))

    def save(self):
        """ Write in-memory data to the database.
        Derived classes should call this to update the store.
        """
        self.add_statements(self.triples())

    @classmethod
    def object_from_id(cls, identifier_or_rdf_type, rdf_type=None):
        """
        Create an object with `oid`, converting the arguments to `URIRef`
        first. See `oid` for the meaning of the arguments.
        """
        if not isinstance(identifier_or_rdf_type, URIRef):
            identifier_or_rdf_type = URIRef(identifier_or_rdf_type)

        if rdf_type is None:
            return oid(identifier_or_rdf_type)
        else:
            rdf_type = URIRef(rdf_type)
            return oid(identifier_or_rdf_type, rdf_type)

    def decontextualize(self):
        """
        Return a version of this object without a context. An object that
        already has no context is returned unchanged.
        """
        if self.context is None:
            return self
        res = decontextualize_helper(self)
        if self is not res:
            cprop = res.properties.decontextualize()
            res.add_attr_override('properties', cprop)
            for p in cprop:
                res.add_attr_override(p.linkName, p)
        return res

    def contextualize_augment(self, context):
        """ Return this object contextualized with `context`, or itself when `context` is None """
        if context is not None:
            return contextualized_data_object(context, self)
        else:
            return self
class RDFSCommentProperty(SP.DatatypeProperty):
    """ The ``rdfs:comment`` property, exposed on objects as ``rdfs_comment`` """
    linkName = 'rdfs_comment'
    link = R.RDFS['comment']
    owner_type = BaseDataObject
    lazy = True
    multiple = True


class RDFSLabelProperty(SP.DatatypeProperty):
    """ The ``rdfs:label`` property, exposed on objects as ``rdfs_label`` """
    linkName = 'rdfs_label'
    link = R.RDFS['label']
    owner_type = BaseDataObject
    lazy = True
    multiple = True
class DataObject(BaseDataObject):
    """
    A `BaseDataObject` that additionally declares the ``rdfs_comment`` and
    ``rdfs_label`` properties.
    """
    # CPThunk hands the ready-made property classes to the metaclass
    rdfs_comment = CPThunk(RDFSCommentProperty)
    rdfs_label = CPThunk(RDFSLabelProperty)
class DataObjectSingletonMeta(type(BaseDataObject)):
    """ Metaclass whose classes report their definition context as their context """

    @property
    def context(self):
        return self.definition_context


class DataObjectSingleton(six.with_metaclass(DataObjectSingletonMeta, BaseDataObject)):
    """
    Base class for data objects of which a single instance is kept per class.
    Instances must be obtained through `get_instance`.
    """
    instance = None
    class_context = URIRef('http://openworm.org/schema')

    # True only while `get_instance` is constructing the instance. The
    # class-level default makes a direct constructor call fail with the
    # intended exception rather than an AttributeError on this name.
    _gettingInstance = False

    def __init__(self, *args, **kwargs):
        if self._gettingInstance:
            super(DataObjectSingleton, self).__init__(*args, **kwargs)
        else:
            raise Exception("You must call get_instance to get " + type(self).__name__)

    @classmethod
    def get_instance(cls, **kwargs):
        """
        Return the instance for this class, creating it on the first call.

        Parameters
        ----------
        **kwargs
            Passed to the constructor when the instance is created; ignored
            on later calls
        """
        if cls.instance is None:
            cls._gettingInstance = True
            try:
                cls.instance = cls(**kwargs)
            finally:
                # Reset even when construction fails so that the guard in
                # __init__ is not left disabled.
                cls._gettingInstance = False

        return cls.instance


class TypeDataObject(BaseDataObject):
    """ The data object used for ``rdf_type_object`` of mapped classes """
    class_context = URIRef('http://openworm.org/schema')


class RDFSClass(DataObjectSingleton):  # This maybe becomes a DataObject later

    """ The DataObject corresponding to rdfs:Class """
    # XXX: This class may be changed from a singleton later to facilitate
    #      dumping and reloading the object graph
    rdf_type = R.RDFS['Class']
    auto_mapped = True
    class_context = 'http://www.w3.org/2000/01/rdf-schema'

    def __init__(self, *args, **kwargs):
        super(RDFSClass, self).__init__(ident=R.RDFS["Class"], *args, **kwargs)


class RDFTypeProperty(SP.ObjectProperty):
    """ The ``rdf:type`` property, attached to every `BaseDataObject` on creation """
    link = R.RDF['type']
    linkName = "rdf_type_property"
    value_type = RDFSClass
    owner_type = BaseDataObject
    multiple = True
    lazy = False


class RDFProperty(DataObjectSingleton):

    """ The DataObject corresponding to rdf:Property """
    rdf_type = R.RDF['Property']
    class_context = URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns')

    def __init__(self, *args, **kwargs):
        super(RDFProperty, self).__init__(ident=R.RDF["Property"], *args, **kwargs)


def oid(identifier_or_rdf_type, rdf_type=None, context=None):
    """ Create an object from its rdf type

    Parameters
    ----------
    identifier_or_rdf_type : :class:`str` or :class:`rdflib.term.URIRef`
        If `rdf_type` is provided, then this value is used as the identifier
        for the newly created object. Otherwise, this value will be the
        :attr:`rdf_type` of the object used to determine the Python type and
        the object's identifier will be randomly generated.
    rdf_type : :class:`str`, :class:`rdflib.term.URIRef`, :const:`False`
        If provided, this will be the :attr:`rdf_type` of the newly created
        object.
    context : callable
        If not None, it is called with the resolved Python class and its
        return value is instantiated in place of that class.

    Returns
    -------
       The newly created object

    """
    identifier = identifier_or_rdf_type
    if rdf_type is None:
        rdf_type = identifier_or_rdf_type
        identifier = None

    c = None
    try:
        c = PyOpenWorm.CONTEXT.mapper.RDFTypeTable[rdf_type]
    except KeyError:
        # No mapped class for this type: fall back to the most general one
        c = BaseDataObject
    L.debug("oid: making a {} with ident {}".format(c, identifier))

    # if its our class name, then make our own object
    # if there's a part after that, that's the property name
    o = None
    if context is not None:
        c = context(c)

    if identifier is not None:
        o = c(ident=identifier)
    else:
        o = c()
    return o


def disconnect():
    """ Empty the module-level registries """
    # The dictionaries are mutated in place, not rebound, so no ``global``
    # declarations are needed.
    DataObjectTypes.clear()
    RDFTypeTable.clear()
    DataObjectsParents.clear()
    PropertyTypes.clear()
class values(DataObject):

    """
    A convenience class for working with a collection of objects

    Example::

        v = values('unc-13 neurons and muscles')
        n = P.Neuron()
        m = P.Muscle()
        n.receptor('UNC-13')
        m.receptor('UNC-13')
        for x in n.load():
            v.value(x)
        for x in m.load():
            v.value(x)
        # Save the group for later use
        v.save()
        ...
        # get the list back
        u = values('unc-13 neurons and muscles')
        nm = list(u.value())


    Parameters
    ----------
    group_name : string
        A name of the group of objects

    Attributes
    ----------
    name : DatatypeProperty
        The name of the group of objects
    value : ObjectProperty
        An object in the group
    add : ObjectProperty
        an alias for ``value``

    """
    class_context = URIRef('http://openworm.org/schema')

    def __init__(self, group_name, **kwargs):
        # The parent __init__ accepts keyword arguments only. Passing ``self``
        # a second time, as a positional argument, raised TypeError on every
        # construction.
        super(values, self).__init__(**kwargs)
        self.add = values.ObjectProperty('value', owner=self)
        self.group_name = values.DatatypeProperty('name', owner=self)
        self.name(group_name)

    @property
    def identifier(self):
        # NOTE(review): ``group_name`` holds the object returned by
        # ``values.DatatypeProperty(...)`` in __init__, not the string passed
        # to the constructor -- confirm this is what make_identifier expects.
        return self.make_identifier(self.group_name)
def get_most_specific_rdf_type(types):
    """ Gets the most specific rdf_type.

    Returns the URI corresponding to the lowest in the DataObject class
    hierarchy from among the given URIs. URIs with no entry in the mapper's
    ``RDFTypeTable`` are skipped with a warning.
    """
    mapper = PyOpenWorm.CONTEXT.mapper
    type_table = mapper.RDFTypeTable
    # Start from the mapper's base classes and narrow to a single class each
    # time a given URI maps to a subclass of the current candidates.
    candidates = tuple(mapper.base_classes.values())
    for uri in types:
        try:
            class_object = type_table[uri]
        except KeyError:
            L.warning(
                """A Python class corresponding to the type URI "{}" couldn't be found. You may want to import the module containing the class as well as add additional type annotations in order to resolve your objects to a more precise type.""".format(uri))
            continue
        if issubclass(class_object, candidates):
            candidates = (class_object,)
    return candidates[0].rdf_type


class PropertyDataObject(DataObject):
    """ A PropertyDataObject represents the property-as-object.

    Try not to confuse this with the Property class
    """
    rdf_type = R.RDF['Property']
    class_context = URIRef('http://openworm.org/schema')

    def __init__(self, *args, **kwargs):
        super(PropertyDataObject, self).__init__(*args, **kwargs)


class _Resolver(RDFTypeResolver):
    """ Holder of the shared `RDFTypeResolver` handed to property instances """
    instance = None

    @classmethod
    def get_instance(cls):
        """ Return the shared resolver, building it on the first call """
        if cls.instance is not None:
            return cls.instance
        cls.instance = RDFTypeResolver(
            BaseDataObject.rdf_type,
            get_most_specific_rdf_type,
            oid,
            deserialize_rdflib_term)
        return cls.instance


# Classes from this module listed for the yarom mapper
__yarom_mapped_classes__ = (BaseDataObject,
                            DataObject,
                            RDFSClass,
                            TypeDataObject,
                            RDFProperty,
                            values,
                            PropertyDataObject)