# Source code for PyOpenWorm.bibtex

from .datasource import DataTranslator, DataSource
import re
import bibtexparser
from PyOpenWorm.evidence import Evidence


def listify_one(record, name):
    """
    Make sure the value stored under ``name`` is a list.

    A tuple is converted to a list, a list is left untouched, and any other
    value is wrapped in a one-element list. The record is modified in place.

    :param record: the record.
    :type record: dict
    :param name: key of the field to convert; it must be present.
    :returns: dict -- the modified record.
    """
    current = record[name]
    if isinstance(current, tuple):
        record[name] = list(current)
    elif not isinstance(current, list):
        record[name] = [current]
    return record


def listify(record):
    """
    Turn every field of the record, except ``ID`` and ``ENTRYTYPE``, into a
    list.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    # Some fields may hold several values. Storing every field as a list lets
    # the rest of the code treat them uniformly, including fields that can
    # only occur once in a well-formed record.
    untouched = ('ID', 'ENTRYTYPE')
    for field in record:
        if field in untouched:
            continue
        listify_one(record, field)
    return record


def doi(record):
    """
    Add a link for each DOI of the record.

    Nothing is added when the record has no ``doi`` field, or when one of the
    existing links already contains the text ``doi``. A DOI value starting
    with ``10`` is prefixed with ``http://dx.doi.org/``; any other value is
    appended to the links unchanged.

    Whenever a ``doi`` field is present, the ``link`` field is left as a list:
    it is created if missing, and a tuple or single value is converted.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    dois = record.get('doi')
    if dois is None:
        return record

    # Normalise 'link' so it can be appended to; an existing list is kept as
    # the same object.
    links = record.get('link')
    if links is None:
        links = []
    elif isinstance(links, tuple):
        links = list(links)
    elif not isinstance(links, list):
        links = [links]
    record['link'] = links

    # A link mentioning 'doi' is taken to mean the DOI is already linked.
    if any('doi' in item for item in links):
        return record

    if not isinstance(dois, (list, tuple)):
        dois = [dois]
    for value in dois:
        if value.startswith('10'):
            value = 'http://dx.doi.org/' + value
        links.append(value)
    return record
def author(record):
    """
    Split author field by 'and' into a list of names.

    Newlines in the field are treated as spaces, each name is stripped of
    surrounding whitespace, and empty names are dropped. If no name remains
    (the field is empty or blank), the ``author`` key is removed from the
    record.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    if "author" not in record:
        return record

    raw = record["author"]
    names = []
    if raw:
        pieces = raw.replace('\n', ' ').split(" and ")
        # Drop empty pieces left by a dangling separator or a blank field.
        names = [piece.strip() for piece in pieces if piece.strip()]

    if names:
        record["author"] = names
    else:
        del record["author"]
    return record
# Matches a LaTeX ``\url{...}`` command and captures the address inside it.
HOWPUB_URL_RE = re.compile(r'\\url{([^}]+)}')


def _as_list(value):
    """
    Return ``value`` as a list.

    ``None`` gives an empty list, a list is returned as the same object, a
    tuple is copied into a list, and anything else is wrapped in a list.
    """
    if value is None:
        return []
    if isinstance(value, list):
        return value
    if isinstance(value, tuple):
        return list(value)
    return [value]


def note_url(record):
    """
    Add every ``\\url{...}`` address found in the ``note`` field to ``url``.

    The ``url`` field is created, or converted to a list, only when at least
    one address is found.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    found = [
        match.group(1)
        for note in _as_list(record.get('note'))
        for match in HOWPUB_URL_RE.finditer(note)
    ]
    if found:
        urls = _as_list(record.get('url'))
        urls.extend(found)
        record['url'] = urls
    return record


def url(record):
    """
    Collect the record's URLs into the ``url`` field, as a list.

    Added after any existing ``url`` values, in this order:

    * the address of a ``howpublished`` value that starts with ``\\url{...}``
    * every value of the ``link`` field

    ``howpublished`` may be a single string or a list/tuple of strings; it is
    a list once ``listify`` has run. ``url`` never shares its list object
    with ``link``.

    :param record: the record.
    :type record: dict
    :returns: dict -- the modified record.
    """
    additions = []
    for text in _as_list(record.get('howpublished')):
        found = HOWPUB_URL_RE.match(text)
        if found:
            additions.append(found.group(1))

    link = record.get('link')
    additions.extend(_as_list(link))

    existing = record.get('url')
    if existing is None:
        # Only create 'url' when there is a source for it; an empty 'link'
        # still yields an empty 'url'.
        if additions or link is not None:
            record['url'] = additions
        return record

    urls = _as_list(existing)
    urls.extend(additions)
    record['url'] = urls
    return record
def customizations(record):
    """
    Apply this module's record customizations, in a fixed order:
    ``author``, ``listify``, ``doi``, ``note_url``, ``url``.

    :param record: a record
    :returns: -- customized record
    """
    # author() splits the raw author string, so it has to run before
    # listify() wraps every field value in a list.
    for step in (author, listify, doi, note_url, url):
        record = step(record)
    return record
def bibtex_to_document(bibtex_entry, context=None):
    """
    Takes a single BibTeX entry and translates it into a Document object

    :param bibtex_entry: the entry; it must have an ``ID`` key.
    :type bibtex_entry: dict
    :param context: passed to ``Document.contextualize`` before the document
        is created.
    :returns: the new Document, filled in from the entry.
    """
    # NOTE(review): imported here rather than at module level, presumably to
    # avoid a circular import -- confirm before moving it.
    from PyOpenWorm.document import Document

    document = Document.contextualize(context)()
    update_document_with_bibtex(document, bibtex_entry)
    return document
def update_document_with_bibtex(document, bibtex_entry):
    """
    Copy the fields of a BibTeX entry onto a document.

    The entry's ``ID`` becomes the document key. Each value of the ``title``,
    ``year``, ``author`` and ``doi`` fields is passed to the document
    property of the same name, and each ``link`` value to ``uri``. Every
    value is passed exactly once.

    :param document: the document to update.
    :param bibtex_entry: the entry; it must have an ``ID`` key.
    :type bibtex_entry: dict
    """
    document.set_key(bibtex_entry['ID'])

    # (BibTeX field, document property) pairs.
    fields = (
        ('title', 'title'),
        ('year', 'year'),
        ('author', 'author'),
        ('doi', 'doi'),
        ('link', 'uri'),
    )
    for key, prop in fields:
        values = bibtex_entry.get(key)
        if values is None:
            continue
        # A bare value (e.g. a string from an entry that was not listified)
        # is one value, not a sequence of characters.
        if not isinstance(values, (list, tuple)):
            values = (values,)
        for value in values:
            getattr(document, prop)(value)


def make_default_bibtex_parser():
    """
    Create a ``BibTexParser`` with ``common_strings=True`` that applies
    ``customizations`` to each record.

    :returns: the configured parser.
    """
    parser = bibtexparser.bparser.BibTexParser(common_strings=True)
    parser.customization = customizations
    return parser


def loads(bibtex_string):
    """
    Parse a BibTeX string with the default parser.

    :param bibtex_string: BibTeX source text.
    :returns: the result of ``bibtexparser.loads``.
    """
    parser = make_default_bibtex_parser()
    return bibtexparser.loads(bibtex_string, parser=parser)


def load(bibtex_file):
    """
    Parse an open BibTeX file with the default parser.

    :param bibtex_file: file object to read from.
    :returns: the result of ``bibtexparser.load``.
    """
    parser = make_default_bibtex_parser()
    return bibtexparser.load(bibtex_file, parser=parser)


def load_from_file_named(file_name):
    """
    Open the named file and parse it with :func:`load`.

    :param file_name: path of the BibTeX file.
    :returns: the result of :func:`load`.
    """
    with open(file_name) as bibtex_file:
        return load(bibtex_file)


def parse_bibtex_into_documents(file_name, context=None):
    """
    Parse the named BibTeX file into documents.

    :param file_name: path of the BibTeX file.
    :param context: passed on to :func:`bibtex_to_document`.
    :returns: dict -- entry ``ID`` mapped to the document made from the entry.
    """
    bib_database = load_from_file_named(file_name)
    return {
        entry['ID']: bibtex_to_document(entry, context)
        for entry in bib_database.entries
    }


def parse_bibtex_into_evidence(file_name, context=None):
    """
    Parse the named BibTeX file into Evidence objects, one per entry.

    Each Evidence gets the entry's document as ``reference`` and, as
    ``supports``, the ``as_context.rdf_object`` of the contextualized
    document.

    :param file_name: path of the BibTeX file.
    :param context: used to contextualize the Evidence class and documents.
    :returns: dict -- entry ``ID`` mapped to its Evidence.
    """
    documents = parse_bibtex_into_documents(file_name, context)
    return {
        key: Evidence.contextualize(context)(
            reference=document,
            supports=document.contextualize(context).as_context.rdf_object)
        for key, document in documents.items()
    }
class BibTexDataSource(DataSource):
    """A data source that refers to a BibTeX file by name."""

    def __init__(self, bibtex_file_name, **kwargs):
        """
        :param bibtex_file_name: name of the BibTeX file; stored as
            ``self.bibtex_file_name``.
        :param kwargs: passed through to ``DataSource.__init__``.
        """
        super(BibTexDataSource, self).__init__(**kwargs)
        self.bibtex_file_name = bibtex_file_name
class BibTexDataTranslator(DataTranslator):
    """Translates a :class:`BibTexDataSource` into Evidence objects."""

    data_source_type = BibTexDataSource

    # The method takes no ``self``. Without @staticmethod, calling it on an
    # instance would bind the translator itself to ``data_source`` and raise
    # TypeError. As a static method it keeps its signature and can be called
    # on the class or on an instance.
    @staticmethod
    def translate(data_source):
        """
        Parse the data source's BibTeX file into Evidence objects.

        :param data_source: object with a ``bibtex_file_name`` attribute.
        :returns: the values view of the dict built by
            ``parse_bibtex_into_evidence``.
        """
        evidences = parse_bibtex_into_evidence(data_source.bibtex_file_name)
        return evidences.values()