Source code for pydov.util.codelists

import warnings

from dataclasses import dataclass

from pydov.util.dovutil import (
    build_dov_sparql_request, get_remote_url, get_remote_request)
from pydov.util.errors import CodelistFetchWarning, RemoteFetchError
from pydov.util.hooks import HookRunner

from owslib.etree import etree

from pydov.util.notebook import HtmlFormatter
from pydov.util.owsutil import typeconvert
from pydov.util.wrappers import AbstractDictLike


class MemoryCache(object):
    """In-memory store for objects that are expensive to obtain.

    Used here to avoid downloading the same codelists twice during the
    runtime of a pydov session.
    """

    # The dictionary lives on the class itself, so every caller shares it.
    cache = {}

    @staticmethod
    def get(key, fn, *args, **kwargs):
        """Return the object cached under `key`, producing it when absent.

        When `key` is not yet present in the cache, `fn` is called with
        `*args` and `**kwargs` and its result is stored under `key`. When
        `key` is already present, `fn` is not called at all.

        Parameters
        ----------
        key : hashable
            Key identifying the object in the cache.
        fn : callable
            Function used to produce the object on a cache miss.
        *args
            Positional arguments passed on to `fn`.
        **kwargs
            Keyword arguments passed on to `fn`.

        Returns
        -------
        object
            The object found in the cache or freshly produced by `fn`.

        """
        already_cached = key in MemoryCache.cache
        if not already_cached:
            # Nothing is stored when `fn` raises: the exception propagates
            # before the assignment takes place.
            MemoryCache.cache[key] = fn(*args, **kwargs)
        return MemoryCache.cache.get(key)

    @staticmethod
    def clear():
        """Remove every entry from the memory cache."""
        MemoryCache.cache.clear()
@dataclass
class CodeListItem(HtmlFormatter):
    """A single entry of a codelist.

    Attributes
    ----------
    code : str
        The code of the item.
    label : str
        The label of the item.
    definition : str, optional
        The definition of the item. Defaults to None.

    """

    code: str
    label: str
    definition: str = None

    def __repr__(self):
        """Build the plain-text representation of the item.

        Returns
        -------
        str
            Text listing the code, label and definition of the item.

        """
        fields = (f'code: {self.code}, label: {self.label}, '
                  f'definition: {self.definition}')
        return f'<pydov.util.codelists.CodeListItem: {fields}>'

    def _repr_html_(self):
        """Build the HTML representation of the item.

        The definition is only included when it is not None. The final
        rendering is delegated to the parent class, without header.

        Returns
        -------
        str
            HTML representation of the codelist item.

        """
        if self.definition is None:
            markup = f'<p><b>{self.code}</b> - {self.label}</p>'
        else:
            markup = (f'<p><b>{self.code}</b> - {self.label} - '
                      f'<i>{self.definition}</i></p>')
        return super()._repr_html_(markup, with_header=False)
class AbstractCodeList(AbstractDictLike, HtmlFormatter):
    """Base class for codelists: a collection of items keyed by code."""

    def __init__(self):
        """Create an empty codelist.

        The items dictionary is created here and the very same object is
        handed to the parent initialiser.
        """
        self.items = {}
        super().__init__(self.items)

    def get_label(self, code):
        """Look up the label belonging to a code.

        Parameters
        ----------
        code : str
            The code to get the label for.

        Returns
        -------
        label : str or None
            The label of the matching item, or None when the code is not
            part of the codelist.

        """
        entry = self.items.get(code)
        return None if entry is None else entry.label

    def get_definition(self, code):
        """Look up the definition belonging to a code.

        Parameters
        ----------
        code : str
            The code to get the definition for.

        Returns
        -------
        definition : str or None
            The definition of the matching item, or None when the code is
            not part of the codelist.

        """
        entry = self.items.get(code)
        return None if entry is None else entry.definition

    def get_codelist(self):
        """Return the codelist.

        Returns
        -------
        codelist : AbstractCodeList
            This very object.

        """
        return self

    def get(self, *args, **kwargs):
        """Fetch an item, mirroring the `get` method of a dictionary.

        Parameters
        ----------
        *args
            Positional arguments forwarded to the `get` method of the
            items dictionary.
        **kwargs
            Keyword arguments forwarded to the `get` method of the items
            dictionary.

        Returns
        -------
        item : CodeListItem or None
            Whatever the `get` method of the items dictionary returns for
            the given arguments.

        """
        return self.items.get(*args, **kwargs)

    def get_values(self):
        """Summarise the codelist as a mapping of codes to labels.

        Returns
        -------
        values : dict or None
            Dictionary with the codes as keys and the labels as values, or
            None when the codelist holds no items.

        """
        if not self.items:
            return None
        return {entry.code: entry.label for entry in self.items.values()}

    def add_item(self, item):
        """Store an item under its code.

        An item already stored under the same code is replaced.

        Parameters
        ----------
        item : CodeListItem
            The item to add to the codelist.

        """
        self.items[item.code] = item

    def add_items(self, items):
        """Store several items, one `add_item` call per item.

        Parameters
        ----------
        items : list of CodeListItem
            The items to add to the codelist.

        """
        for entry in items:
            self.add_item(entry)

    def is_empty(self):
        """Tell whether the codelist holds no items.

        Returns
        -------
        is_empty : bool
            True if the codelist is empty, False otherwise.

        """
        return not self.items

    def __repr__(self):
        """Build the plain-text representation of the codelist.

        Returns
        -------
        str
            The representations of all items, ordered by code and
            separated by commas.

        """
        ordered = sorted(self.items.values(), key=lambda x: x.code)
        joined = ', '.join(entry.__repr__() for entry in ordered)
        return f'<pydov.util.codelists.AbstractCodeList: {joined}>'

    def _repr_html_(self):
        """Build the HTML representation of the codelist.

        Returns
        -------
        str
            The HTML of all items ordered by code, as rendered by the
            parent class.

        """
        ordered = sorted(self.items.values(), key=lambda x: x.code)
        fragments = ''.join(entry._repr_html_() for entry in ordered)
        return super()._repr_html_(fragments)
class AbstractResolvableCodeList(AbstractCodeList):
    """Base class for codelists whose items come from another datasource.

    The items are loaded on first use: the accessors overridden below call
    `resolve` before answering.
    """

    # NOTE(review): `get`, `__repr__` and `_repr_html_` are inherited
    # unchanged from AbstractCodeList, so they read `self.items` without
    # resolving first - confirm this is intended.

    def __init__(self, datatype):
        """Create an unresolved codelist.

        Parameters
        ----------
        datatype : str
            The datatype of the codes of the codelist. One of `string`,
            `float`, `integer`, `date`, `datetime`, `boolean`.

        """
        super().__init__()
        self.datatype = datatype
        self._is_resolved = False

    def get_id(self):
        """Return a unique id for this codelist.

        The id needs to be unique among all codelists and will be used
        for caching.

        Raises
        ------
        NotImplementedError
            Always: subclasses have to provide the implementation.

        """
        raise NotImplementedError

    def get_remote_codelist(self):
        """Fetch the remote codelist.

        Returns
        -------
        codelist : any
            Codelist in a format that `parse_codelist_items` can parse.

        Raises
        ------
        NotImplementedError
            Always: subclasses have to provide the implementation.

        """
        raise NotImplementedError

    def parse_codelist_items(self, codelist):
        """Turn a fetched codelist into items.

        Parameters
        ----------
        codelist : any
            Codelist to parse, as returned by `get_remote_codelist`.

        Yields
        ------
        item : CodeListItem
            One item per entry of the codelist.

        Raises
        ------
        NotImplementedError
            Always: subclasses have to provide the implementation.

        """
        raise NotImplementedError

    def resolve(self):
        """Load the remote codelist values, unless already done."""
        if self._is_resolved:
            return

        remote = self.get_remote_codelist()
        for entry in self.parse_codelist_items(remote):
            self.add_item(entry)

        # Only reached when fetching and parsing raised no exception.
        self._is_resolved = True

    def is_empty(self):
        """Resolve the codelist, then tell whether it holds no items.

        Returns
        -------
        is_empty : bool
            True if the codelist is empty, False otherwise.

        """
        self.resolve()
        return super().is_empty()

    def get_codelist(self):
        """Resolve the codelist, then return it.

        Returns
        -------
        codelist : AbstractCodeList
            This very object.

        """
        self.resolve()
        return super().get_codelist()

    def get_values(self):
        """Resolve the codelist, then map its codes to their labels.

        Returns
        -------
        values : dict or None
            Dictionary with the codes as keys and the labels as values, or
            None when the codelist holds no items.

        """
        self.resolve()
        return super().get_values()

    def get_label(self, code):
        """Resolve the codelist, then look up the label of a code.

        Parameters
        ----------
        code : str
            The code to get the label for.

        Returns
        -------
        label : str or None
            The label for the given code, or None if the code is not
            found.

        """
        self.resolve()
        return super().get_label(code)

    def get_definition(self, code):
        """Resolve the codelist, then look up the definition of a code.

        Parameters
        ----------
        code : str
            The code to get the definition for.

        Returns
        -------
        definition : str or None
            The definition for the given code, or None if the code is not
            found.

        """
        self.resolve()
        return super().get_definition(code)
class OsloCodeList(AbstractResolvableCodeList):
    """Codelist backed by a conceptscheme in the OSLO linked data format."""

    def __init__(self, conceptscheme, datatype):
        """Create a codelist for an OSLO conceptscheme.

        Parameters
        ----------
        conceptscheme : str
            OSLO conceptscheme which will be used for this codelist.
        datatype : str
            The datatype of the codes of the codelist. One of `string`,
            `float`, `integer`, `date`, `datetime`, `boolean`.

        """
        super().__init__(datatype)
        self.conceptscheme = conceptscheme
        self._codelist = None

    def get_id(self):
        """Return the cache id: the conceptscheme followed by `.xml`.

        Returns
        -------
        str
            Id of this codelist.

        """
        return f'{self.conceptscheme}.xml'

    def build_sparql_query(self):
        """Build the SPARQL query to fetch the codelist from DOV.

        The query selects code, label and (optional) definition of every
        subject in the conceptscheme of this codelist.

        Returns
        -------
        query : str
            The SPARQL query to fetch the codelist.

        """
        # NOTE(review): whitespace inside the query is written as single
        # spaces; confirm against the original layout if the exact request
        # URL matters (e.g. to hooks matching on it).
        template = (
            ' PREFIX skos: <http://www.w3.org/2004/02/skos/core#> '
            'PREFIX conceptscheme: '
            '<https://data.bodemenondergrond.vlaanderen.be/id/conceptscheme/> '
            'SELECT ?code ?label ?definition '
            'WHERE {{ '
            '?s skos:inScheme conceptscheme:{} . '
            '?s skos:notation ?code . '
            '?s skos:prefLabel ?label . '
            'OPTIONAL {{ ?s skos:note ?definition . }} '
            '}} '
        )
        return template.format(self.conceptscheme)

    def get_remote_codelist(self):
        """Fetch the SPARQL result for this codelist.

        Hooks get the first chance to supply the response. Otherwise the
        response is taken from the memory cache, which performs the remote
        request on a miss. A failing remote request is reported as a
        CodelistFetchWarning instead of an exception.

        Returns
        -------
        codelist : any
            The response, or None when fetching it failed.

        """
        request = build_dov_sparql_request(self.build_sparql_query())

        result = HookRunner.execute_inject_meta_response(request.url)
        if result is None:
            try:
                result = MemoryCache.get(
                    self.get_id(), get_remote_request, request)
            except RemoteFetchError:
                # `result` is still None here: the assignment above never
                # completed.
                warnings.warn(
                    "Failed to fetch remote sparql data, metadata will "
                    "be incomplete.", CodelistFetchWarning)

        HookRunner.execute_meta_received(request.url, result)
        return result

    def parse_codelist_items(self, codelist):
        """Turn a SPARQL result document into codelist items.

        Parameters
        ----------
        codelist : any
            XML document as returned by `get_remote_codelist`, or None.

        Yields
        ------
        item : CodeListItem
            One item per solution in the document. Nothing is yielded
            when `codelist` is None.

        """
        if codelist is None:
            return

        solution_path = './/{http://www.w3.org/2005/sparql-results#}solution'
        code_path = (
            './/{http://www.w3.org/2005/sparql-results#}binding['
            '{http://www.w3.org/2005/sparql-results#}variable="code"]'
            '/{http://www.w3.org/2005/sparql-results#}value')
        label_path = (
            './/{http://www.w3.org/2005/sparql-results#}binding['
            '{http://www.w3.org/2005/sparql-results#}variable="label"]'
            '/{http://www.w3.org/2005/sparql-results#}value')
        definition_path = (
            './/{http://www.w3.org/2005/sparql-results#}binding['
            '{http://www.w3.org/2005/sparql-results#}'
            'variable="definition"]'
            '/{http://www.w3.org/2005/sparql-results#}value')

        document = etree.fromstring(codelist)
        for solution in document.findall(solution_path):
            # NOTE(review): unlike XsdType, the code is not passed through
            # `typeconvert` with `self.datatype` - confirm this is intended.
            code = solution.find(code_path).text
            label = solution.find(label_path).text

            # The definition binding may be missing from a solution.
            definition_node = solution.find(definition_path)
            definition = (
                None if definition_node is None else definition_node.text)

            yield CodeListItem(code, label, definition)
class XsdType(AbstractResolvableCodeList):
    """Codelist backed by an enumeration type in an XSD schema."""

    def __init__(self, xsd_schema, typename, datatype):
        """Create a reference to a type in an XSD schema.

        Parameters
        ----------
        xsd_schema : str
            URL of XSD schema record containing the specified typename.
        typename : str
            Name of the type.
        datatype : str
            The datatype of the codes of the codelist. One of `string`,
            `float`, `integer`, `date`, `datetime`, `boolean`.

        """
        super().__init__(datatype)
        self.source_url = xsd_schema
        self.typename = typename
        self._schema = None

    def get_id(self):
        """Return the cache id: the part of the URL after the last `/`.

        Returns
        -------
        str
            Id of this codelist.

        """
        return self.source_url.rsplit('/', 1)[-1]

    def get_remote_codelist(self):
        """Fetch the XSD schema for this codelist.

        Hooks get the first chance to supply the response. Otherwise the
        response is taken from the memory cache, which downloads the
        schema on a miss. A failing download is reported as a
        CodelistFetchWarning instead of an exception.

        Returns
        -------
        codelist : any
            The response, or None when fetching it failed.

        """
        result = HookRunner.execute_inject_meta_response(self.source_url)
        if result is None:
            try:
                result = MemoryCache.get(
                    self.get_id(), get_remote_url, self.source_url)
            except RemoteFetchError:
                # `result` is still None here: the assignment above never
                # completed.
                warnings.warn(
                    "Failed to fetch remote codelist, metadata will "
                    "be incomplete.", CodelistFetchWarning)

        HookRunner.execute_meta_received(self.source_url, result)
        return result

    def parse_codelist_items(self, codelist):
        """Turn an XSD schema document into codelist items.

        Only the enumeration values of the simple type named
        `self.typename` are considered. Codes are converted to
        `self.datatype`; the label is the string form of the code.

        Parameters
        ----------
        codelist : any
            XML document as returned by `get_remote_codelist`, or None.

        Yields
        ------
        item : CodeListItem
            One item per enumeration value. Nothing is yielded when
            `codelist` is None.

        """
        if codelist is None:
            return

        enumeration_path = (
            './/{{http://www.w3.org/2001/XMLSchema}}simpleType['
            '@name="{}"]/'
            '{{http://www.w3.org/2001/XMLSchema}}restriction/'
            '{{http://www.w3.org/2001/XMLSchema}}enumeration'.format(
                self.typename))
        documentation_path = (
            './{http://www.w3.org/2001/XMLSchema}annotation/{'
            'http://www.w3.org/2001/XMLSchema}documentation')

        schema = etree.fromstring(codelist)
        for enumeration in schema.findall(enumeration_path):
            code = typeconvert(enumeration.get('value'), self.datatype)
            yield CodeListItem(
                code, str(code), enumeration.findtext(documentation_path))
[docs] class FeatureCatalogueValues(AbstractCodeList): """Class representing a codelist from feature catalogue values."""