import html
import warnings
from dataclasses import dataclass

from owslib.etree import etree

from pydov.util.dovutil import (
    build_dov_sparql_request, get_remote_url, get_remote_request)
from pydov.util.errors import CodelistFetchWarning, RemoteFetchError
from pydov.util.hooks import HookRunner
from pydov.util.notebook import HtmlFormatter
from pydov.util.owsutil import typeconvert
from pydov.util.wrappers import AbstractDictLike


class MemoryCache:
    """Process-wide in-memory object store.

    Used to keep fetched codelists around, so the same codelist is not
    downloaded twice during the runtime of a pydov session.

    """

    # Class attribute: a single dictionary shared by every caller.
    cache = {}

    @staticmethod
    def get(key, fn, *args, **kwargs):
        """Return the object cached under `key`, creating it on a miss.

        On a cache hit the stored object is returned and `fn` is not
        called. On a miss, `fn(*args, **kwargs)` is called, its result is
        stored under `key` and then returned. If `fn` raises, nothing is
        stored and the exception propagates to the caller.

        Parameters
        ----------
        key : hashable
            Key identifying the cached object.
        fn : callable
            Factory used to build the object when `key` is not cached.
        *args
            Positional arguments passed to `fn`.
        **kwargs
            Keyword arguments passed to `fn`.

        Returns
        -------
        object
            The cached object, or the freshly generated one.

        """
        if key in MemoryCache.cache:
            return MemoryCache.cache[key]

        produced = fn(*args, **kwargs)
        MemoryCache.cache[key] = produced
        return produced

    @staticmethod
    def clear():
        """Remove every entry from the memory cache."""
        MemoryCache.cache.clear()


@dataclass
class CodeListItem(HtmlFormatter):
    """Class to represent an item in a codelist.

    Attributes
    ----------
    code : str
        The code of the item.
    label : str
        The label of the item.
    definition : str, optional
        The definition of the item. Defaults to None.

    """

    code: str
    label: str
    definition: str = None

    def __repr__(self):
        """String representation of the codelist item.

        Returns
        -------
        str
            String representation of the codelist item.

        """
        s = (f'code: {self.code}, label: {self.label}, '
             f'definition: {self.definition}')
        return f'<pydov.util.codelists.CodeListItem: {s}>'

    def _repr_html_(self):
        """HTML representation of the codelist item.

        The code, label and definition are HTML-escaped before being
        embedded in the markup, so characters such as ``<`` or ``&`` are
        displayed literally instead of being interpreted as markup.

        Returns
        -------
        str
            HTML representation of the codelist item.

        """
        # str() first: codes are not necessarily strings, and html.escape
        # only accepts str.
        code = html.escape(str(self.code))
        label = html.escape(str(self.label))

        if self.definition is not None:
            definition = html.escape(str(self.definition))
            markup = (f'<p><b>{code}</b> - {label} - '
                      f'<i>{definition}</i></p>')
        else:
            markup = f'<p><b>{code}</b> - {label}</p>'

        return super()._repr_html_(markup, with_header=False)


class AbstractCodeList(AbstractDictLike, HtmlFormatter):
    """Abstract base class for codelists.

    Items are kept in the `items` dictionary, keyed by their code. The same
    dictionary is handed to the parent initialiser.

    """

    def __init__(self):
        """Initialisation: start with an empty item dictionary."""
        self.items = {}
        super().__init__(self.items)

    def get_label(self, code):
        """Look up the label belonging to a code.

        Parameters
        ----------
        code : str
            The code to get the label for.

        Returns
        -------
        label : str or None
            The label for the given code, or None if the code is not found.

        """
        entry = self.items.get(code)
        return None if entry is None else entry.label

    def get_definition(self, code):
        """Look up the definition belonging to a code.

        Parameters
        ----------
        code : str
            The code to get the definition for.

        Returns
        -------
        definition : str or None
            The definition for the given code, or None if the code is not
            found.

        """
        entry = self.items.get(code)
        return None if entry is None else entry.definition

    def get_codelist(self):
        """Get the codelist.

        Returns
        -------
        codelist : AbstractCodeList
            The codelist itself.

        """
        return self

    def get(self, *args, **kwargs):
        """Get an item from the codelist.

        Parameters
        ----------
        *args
            Arguments to pass to the underlying dictionary get method.
        **kwargs
            Keyword arguments to pass to the underlying dictionary get
            method.

        Returns
        -------
        item : CodeListItem or None
            The item for the given code, or the default (None unless one
            is supplied) if the code is not found.

        """
        return self.items.get(*args, **kwargs)

    def get_values(self):
        """Get the values of the codelist.

        Returns
        -------
        values : dict or None
            A dictionary with the codes as keys and the labels as values,
            or None when the codelist holds no items.

        """
        if not self.items:
            return None
        return {entry.code: entry.label for entry in self.items.values()}

    def add_item(self, item):
        """Add an item to the codelist.

        An existing item with the same code is replaced.

        Parameters
        ----------
        item : CodeListItem
            The item to add to the codelist.

        """
        self.items[item.code] = item

    def add_items(self, items):
        """Add multiple items to the codelist.

        Parameters
        ----------
        items : list of CodeListItem
            The items to add to the codelist.

        """
        for entry in items:
            self.add_item(entry)

    def is_empty(self):
        """Check if the codelist is empty.

        Returns
        -------
        is_empty : bool
            True if the codelist is empty, False otherwise.

        """
        return not self.items

    def __repr__(self):
        """String representation of the codelist.

        Items are listed in ascending order of their code.

        Returns
        -------
        str
            String representation of the codelist.

        """
        ordered = sorted(self.items.values(), key=lambda entry: entry.code)
        listing = ', '.join(repr(entry) for entry in ordered)
        return f'<pydov.util.codelists.AbstractCodeList: {listing}>'

    def _repr_html_(self):
        """HTML representation of the codelist.

        The HTML of the individual items, in ascending order of their code,
        is concatenated and passed to the parent formatter.

        Returns
        -------
        str
            HTML representation of the codelist.

        """
        ordered = sorted(self.items.values(), key=lambda entry: entry.code)
        fragments = [entry._repr_html_() for entry in ordered]
        return super()._repr_html_(''.join(fragments))


class AbstractResolvableCodeList(AbstractCodeList):
    """Abstract base class for resolvable codelists.

    A resolvable codelist will load its values from another datasource.
    Loading is lazy: the accessor methods below call `resolve` first, which
    fetches and parses the remote codelist the first time it is needed.

    """

    def __init__(self, datatype):
        """Initialisation.

        Parameters
        ----------
        datatype : str
            The datatype of the codes of the codelist. One of
            `string`, `float`, `integer`, `date`, `datetime`, `boolean`.

        """
        super().__init__()
        self.datatype = datatype
        self._is_resolved = False

    def get_id(self):
        """Get a unique id for this codelist. Needs to be unique among all
        codelists and will be used for caching.

        Raises
        ------
        NotImplementedError
            This is an abstract method that should be implemented in a
            subclass.

        """
        raise NotImplementedError

    def get_remote_codelist(self):
        """Fetch the remote codelist.

        Returns
        -------
        codelist : any
            Codelist in a format that can be parsed by the
            parse_codelist_items method.

        Raises
        ------
        NotImplementedError
            This is an abstract method that should be implemented in a
            subclass.

        """
        raise NotImplementedError

    def parse_codelist_items(self, codelist):
        """Parse the codelist items from the fetched codelist.

        Parameters
        ----------
        codelist : any
            Codelist to parse.

        Yields
        ------
        item : CodeListItem
            A CodeListItem for each item in the codelist.

        Raises
        ------
        NotImplementedError
            This is an abstract method that should be implemented in a
            subclass.

        """
        raise NotImplementedError

    def resolve(self):
        """Resolve the remote codelist values.

        Fetches the remote codelist and adds every parsed item. The list is
        marked as resolved once this completes without raising, whatever
        was fetched, so later calls do nothing.

        """
        if not self._is_resolved:
            codelist = self.get_remote_codelist()
            for item in self.parse_codelist_items(codelist):
                self.add_item(item)
            self._is_resolved = True

    def is_empty(self):
        """Check if the codelist is empty, resolving it first.

        Returns
        -------
        is_empty : bool
            True if the resolved codelist is empty, False otherwise.

        """
        self.resolve()
        return super().is_empty()

    def get_codelist(self):
        """Get the codelist, resolving it first.

        Returns
        -------
        codelist : AbstractCodeList
            The resolved codelist itself.

        """
        self.resolve()
        return super().get_codelist()

    def get(self, *args, **kwargs):
        """Get an item from the codelist, resolving it first.

        Without this override a lookup on a codelist that has not been
        resolved yet would return the default for every code, unlike
        `get_label` and `get_definition`, which do resolve.

        Parameters
        ----------
        *args
            Arguments to pass to the underlying dictionary get method.
        **kwargs
            Keyword arguments to pass to the underlying dictionary get
            method.

        Returns
        -------
        item : CodeListItem or None
            The item for the given code, or the default (None unless one
            is supplied) if the code is not found.

        """
        self.resolve()
        return super().get(*args, **kwargs)

    def get_values(self):
        """Get the values of the codelist, resolving it first.

        Returns
        -------
        values : dict or None
            A dictionary with the codes as keys and the labels as values,
            or None when the resolved codelist holds no items.

        """
        self.resolve()
        return super().get_values()

    def get_label(self, code):
        """Get the label for a given code, resolving the codelist first.

        Parameters
        ----------
        code : str
            The code to get the label for.

        Returns
        -------
        label : str or None
            The label for the given code, or None if the code is not found.

        """
        self.resolve()
        return super().get_label(code)

    def get_definition(self, code):
        """Get the definition for a given code, resolving the codelist
        first.

        Parameters
        ----------
        code : str
            The code to get the definition for.

        Returns
        -------
        definition : str or None
            The definition for the given code, or None if the code is not
            found.

        """
        self.resolve()
        return super().get_definition(code)


class OsloCodeList(AbstractResolvableCodeList):
    """Class representing a codelist in the OSLO linked data format."""

    def __init__(self, conceptscheme, datatype):
        """Initialisation.

        Parameters
        ----------
        conceptscheme : str
            OSLO conceptscheme which will be used for this codelist
        datatype : str
            The datatype of the codes of the codelist. One of
            `string`, `float`, `integer`, `date`, `datetime`, `boolean`.

        """
        super().__init__(datatype)
        self.conceptscheme = conceptscheme
        self._codelist = None

    def get_id(self):
        """Get the id of this codelist, used as its cache key.

        Returns
        -------
        str
            The conceptscheme name followed by `.xml`.

        """
        return f'{self.conceptscheme}.xml'

    def build_sparql_query(self):
        """Build the SPARQL query to fetch the codelist from DOV.

        The query selects the code, label and (optional) definition of
        every concept in this codelist's conceptscheme.

        Returns
        -------
        query : str
            The SPARQL query to fetch the codelist.

        """
        # Doubled braces are literal braces for str.format; the single
        # `{}` receives the conceptscheme name.
        template = '\n'.join((
            '',
            'PREFIX skos: <http://www.w3.org/2004/02/skos/core#>',
            'PREFIX conceptscheme:',
            '<https://data.bodemenondergrond.vlaanderen.be/id/conceptscheme/>',
            'SELECT ?code ?label ?definition',
            'WHERE {{',
            '?s skos:inScheme conceptscheme:{} .',
            '?s skos:notation ?code .',
            '?s skos:prefLabel ?label .',
            'OPTIONAL {{ ?s skos:note ?definition . }}',
            '}}',
            '',
        ))
        return template.format(self.conceptscheme)

    def get_remote_codelist(self):
        """Fetch the codelist through a SPARQL request.

        A response injected through the hooks takes precedence. Otherwise
        the response is taken from the memory cache, which performs the
        request on a miss. A failed fetch emits a CodelistFetchWarning
        instead of raising.

        Returns
        -------
        codelist : any or None
            The response to parse with parse_codelist_items, or None when
            the fetch failed.

        """
        request = build_dov_sparql_request(self.build_sparql_query())

        injected = HookRunner.execute_inject_meta_response(request.url)
        if injected is not None:
            response = injected
        else:
            try:
                response = MemoryCache.get(
                    self.get_id(), get_remote_request, request)
            except RemoteFetchError:
                message = ("Failed to fetch remote sparql data, metadata "
                           "will be incomplete.")
                warnings.warn(message, CodelistFetchWarning)
                response = None

        HookRunner.execute_meta_received(request.url, response)
        return response

    def parse_codelist_items(self, codelist):
        """Parse the items from a SPARQL result document.

        Parameters
        ----------
        codelist : any or None
            XML document as returned by get_remote_codelist. Nothing is
            yielded when this is None.

        Yields
        ------
        item : CodeListItem
            One item per solution in the document; its definition is None
            when the solution has no `definition` binding.

        """
        if codelist is None:
            return

        ns = '{http://www.w3.org/2005/sparql-results#}'

        def value_path(variable):
            # Path to the value element of the binding for `variable`.
            return f'.//{ns}binding[{ns}variable="{variable}"]/{ns}value'

        document = etree.fromstring(codelist)
        for solution in document.findall(f'.//{ns}solution'):
            code = solution.find(value_path('code')).text
            label = solution.find(value_path('label')).text

            # The definition is OPTIONAL in the query, so it may be absent.
            definition_node = solution.find(value_path('definition'))
            definition = (
                None if definition_node is None else definition_node.text)

            yield CodeListItem(code, label, definition)


class XsdType(AbstractResolvableCodeList):
    """Class representing a codelist in the XSD format."""

    def __init__(self, xsd_schema, typename, datatype):
        """Initialise a XSD type reference.

        Parameters
        ----------
        xsd_schema : str
            URL of XSD schema record containing the specified typename.
        typename : str
            Name of the type.
        datatype : str
            The datatype of the codes of the codelist. One of
            `string`, `float`, `integer`, `date`, `datetime`, `boolean`.

        """
        super().__init__(datatype)
        self.source_url = xsd_schema
        self.typename = typename
        self._schema = None

    def get_id(self):
        """Get the id of this codelist, used as its cache key.

        Returns
        -------
        str
            The part of the schema URL after its last `/`.

        """
        # NOTE(review): only the last URL segment is used, so two schemas
        # with the same file name would share one cache entry.
        return self.source_url.rsplit('/', 1)[-1]

    def get_remote_codelist(self):
        """Fetch the XSD schema holding the codelist.

        A response injected through the hooks takes precedence. Otherwise
        the schema is taken from the memory cache, which downloads it on a
        miss. A failed fetch emits a CodelistFetchWarning instead of
        raising.

        Returns
        -------
        codelist : any or None
            The schema to parse with parse_codelist_items, or None when the
            fetch failed.

        """
        injected = HookRunner.execute_inject_meta_response(self.source_url)
        if injected is not None:
            response = injected
        else:
            try:
                response = MemoryCache.get(
                    self.get_id(), get_remote_url, self.source_url)
            except RemoteFetchError:
                message = ("Failed to fetch remote codelist, metadata will "
                           "be incomplete.")
                warnings.warn(message, CodelistFetchWarning)
                response = None

        HookRunner.execute_meta_received(self.source_url, response)
        return response

    def parse_codelist_items(self, codelist):
        """Parse the enumeration values of this type from an XSD schema.

        Parameters
        ----------
        codelist : any or None
            XSD document as returned by get_remote_codelist. Nothing is
            yielded when this is None.

        Yields
        ------
        item : CodeListItem
            One item per enumeration value of the simple type named
            `typename`. The code is the value converted to `datatype`, the
            label is the string form of that code and the definition is the
            text of the value's documentation annotation, or None when
            there is none.

        """
        if codelist is None:
            return

        xsd = '{http://www.w3.org/2001/XMLSchema}'
        enumeration_path = (
            f'.//{xsd}simpleType[@name="{self.typename}"]/'
            f'{xsd}restriction/{xsd}enumeration')
        documentation_path = f'./{xsd}annotation/{xsd}documentation'

        schema = etree.fromstring(codelist)
        for enumeration in schema.findall(enumeration_path):
            code = typeconvert(enumeration.get('value'), self.datatype)
            yield CodeListItem(
                code, str(code), enumeration.findtext(documentation_path))
[docs]
class FeatureCatalogueValues(AbstractCodeList):
"""Class representing a codelist from feature catalogue values."""