Source code for pydov.util.dovutil

# -*- coding: utf-8 -*-
"""Module grouping utility functions for DOV XML services."""
import os
import requests

from owslib.etree import etree

from pydov.util.errors import RemoteFetchError, XmlParseError
from pydov.util.hooks import HookRunner
from pydov.util.net import SessionFactory

import re

re_environment = re.compile(r'https://([^\.]+)\.dov\.vlaanderen\.be.*')


def build_dov_url(path):
    """Build an absolute DOV url by appending the given path to the base url.

    The base url is read from the ``PYDOV_BASE_URL`` environment variable
    when it is set; otherwise the fixed url
    ``https://www.dov.vlaanderen.be/`` is used.

    Parameters
    ----------
    path : str
        Path to append to the base url. Leading slashes are stripped, so
        the result contains exactly one slash between base url and path.

    Returns
    -------
    str
        The absolute DOV url.

    """
    custom_base = os.environ.get('PYDOV_BASE_URL')

    if custom_base is None:
        prefix = 'https://www.dov.vlaanderen.be/'
    else:
        # Normalise the configured base so it ends in exactly one slash.
        prefix = custom_base.rstrip('/') + '/'

    return prefix + path.lstrip('/')
def build_dov_sparql_request(query):
    """Build a request with the given SPARQL query for execution on the DOV
    SPARQL endpoint.

    The endpoint host is derived from the DOV base url (see
    ``build_dov_url``): the subdomain in front of ``.dov.vlaanderen.be`` is
    appended to ``data`` as ``-<subdomain>``, except for ``www`` which maps
    to the endpoint without a suffix.

    Parameters
    ----------
    query : str
        SPARQL query to execute.

    Returns
    -------
    requests.Request
        Request prepared with the correct endpoint, parameters and headers
        to execute the SPARQL query.

    Raises
    ------
    ValueError
        If the DOV base url does not have the form
        ``https://<environment>.dov.vlaanderen.be``, so no matching SPARQL
        endpoint can be derived from it.

    """
    base_url = build_dov_url('')

    match = re_environment.search(base_url)
    if match is None:
        # A custom PYDOV_BASE_URL may not be a DOV host at all; fail with a
        # clear message instead of an AttributeError on None.
        raise ValueError(
            "Cannot derive the DOV SPARQL endpoint from base url "
            "'{}'.".format(base_url))

    # '-www' is dropped entirely: that environment has no suffix.
    env = ('-' + match.group(1)).replace('-www', '')
    endpoint = f'https://data{env}.bodemenondergrond.vlaanderen.be/sparql'

    return requests.Request(
        method='GET',
        url=endpoint,
        params={'query': query},
        headers={'Accept': 'application/rdf+xml'}
    )
def get_remote_url(url, session=None):
    """Fetch the given URL with an HTTP GET and return the body as bytes.

    Parameters
    ----------
    url : str
        URL to download.
    session : requests.Session
        Session to use to perform the HTTP request. Defaults to None, in
        which case a session is obtained from
        ``SessionFactory.get_session()``.

    Returns
    -------
    xml : bytes
        The response text, encoded as UTF-8 bytes.

    Raises
    ------
    RemoteFetchError
        If the response status code is anything other than 200.

    """
    http = SessionFactory.get_session() if session is None else session
    response = http.get(url)

    if response.status_code == 200:
        # Set the encoding explicitly before reading the text, then hand
        # the result back as UTF-8 bytes.
        response.encoding = 'utf-8'
        return response.text.encode('utf8')

    raise RemoteFetchError("Failed to fetch data at {}".format(url))
def get_remote_request(request, session=None):
    """Prepare the given request, send it and return the body as bytes.

    Parameters
    ----------
    request : requests.Request
        Request to execute.
    session : requests.Session
        Session used to prepare and send the request. Defaults to None, in
        which case a session is obtained from
        ``SessionFactory.get_session()``.

    Returns
    -------
    xml : bytes
        The response text, encoded as UTF-8 bytes.

    Raises
    ------
    RemoteFetchError
        If the response status code is anything other than 200. The
        message reports the url of the response.

    """
    http = SessionFactory.get_session() if session is None else session

    prepared = http.prepare_request(request)
    response = http.send(prepared)

    if response.status_code == 200:
        # Set the encoding explicitly before reading the text, then hand
        # the result back as UTF-8 bytes.
        response.encoding = 'utf-8'
        return response.text.encode('utf8')

    raise RemoteFetchError("Failed to fetch data at {}".format(
        response.url))
def get_dov_xml(url, session=None):
    """Return the XML of a DOV object, giving registered hooks a say.

    ``HookRunner.execute_inject_xml_response`` is consulted first; only
    when it yields None is the URL fetched with ``get_remote_url``. In
    both cases ``HookRunner.execute_xml_received`` is called with the url
    and the resulting XML before it is returned.

    Parameters
    ----------
    url : str
        URL of the DOV object to download.
    session : requests.Session
        Session passed on to ``get_remote_url`` when the XML has to be
        fetched. Defaults to None.

    Returns
    -------
    xml : bytes
        The raw XML data of this DOV object as bytes.

    """
    xml = HookRunner.execute_inject_xml_response(url)

    if xml is None:
        # No hook supplied a response: fall back to the remote service.
        xml = get_remote_url(url, session)

    HookRunner.execute_xml_received(url, xml)
    return xml
def parse_dov_xml(xml_data):
    """Parse the given XML data into an ElementTree.

    Parameters
    ----------
    xml_data : bytes
        The raw XML data of a DOV object as bytes.

    Returns
    -------
    tree : etree.ElementTree
        Parsed XML tree of the DOV object.

    Raises
    ------
    XmlParseError
        If parsing the XML data raises any exception. The original
        exception is attached as the cause.

    """
    try:
        parser = etree.XMLParser(
            ns_clean=True, recover=True, resolve_entities=False)
    except TypeError:
        # The XMLParser in use does not accept these keyword arguments;
        # fall back to a parser with default settings.
        parser = etree.XMLParser()

    try:
        return etree.fromstring(xml_data, parser=parser)
    except Exception as err:
        # Chain explicitly so the traceback shows the underlying parser
        # failure as the direct cause of the XmlParseError.
        raise XmlParseError("Failed to parse XML record.") from err