# -*- coding: utf-8 -*-
"""Module containing extra query classes to build attribute search queries."""
from owslib.fes2 import OgcExpression, Or, PropertyIsEqualTo, PropertyIsLike
[docs]
class PropertyInList(OgcExpression):
    """Filter expression matching a property against a collection of values.

    The expression holds when the property is equal to any one of the given
    values. Internally it is translated to an Or combination of
    PropertyIsEqualTo expressions:

    PropertyInList('methode', ['spade', 'spoelboring']) is equivalent to

    Or([PropertyIsEqualTo('methode', 'spade'),
        PropertyIsEqualTo('methode', 'spoelboring')])

    When only a single distinct value is given, the query is that single
    PropertyIsEqualTo expression, without an Or around it.

    """

    def __init__(self, propertyname, lst):
        """Initialisation.

        Parameters
        ----------
        propertyname : str
            Name of the attribute to query.
        lst : list or set of str
            Literals to match against (exact matches). Duplicate values are
            only used once.

        Raises
        ------
        ValueError
            If `lst` is neither a list nor a set.
            If `lst` does not contain at least a single item.

        """
        super(PropertyInList, self).__init__()

        if not isinstance(lst, (list, set)):
            raise ValueError('list should be of type "list" or "set"')

        # De-duplicate once; every following decision uses distinct values.
        distinct_values = set(lst)

        if not distinct_values:
            raise ValueError('list should contain at least a single item')

        if len(distinct_values) == 1:
            (only_value,) = distinct_values
            self.query = PropertyIsEqualTo(propertyname, only_value)
            return

        # Sorting makes the order of the generated sub-expressions stable.
        equality_tests = []
        for literal in sorted(distinct_values):
            equality_tests.append(PropertyIsEqualTo(propertyname, literal))
        self.query = Or(equality_tests)

    def toXML(self):
        """Return the XML representation of the PropertyInList query.

        Returns
        -------
        xml : etree.ElementTree
            XML representation of the PropertyInList, as produced by the
            toXML method of the internally built query.

        """
        internal_query = self.query
        return internal_query.toXML()
[docs]
class PropertyLikeList(OgcExpression):
    """Filter expression matching a property against a list of patterns.

    The expression holds when the property is like any one of the given
    values, after applying the modifier to each of them. Internally it is
    translated to an Or combination of PropertyIsLike expressions:

    PropertyLikeList('methode', ['spade', 'spoelboring'], '%{item}%') is
    equivalent to

    Or([PropertyIsLike('methode', '%spade%'),
        PropertyIsLike('methode', '%spoelboring%')])

    When only a single distinct value is given, the query is that single
    PropertyIsLike expression, without an Or around it.

    """

    def __init__(self, propertyname, lst, modifier='%{item}%'):
        """Initialisation.

        Parameters
        ----------
        propertyname : str
            Name of the attribute to query.
        lst : list or set of str
            Item literals to match against. Duplicate values are only used
            once.
        modifier : str
            Optional, modifier to apply to the lst items when constructing
            the query. You can use the string '{item}' in it which will be
            replaced by the lst item.

        Raises
        ------
        ValueError
            If `lst` is neither a list nor a set.
            If the modifier is not of type str.
            If `lst` does not contain at least a single item.

        """
        super(PropertyLikeList, self).__init__()

        if not isinstance(lst, (list, set)):
            raise ValueError('list should be of type "list" or "set"')

        if not isinstance(modifier, str):
            raise ValueError('modifier should be of type "str"')

        distinct_items = set(lst)
        if not distinct_items:
            raise ValueError('list should contain at least a single item')

        # Sorting makes the order of the generated sub-expressions stable;
        # for a single item it is a no-op.
        patterns = [modifier.format(item=entry)
                    for entry in sorted(distinct_items)]

        if len(patterns) == 1:
            self.query = PropertyIsLike(propertyname, patterns[0])
        else:
            self.query = Or(
                [PropertyIsLike(propertyname, pattern)
                 for pattern in patterns])

    def toXML(self):
        """Return the XML representation of the PropertyLikeList query.

        Returns
        -------
        xml : etree.ElementTree
            XML representation of the PropertyLikeList, as produced by the
            toXML method of the internally built query.

        """
        internal_query = self.query
        return internal_query.toXML()
[docs]
class AbstractJoin:
    """Abstract base class for the Join classes.

    Provides helpers to collect the distinct values of a dataframe column,
    where the column holds either plain values or collections (list, set or
    tuple) of values.

    """

    # Collection types whose members are treated as individual values.
    _ITERABLE_TYPES = (list, set, tuple)

    @staticmethod
    def _is_iterable_type(dataframe, column):
        """Check if a column of a dataframe holds iterable values.

        The decision is based on the first non-null element of the column,
        so that a leading missing value does not hide a column of
        collections.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            A pandas DataFrame containing the data.
        column : str
            The name of the column to check.

        Returns
        -------
        bool
            True if the first non-null element of the column is a list, set
            or tuple, False otherwise (including when the column only
            contains null values).

        Raises
        ------
        ValueError
            If the input dataframe is empty.

        """
        if len(dataframe) < 1:
            raise ValueError("dataframe should not be empty")

        non_null_values = dataframe[column].dropna()
        if len(non_null_values) < 1:
            return False

        return isinstance(
            non_null_values.iloc[0], AbstractJoin._ITERABLE_TYPES)

    @staticmethod
    def _get_unique_value_list(dataframe, column):
        """Retrieve the unique non-null values of a dataframe column.

        If the column holds iterable values (list, set or tuple), the
        members of all those collections are merged into a single list of
        unique values.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            A pandas DataFrame containing the data.
        column : str
            The name of the column to process.

        Returns
        -------
        list
            A list of unique values from the specified column, possibly
            aggregating iterable values. For iterable columns the order of
            the result is not defined.

        Raises
        ------
        ValueError
            If the input dataframe is empty.

        """
        # Checked first so an empty dataframe raises the documented
        # ValueError before the column is accessed.
        holds_collections = AbstractJoin._is_iterable_type(dataframe, column)
        non_null_values = dataframe[column].dropna()

        if not holds_collections:
            return list(non_null_values.unique())

        # Merge into one set rather than summing the cells: sets do not
        # support `+`, and repeated list concatenation is quadratic.
        merged = set()
        for cell in non_null_values:
            if isinstance(cell, AbstractJoin._ITERABLE_TYPES):
                merged.update(cell)
            else:
                # A stray plain value in a column of collections is kept as
                # a single value instead of being iterated over.
                merged.add(cell)
        return list(merged)
[docs]
class Join(AbstractJoin, PropertyInList):
    """Filter expression to join different searches together.

    Internally translates to a PropertyInList:

    Join(df, 'pkey_boring') is equivalent to

    PropertyInList('pkey_boring', list(df['pkey_boring'])) which is
    equivalent to

    Or([PropertyIsEqualTo('pkey_boring', x),
        PropertyIsEqualTo('pkey_boring', y), ...])
    for every x, y, in df['pkey_boring']

    """

    def __init__(self, dataframe, on, using=None):
        """Initialisation.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            Dataframe to use a basis for joining.
        on : str
            Name of the column in the queried datatype to join on.
        using : str, optional
            Name of the column in the dataframe to use for joining. By
            default, the same column name as in `on` is assumed.

        Raises
        ------
        ValueError
            If the `using` column is not present in the dataframe.
            If `using` is None and the `on` column is not present in the
            dataframe.
            If the dataframe is empty.
            If the dataframe does not contain at least a single non-null
            value in the `using` column.

        """
        # Fall back to the queried column name when no dataframe column
        # was named explicitly.
        using = on if using is None else using

        column_names = list(dataframe)
        if using not in column_names:
            missing_column = "column '{}' should be present in the dataframe."
            raise ValueError(missing_column.format(using))

        join_values = self._get_unique_value_list(dataframe, using)

        if not set(join_values):
            no_values = ("dataframe should contain at least a single "
                         "value in column '{}'.")
            raise ValueError(no_values.format(using))

        super(Join, self).__init__(on, join_values)
[docs]
class FuzzyJoin(AbstractJoin, PropertyLikeList):
    """Filter expression to join different searches together in a fuzzy
    (non-exact) way.

    Internally translates to a PropertyLikeList:

    FuzzyJoin(df, 'pkey_boring', modifier='%|{item}|%') is equivalent to

    PropertyLikeList('pkey_boring', list(df['pkey_boring']),
                     modifier='%|{item}|%')

    which is equivalent to

    Or([PropertyIsLike('pkey_boring', '%|x|%'),
        PropertyIsLike('pkey_boring', '%|y|%'), ...])
    for every x, y, in df['pkey_boring']

    """

    def __init__(self, dataframe, on, using=None, modifier='%|{item}|%'):
        """Initialisation.

        Parameters
        ----------
        dataframe : pandas.DataFrame
            Dataframe to use a basis for joining.
        on : str
            Name of the column in the queried datatype to join on.
        using : str, optional
            Name of the column in the dataframe to use for joining. By
            default, the same column name as in `on` is assumed.
        modifier : str, optional, defaults to `'%|{item}|%'`
            Optional, modifier to apply to the dataframe items when
            constructing the query. You can use the string '{item}' in it
            which will be replaced by the dataframe item.

        Raises
        ------
        ValueError
            If the `using` column is not present in the dataframe.
            If `using` is None and the `on` column is not present in the
            dataframe.
            If the dataframe is empty.
            If the dataframe does not contain at least a single non-null
            value in the `using` column.

        """
        # Fall back to the queried column name when no dataframe column
        # was named explicitly.
        using = on if using is None else using

        column_names = list(dataframe)
        if using not in column_names:
            missing_column = "column '{}' should be present in the dataframe."
            raise ValueError(missing_column.format(using))

        join_values = self._get_unique_value_list(dataframe, using)

        if not set(join_values):
            no_values = ("dataframe should contain at least a single "
                         "value in column '{}'.")
            raise ValueError(no_values.format(using))

        super(FuzzyJoin, self).__init__(on, join_values, modifier)