Skip to content
Open
5 changes: 5 additions & 0 deletions fair_eva/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
#!/usr/bin/env python3

import argparse
import os.path

import connexion
from connexion.resolver import RestyResolver

import fair_eva

app_dirname = os.path.dirname(fair_eva.__file__)


def set_parser():
parser = argparse.ArgumentParser(description="FAIR EVA API server")
Expand Down
454 changes: 439 additions & 15 deletions fair_eva/api/evaluator.py

Large diffs are not rendered by default.

20 changes: 15 additions & 5 deletions fair_eva/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,13 +881,23 @@ def resolve_handle(handle_id):
return resolves, msg, values


def check_link(address):
def check_link(address, return_http_code=False):
resolves = False
req = urllib.request.Request(url=address)
resp = urllib.request.urlopen(req)
if resp.status in [400, 404, 403, 408, 409, 501, 502, 503]:
return False
try:
resp = urllib.request.urlopen(req, timeout=15)
except urllib.error.URLError as e:
logging.warning("Timeout reached while trying to connect to '%s'" % address)
except urllib.error.HTTPError as e:
logging.warning("Could not access to resource: %s" % address)
else:
return True
http_code = resp.status
logging.debug("Returned HTTP status from '%s': %s" % (address, http_code))
if return_http_code:
return http_code
if http_code not in ["400", "404", "403", "408", "409", "501", "502", "503"]:
resolves = True
return resolves


def get_protocol_scheme(url):
Expand Down
227 changes: 227 additions & 0 deletions fair_eva/api/vocabulary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
import ast
import json
import logging
import os

import requests

from fair_eva import app_dirname

logger = logging.getLogger("plugin.py")


class VocabularyConnection:
def __init__(self, **config_items):
self.vocabulary_name = config_items.get("vocabulary_name", "")
self.enable_remote_check = ast.literal_eval(
config_items.get("enable_remote_check", "True")
)
self.remote_path = config_items.get("remote_path", "")
self.remote_username = config_items.get("remote_username", "")
self.remote_password = config_items.get("remote_password", "")
self.local_path = config_items.get("local_path", "")
self.local_path_full = ""

def _get_token(self):
return NotImplementedError

def _login(self):
return NotImplementedError

def _remote_collect(self):
"""Performs the remote call to the vocabulary registry.

It shall return a tuple (error_on_request, content), where 'error_on_request' is
a boolean and 'content' is the actual content returned by the request when
successful.
"""
raise NotImplementedError

def _local_collect(self):
raise NotImplementedError

def collect(self, search_item=None, perform_login=False):
content = []
# Get content from remote endpoint
error_on_request = False
if self.enable_remote_check:
logger.debug(
"Accessing vocabulary '%s' remotely through %s"
% (self.name, self.remote_path)
)
error_on_request, content = self._remote_collect()
# Get content from local cache
if not self.enable_remote_check or error_on_request:
logger.debug(
"Accessing vocabulary '%s' from local cache: %s"
% (self.name, self.local_path)
)
self.local_path_full = os.path.join(app_dirname, self.local_path)
logger.debug("Full path to local cache: %s" % self.local_path_full)
content = self._local_collect()

return content


class IANAMediaTypes(VocabularyConnection):
def __init__(self, config):
self.name = "IANA Media Types"
self._config_items = dict(config.items("vocabularies:iana_media_types"))

def _parse_xml(self, from_file=False, from_string=""):
property_key_xml = self._config_items.get(
"property_key_xml", "{http://www.iana.org/assignments}file"
)
logger.debug(
"Using XML property key '%s' to gather the list of media types"
% property_key_xml
)

import xml.etree.ElementTree as ET

tree = None
if from_file:
tree = ET.parse(self.local_path_full)
root = tree.getroot()
elif from_string:
root = ET.fromstring(from_string)
else:
logger.error("Could not get IANA Media Types from %s" % self.remote_path)
return []

media_types_list = [
media_type.text for media_type in root.iter(property_key_xml)
]
logger.debug("Found %s items for IANA media types" % len(media_types_list))

return media_types_list

def _remote_collect(self):
error_on_request = False
content = []
headers = {"Content-Type": "application/xml"}
response = requests.request("GET", self.remote_path, headers=headers)
if response.ok:
content = response.text
media_types_list = self._parse_xml(from_string=content)
if not media_types_list:
error_on_request = True
else:
error_on_request = True

return error_on_request, content

def _local_collect(self):
return self._parse_xml(from_file=True)

def collect(self):
super().__init__(**self._config_items)
content = super().collect()

return content


class FAIRsharingRegistry(VocabularyConnection):
def __init__(self, config):
self.name = "FAIRsharing registry"
self._config_items = dict(config.items("vocabularies:fairsharing"))

def _login(self):
url_api_login = "https://api.fairsharing.org/users/sign_in"
payload = {
"user": {"login": self.remote_username, "password": self.remote_password}
}
login_headers = {
"Accept": "application/json",
"Content-Type": "application/json",
}
response = requests.request(
"POST", url_api_login, headers=login_headers, data=json.dumps(payload)
)
# Get the JWT from the response.text to use in the next part.
headers = {}
if response.ok:
data = response.json()
token = data["jwt"]
logger.debug("Get token from FAIRsharing API: %s" % token)
headers = {
"Accept": "application/json",
"Content-Type": "application/json",
"Authorization": "Bearer {0}".format(token),
}
else:
logger.warning(
"Could not get token from FAIRsharing API: %s" % response.text
)

return headers

def _remote_collect(self):
error_on_request = False
content = []
if not (self.remote_username and self.remote_password):
logger.error(
"Could not get required 'username' and 'password' properties for accessing FAIRsharing registry API"
)
else:
headers = self._login()
logger.debug("Got headers from sign in process: %s" % headers)
response = requests.request("POST", self.remote_path, headers=headers)
if response.ok:
content = response.json().get("data", [])
if content:
logger.debug(
"Successfully returned %s items from search query: %s"
% (len(content), self.remote_path)
)
else:
error_on_request = True
else:
logger.warning(
"Failed to obtain records from endpoint: %s" % response.text
)
error_on_request = True

return error_on_request, content

def _local_collect(self):
with open(self.local_path, "r") as f:
content = json.load(f).get("data", [])
logger.debug("Successfully loaded local cache: %s" % content)

return content

def collect(self, search_topic):
# Set specific query parameters for remote requests
remote_path = self._config_items.get("remote_path", "")
if not remote_path:
logger.warning(
"Could not get FAIRsharing API endpoint from configuration (check 'remote_path' property)"
)
else:
query_parameter = "q=%s" % search_topic
remote_path_with_query = "?page[size]=2500&".join(
[remote_path, query_parameter]
)
self._config_items["remote_path"] = remote_path_with_query
logger.debug(
"Request URL to FAIRsharing API with search topic '%s': %s"
% (search_topic, self._config_items["remote_path"])
)
super().__init__(**self._config_items)
content = super().collect()

return content


class Vocabulary:
def __init__(self, config):
self.config = config

def get_iana_media_types(self):
vocabulary = IANAMediaTypes(self.config)
return vocabulary.collect()

def get_fairsharing(self, search_topic):
vocabulary = FAIRsharingRegistry(self.config)
return vocabulary.collect(search_topic=search_topic)
Loading