diff --git a/.gitignore b/.gitignore
index 73cc46a..1abf7b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
 *.pyc
 .idea
-venv
\ No newline at end of file
+venv
+attachments/*
+*.zip
+.DS_Store
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 92ff2e6..0e377e2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,7 +5,7 @@ LABEL maintainer="Specify Collections Consortium "
 RUN apt-get update && apt-get -y install --no-install-recommends \
         ghostscript \
         imagemagick \
-        python3.6 \
+        python3.12 \
         python3-venv \
     && apt-get clean && rm -rf /var/lib/apt/lists/*
 
@@ -19,7 +19,7 @@ WORKDIR /home/specify
 
 COPY --chown=specify:specify requirements.txt .
 
-RUN python3.6 -m venv ve && ve/bin/pip install --no-cache-dir -r requirements.txt
+RUN python3.12 -m venv ve && ve/bin/pip install --no-cache-dir -r requirements.txt
 
 COPY --chown=specify:specify *.py views ./
 
diff --git a/manage_collection_dirs.py b/manage_collection_dirs.py
index 2ca4d2d..742aa2a 100644
--- a/manage_collection_dirs.py
+++ b/manage_collection_dirs.py
@@ -1,63 +1,180 @@
-# This can be given either a single name or a list of names
-#
-# ```bash
-# python manage_collection_dirs.py add geo_swiss
-# ```
-# or
-# ```bash
-# python3 manage_collection_dirs.py remove geo_swiss naag mcsn
-# ```
-#
-# It creates new collection attachment directories. When a
-# collection is removed, the directory and attachments remain.
-
+#!/usr/bin/env python3
 import sys
-import os
 import subprocess
+import re
+import boto3
+from urllib.parse import urlparse
 
-def add_collection_dir(collection_dir_names):
-    # This creates a new directory for the collection
-    attachments_dir = 'attachments'
-    if not os.path.exists(attachments_dir):
-        os.mkdir(attachments_dir)
-    for collection_dir_name in collection_dir_names:
-        dir_path = f'{attachments_dir}/{collection_dir_name}'
-        if not os.path.exists(dir_path):
-            os.mkdir(dir_path)
-    with open("settings.py", "r+") as f:
-        lines = f.readlines()
-        for i, line in enumerate(lines):
-            if line.startswith("COLLECTION_DIRS = {"):
-                for collection_dir_name in collection_dir_names:
-                    lines.insert(i+1, f"    '{collection_dir_name}': '{collection_dir_name}',\n")
-                break
-        f.seek(0)
-        f.truncate()
-        f.writelines(lines)
+SETTINGS_FILE = "settings.py"
+SERVICE_NAME = "web-asset-server.service"  # adjust if different
 
-def remove_collection_dir(collection_dir_names):
-    with open("settings.py", "r+") as f:
-        lines = f.readlines()
-        for i, line in enumerate(lines):
-            for collection_dir_name in collection_dir_names:
-                if line.startswith(f"    '{collection_dir_name}': '{collection_dir_name}',"):
-                    lines.pop(i)
-                    break
-        f.seek(0)
-        f.truncate()
+
+def load_settings_contents():
+    with open(SETTINGS_FILE, "r") as f:
+        return f.readlines()
+
+
+def write_settings_contents(lines):
+    with open(SETTINGS_FILE, "w") as f:
         f.writelines(lines)
 
-if __name__ == "__main__":
+
+def parse_action_args():
     if len(sys.argv) < 3:
-        print("Usage: python manage_collection_dirs.py add <collection_dir_name> [<collection_dir_name> ...]")
-        print("Usage: python manage_collection_dirs.py remove <collection_dir_name> [<collection_dir_name> ...]")
+        print("Usage:")
+        print("  python manage_collection_dirs.py add <collection> <s3_uri> [<collection> <s3_uri> ...]")
+        print("  python manage_collection_dirs.py remove <collection> [<collection> ...]")
+        sys.exit(1)
+    action = sys.argv[1]
+    args = sys.argv[2:]
+    if action == "add":
+        if len(args) % 2 != 0:
+            print("For add, provide pairs: <collection> <s3_uri> ...")
+            sys.exit(1)
+        pairs = [(args[i], args[i+1]) for i in range(0, len(args), 2)]
+        return action, pairs
+    elif action == "remove":
+        names = args
+        return action, names
     else:
-        action = sys.argv[1]
-        collection_dir_names = sys.argv[2:]
-        if action == "add":
-            add_collection_dir(collection_dir_names)
-        elif action == "remove":
-            remove_collection_dir(collection_dir_names)
+        print("Invalid action. Use 'add' or 'remove'.")
+        sys.exit(1)
+
+
+def ensure_valid_s3_uri(uri):
+    parsed = urlparse(uri)
+    return parsed.scheme == "s3" and parsed.netloc
+
+
+def add_collections(pairs):
+    lines = load_settings_contents()
+    # find COLLECTION_S3_PATHS block
+    pattern = re.compile(r"^COLLECTION_S3_PATHS\s*=\s*{")
+    start_idx = None
+    for i, line in enumerate(lines):
+        if pattern.match(line):
+            start_idx = i
+            break
+    if start_idx is None:
+        print("Couldn't find COLLECTION_S3_PATHS definition in settings.py")
+        sys.exit(1)
+
+    # find end of dict (matching closing brace)
+    end_idx = start_idx
+    brace_depth = 0
+    for i in range(start_idx, len(lines)):
+        if "{" in lines[i]:
+            brace_depth += lines[i].count("{")
+        if "}" in lines[i]:
+            brace_depth -= lines[i].count("}")
+        if brace_depth == 0:
+            end_idx = i
+            break
+
+    # build existing entries map to avoid duplicates
+    existing = {}
+    for line in lines[start_idx+1:end_idx]:
+        m = re.match(r"\s*['\"]([^'\"]+)['\"]\s*:\s*['\"]([^'\"]+)['\"],?", line)
+        if m:
+            existing[m.group(1)] = m.group(2)
+
+    # insert or update entries
+    insertion = []
+    for coll, uri in pairs:
+        if not ensure_valid_s3_uri(uri):
+            print(f"Skipping invalid S3 URI for '{coll}': {uri}")
+            continue
+        if coll in existing:
+            print(f"Updating existing collection '{coll}' to '{uri}'")
+            # replace the entry's line in place
+            for i in range(start_idx+1, end_idx):
+                if re.match(rf"\s*['\"]{re.escape(coll)}['\"]\s*:", lines[i]):
+                    lines[i] = f"    '{coll}': '{uri}',\n"
+                    break
         else:
-            print("Invalid action. Use 'add' or 'remove'.")
-    subprocess.run(['systemctl', 'restart', 'web-asset-server.service'])
+            print(f"Adding collection '{coll}' -> '{uri}'")
+            insertion.append(f"    '{coll}': '{uri}',\n")
+
+    # inject new entries just before end_idx
+    if insertion:
+        lines = lines[:end_idx] + insertion + lines[end_idx:]
+
+    write_settings_contents(lines)
+
+    # create placeholder directories in S3 under originals/ and thumbnails/
+    import settings as user_settings  # first imported here, after the edit above, so values are current
+    s3 = boto3.client("s3")
+    for coll, uri in pairs:
+        if not ensure_valid_s3_uri(uri):
+            continue
+        bucket, base_prefix = parse_s3_uri(uri)
+        for sub in (user_settings.ORIG_DIR, user_settings.THUMB_DIR):
+            key_prefix = f"{base_prefix}/{sub}/"
+            # create a zero-byte object to make the prefix visible (not strictly needed)
+            s3.put_object(Bucket=bucket, Key=key_prefix)
+
+
+def remove_collections(names):
+    lines = load_settings_contents()
+    pattern = re.compile(r"^COLLECTION_S3_PATHS\s*=\s*{")
+    start_idx = None
+    for i, line in enumerate(lines):
+        if pattern.match(line):
+            start_idx = i
+            break
+    if start_idx is None:
+        print("Couldn't find COLLECTION_S3_PATHS in settings.py")
+        sys.exit(1)
+
+    # locate end of dict
+    end_idx = start_idx
+    brace_depth = 0
+    for i in range(start_idx, len(lines)):
+        if "{" in lines[i]:
+            brace_depth += lines[i].count("{")
+        if "}" in lines[i]:
+            brace_depth -= lines[i].count("}")
+        if brace_depth == 0:
+            end_idx = i
+            break
+
+    # filter out lines for the named collections
+    new_block = []
+    removed = []
+    for line in lines[start_idx+1:end_idx]:
+        skip = False
+        for name in names:
+            if re.match(rf"\s*['\"]{re.escape(name)}['\"]\s*:", line):
+                skip = True
+                removed.append(name)
+                break
+        if not skip:
+            new_block.append(line)
+
+    if not removed:
+        print("No matching collections to remove found.")
+        return
+
+    # reconstruct file
+    new_lines = lines[:start_idx+1] + new_block + lines[end_idx:]
+    write_settings_contents(new_lines)
+    print(f"Removed collections: {', '.join(removed)}")
+
+
+def parse_s3_uri(s3_uri):
+    parsed = urlparse(s3_uri)
+    if parsed.scheme != 's3' or not parsed.netloc:
+        raise ValueError(f"Invalid S3 URI: {s3_uri}")
+    bucket = parsed.netloc
+    prefix = parsed.path.lstrip('/').rstrip('/')
+    return bucket, prefix
+
+
+if __name__ == "__main__":
+    action, payload = parse_action_args()
+    if action == "add":
+        add_collections(payload)
+    else:  # remove
+        remove_collections(payload)
+
+    # restart service
+    subprocess.run(["systemctl", "restart", SERVICE_NAME])
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 5e0c9a9..291e29b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
 ExifRead==2.3.1
 Paste==3.4.4
-sh==1.14.0
+sh==2.0
 Bottle>=0.12.23,<0.13
+boto3>=1.26.0,<2.0
+boto3-stubs>=1.26.0,<2.0
\ No newline at end of file
diff --git a/server.py b/server.py
index b906f66..7be7717 100644
--- a/server.py
+++ b/server.py
@@ -1,45 +1,45 @@
 from collections import defaultdict, OrderedDict
 from functools import wraps
-from glob import glob
 from mimetypes import guess_type
-from os import path, mkdir, remove
-from urllib.parse import quote
-from urllib.request import pathname2url
+from os import path
+from urllib.parse import quote, urlparse
+from io import BytesIO
 
 import exifread
 import hmac
 import json
 import time
 from sh import convert
+import boto3
+from botocore.exceptions import ClientError
 
 import settings
 from bottle import (
     Response, request, response, static_file, template, abort,
-    HTTPResponse, route)
+    HTTPResponse, route
+)
 
-def log(msg):
-    if settings.DEBUG:
-        print(msg)
-
+from bottle import error
 
-def get_rel_path(coll, thumb_p):
-    """Return originals or thumbnails subdirectory of the main
-    attachments directory for the given collection.
-    """
-    type_dir = settings.THUMB_DIR if thumb_p else settings.ORIG_DIR
+@error(500)
+def show_500(exc):
+    import traceback
+    # bottle attaches the stacktrace to the HTTPError when catchall is on;
+    # fall back to format_exc() just in case.
+    tb = exc.traceback or traceback.format_exc()
+    print(tb)  # prints in your console
+    return "
<h1>500 Internal Server Error</h1><pre>" + tb + "</pre>
" - if settings.COLLECTION_DIRS is None: - return type_dir - try: - coll_dir = settings.COLLECTION_DIRS[coll] - except KeyError: - abort(404, "Unknown collection: %r" % coll) +# S3 client (shared) +s3 = boto3.client('s3') - return path.join(coll_dir, type_dir) +def log(msg): + print(msg) + # if getattr(settings, "DEBUG", False): + # print(msg) +### Token/Auth helpers (unchanged semantics) ### def generate_token(timestamp, filename): """Generate the auth token for the given filename and timestamp. This is for comparing to the client submited token. @@ -68,22 +68,19 @@ def validate_token(token_in, filename): """ if settings.KEY is None: return - if token_in == '': - raise TokenException("Auth token is missing.") - if ':' not in token_in: - raise TokenException("Auth token is malformed.") - - mac_in, timestr = token_in.split(':') + if not token_in or ':' not in token_in: + raise TokenException("Auth token is missing or malformed.") + mac_in, timestr = token_in.split(':', 1) try: timestamp = int(timestr) except ValueError: raise TokenException("Auth token is malformed.") - if settings.TIME_TOLERANCE is not None: current_time = get_timestamp() if not abs(current_time - timestamp) < settings.TIME_TOLERANCE: - raise TokenException("Auth token timestamp out of range: %s vs %s" % (timestamp, current_time)) - + raise TokenException( + "Auth token timestamp out of range: %s vs %s" % (timestamp, current_time) + ) if token_in != generate_token(timestamp, filename): raise TokenException("Auth token is invalid.") @@ -102,7 +99,6 @@ def require_token(filename_param, always=False): """ def decorator(func): - @include_timestamp @wraps(func) def wrapper(*args, **kwargs): if always or request.method not in ('GET', 'HEAD') or settings.REQUIRE_KEY_FOR_GET: @@ -112,11 +108,9 @@ def wrapper(*args, **kwargs): except TokenException as e: response.content_type = 'text/plain; charset=utf-8' response.status = 403 - return e + return str(e) return func(*args, **kwargs) - return wrapper - return decorator @@ -128,10 +122,9 @@ def include_timestamp(func): @wraps(func) def wrapper(*args, **kwargs): result = func(*args, **kwargs) - (result if isinstance(result, Response) else response) \ - .set_header('X-Timestamp', str(get_timestamp())) + target = result if isinstance(result, Response) else response + target.set_header('X-Timestamp', str(get_timestamp())) return result - return wrapper @@ -145,99 +138,192 @@ def wrapper(*args, **kwargs): except HTTPResponse as r: r.set_header('Access-Control-Allow-Origin', '*') raise - - (result if isinstance(result, Response) else response) \ - .set_header('Access-Control-Allow-Origin', '*') + target = result if isinstance(result, Response) else response + target.set_header('Access-Control-Allow-Origin', '*') return result - return wrapper -def resolve_file(): - """Inspect the request object to determine the file being requested. - If the request is for a thumbnail and it has not been generated, do - so before returning. +### S3 URI / key helpers ### +def parse_s3_uri(s3_uri): + """ + Parse s3://bucket/prefix and return (bucket, prefix_without_trailing_slash) + """ + parsed = urlparse(s3_uri) + if parsed.scheme != 's3' or not parsed.netloc: + raise ValueError(f"Invalid S3 URI: {s3_uri!r}") + return parsed.netloc, parsed.path.lstrip('/').rstrip('/') - Returns the relative path to the requested file in the base - attachments directory. +def get_collection_base(coll): + """ + Return (bucket, base_prefix) for the collection, or 404 if unknown. 
""" - thumb_p = (request.query['type'] == "T") - storename = request.query.filename - relpath = get_rel_path(request.query.coll, thumb_p) + try: + s3_uri = settings.COLLECTION_S3_PATHS[coll] + except KeyError: + abort(404, f"Unknown collection: {coll!r}") + try: + return parse_s3_uri(s3_uri) + except ValueError as e: + abort(500, str(e)) - if not thumb_p: - return path.join(relpath, storename) - basepath = path.join(settings.BASE_DIR, relpath) +def make_s3_key(coll, thumb, filename=''): + """ + Build bucket and key for given collection, thumb/orig, and filename. + """ + bucket, base_prefix = get_collection_base(coll) + subdir = settings.THUMB_DIR if thumb else settings.ORIG_DIR + parts = [] + if base_prefix: + parts.append(base_prefix) + parts.append(subdir) + if filename: + parts.append(filename) + key = '/'.join(p.strip('/') for p in parts) + return bucket, key + + +def stream_s3_object(bucket, key): + """Retrieve object from S3, abort 404 if missing.""" + try: + obj = s3.get_object(Bucket=bucket, Key=key) + except ClientError as e: + if e.response['Error']['Code'] in ('404', 'NoSuchKey'): + abort(404, f"Missing object: {key}") + raise + return obj['Body'].read(), obj.get('ContentType', 'application/octet-stream') - scale = int(request.query.scale) - mimetype, encoding = guess_type(storename) - assert mimetype in settings.CAN_THUMBNAIL +def resolve_s3_key(): + """ + Determine bucket+key for requested file or thumbnail. Generate thumbnail if needed. + Returns (bucket, key). + """ + thumb_p = (request.query.get('type') == "T") + coll = request.query.coll + name = request.query.filename - root, ext = path.splitext(storename) + if not thumb_p: + return make_s3_key(coll, False, name) - if mimetype in ('application/pdf', 'image/tiff'): - # use PNG for PDF thumbnails + # Thumbnail logic + scale = int(request.query.scale) + root, ext = path.splitext(name) + if ext.lower() in ('.pdf', '.tiff', '.tif'): ext = '.png' + thumb_name = f"{root}_{scale}{ext}" + bucket, thumb_key = make_s3_key(coll, True, thumb_name) - scaled_name = "%s_%d%s" % (root, scale, ext) - scaled_pathname = path.join(basepath, scaled_name) - - if path.exists(scaled_pathname): - log("Serving previously scaled thumbnail") - return path.join(relpath, scaled_name) - - if not path.exists(basepath): - mkdir(basepath) - - orig_dir = path.join(settings.BASE_DIR, get_rel_path(request.query.coll, thumb_p=False)) - orig_path = path.join(orig_dir, storename) - - if not path.exists(orig_path): - abort(404, "Missing original: %s" % orig_path) + # If thumbnail exists, return it + try: + s3.head_object(Bucket=bucket, Key=thumb_key) + log(f"Serving cached thumbnail {thumb_key}") + return bucket, thumb_key + except ClientError as e: + if e.response['Error']['Code'] not in ('404', 'NoSuchKey'): + raise - input_spec = orig_path - convert_args = ('-resize', "%dx%d>" % (scale, scale)) - if mimetype == 'application/pdf': - input_spec += '[0]' # only thumbnail first page of PDF - convert_args += ('-background', 'white', '-flatten') # add white background to PDFs + # Need to generate thumbnail: fetch original + orig_bucket, orig_key = make_s3_key(coll, False, name) + try: + obj = s3.get_object(Bucket=orig_bucket, Key=orig_key) + except ClientError as e: + if e.response['Error']['Code'] in ('404', 'NoSuchKey'): + abort(404, f"Missing original: {orig_key}") + raise + data = obj['Body'].read() + + # Write temp files for ImageMagick processing + from tempfile import gettempdir + tmp = gettempdir() + local_orig = path.join(tmp, name) + local_thumb = 
path.join(tmp, thumb_name)
+    with open(local_orig, 'wb') as f:
+        f.write(data)
+
+    convert_args = ['-resize', f"{scale}x{scale}>"]
+    if obj.get('ContentType', '') == 'application/pdf':
+        convert_args += ['-background', 'white', '-flatten']
+        local_orig_with_page = local_orig + '[0]'  # only thumbnail the first page of a PDF
+    else:
+        local_orig_with_page = local_orig
+
+    log(f"Scaling thumbnail to {scale}")
+    convert(local_orig_with_page, *convert_args, local_thumb)
+
+    # Upload generated thumbnail
+    ctype, _ = guess_type(local_thumb)
+    with open(local_thumb, 'rb') as f:
+        s3.put_object(
+            Bucket=bucket,
+            Key=thumb_key,
+            Body=f,
+            ContentType=ctype or 'application/octet-stream'
+        )
+
+    return bucket, thumb_key
+
+
+### Routes ###
+@route('/static/<path:path>')
+def static_handler(path):
+    """Serve static files from S3. Primarily for the Web Portal."""
+    if not settings.ALLOW_STATIC_FILE_ACCESS:
+        abort(404)
-    log("Scaling thumbnail to %d" % scale)
-    convert(input_spec, *(convert_args + (scaled_pathname,)))
+    parts = path.split('/', 1)
+    if len(parts) != 2:
+        abort(404, f"Bad static path: {path!r}")
-    return path.join(relpath, scaled_name)
+    coll, rest = parts
+    try:
+        bucket, base_prefix = parse_s3_uri(settings.COLLECTION_S3_PATHS[coll])
+    except KeyError:
+        abort(404, f"Unknown collection: {coll!r}")
+    key = '/'.join(p for p in (base_prefix, rest) if p)
+    data, ctype = stream_s3_object(bucket, key)
 
-@route('/static/<path:path>')
-def static(path):
-    """Serve static files to the client. Primarily for Web Portal."""
-    if not settings.ALLOW_STATIC_FILE_ACCESS:
-        abort(404)
-    return static_file(path, root=settings.BASE_DIR)
+    response.content_type = ctype
+    return data
 
 
 @route('/getfileref')
 @allow_cross_origin
 def getfileref():
-    """Returns a URL to the static file indicated by the query parameters."""
+    """Return a /static URL for the requested attachment."""
     if not settings.ALLOW_STATIC_FILE_ACCESS:
         abort(404)
+
+    coll = request.query.coll
+    filename = request.query.filename
+
+    # URL-encode the "collection/filename" pair into a single static path
+    static_path = f"{quote(coll)}/{quote(filename)}"
+    url = f"http://{settings.HOST}:{settings.PORT}/static/{static_path}"
+
     response.content_type = 'text/plain; charset=utf-8'
-    return "http://%s:%d/static/%s" % (settings.HOST, settings.PORT,
-                                       pathname2url(resolve_file()))
+    return url
 
 
 @route('/fileget')
 @require_token('filename')
 def fileget():
-    """Returns the file data of the file indicated by the query parameters."""
-    r = static_file(resolve_file(), root=settings.BASE_DIR)
-    download_name = request.query.downloadname
+    bucket, key = resolve_s3_key()
+    data, content_type = stream_s3_object(bucket, key)
+
+    response.content_type = content_type
+
+    download_name = request.query.get('downloadname')
     if download_name:
-        download_name = quote(path.basename(download_name).encode('ascii', 'replace'))
-        r.set_header('Content-Disposition', "inline; filename*=utf-8''%s" % download_name)
-    return r
+        dl = quote(path.basename(download_name).encode('ascii', 'replace'))
+        response.set_header(
+            'Content-Disposition',
+            f"inline; filename*=utf-8''{dl}"
+        )
+
+    return data
 
 
 @route('/fileupload', method='OPTIONS')
@@ -251,22 +337,20 @@ def fileupload_options():
 @allow_cross_origin
 @require_token('store')
 def fileupload():
-    """Accept original file uploads and store them in the proper
-    attchment subdirectory.
- """ - thumb_p = (request.forms['type'] == "T") - storename = request.forms.store - basepath = path.join(settings.BASE_DIR, get_rel_path(request.forms.coll, thumb_p)) - pathname = path.join(basepath, storename) + """Upload a new original (thumbnails are derived later).""" + thumb_p = (request.forms.get('type') == "T") + coll = request.forms.coll + name = request.forms.store if thumb_p: return 'Ignoring thumbnail upload!' - if not path.exists(basepath): - mkdir(basepath) - + bucket, key = make_s3_key(coll, False, name) upload = list(request.files.values())[0] - upload.save(pathname, overwrite=True) + body = upload.file.read() + content_type = upload.content_type or 'application/octet-stream' + + s3.put_object(Bucket=bucket, Key=key, Body=body, ContentType=content_type) response.content_type = 'text/plain; charset=utf-8' return 'Ok.' @@ -275,26 +359,21 @@ def fileupload(): @route('/filedelete', method='POST') @require_token('filename') def filedelete(): - """Delete the file indicated by the query parameters. Returns 404 - if the original file does not exist. Any associated thumbnails will - also be deleted. - """ - storename = request.forms.filename - basepath = path.join(settings.BASE_DIR, get_rel_path(request.forms.coll, thumb_p=False)) - thumbpath = path.join(settings.BASE_DIR, get_rel_path(request.forms.coll, thumb_p=True)) - - pathname = path.join(basepath, storename) - if not path.exists(pathname): - abort(404) - - log("Deleting %s" % pathname) - remove(pathname) - - prefix = storename.split('.att')[0] - pattern = path.join(thumbpath, prefix + '*') - log("Deleting thumbnails matching %s" % pattern) - for name in glob(pattern): - remove(name) + """Delete original + derived thumbnails for a collection.""" + coll = request.forms.coll + name = request.forms.filename + + # Delete original + bucket, orig_key = make_s3_key(coll, False, name) + s3.delete_object(Bucket=bucket, Key=orig_key) + + # Delete matching thumbnails (prefix: basename_) + thumb_bucket, thumb_prefix_base = make_s3_key(coll, True, '') + base = name.split('.', 1)[0] + '_' + paginator = s3.get_paginator('list_objects_v2') + for page in paginator.paginate(Bucket=thumb_bucket, Prefix=f"{thumb_prefix_base}{base}"): + for obj in page.get('Contents', []): + s3.delete_object(Bucket=thumb_bucket, Key=obj['Key']) response.content_type = 'text/plain; charset=utf-8' return 'Ok.' 
@@ -303,52 +382,40 @@ def filedelete(): @route('/getmetadata') @require_token('filename') def getmetadata(): - """Provides access to EXIF metadata.""" - storename = request.query.filename - basepath = path.join(settings.BASE_DIR, get_rel_path(request.query.coll, thumb_p=False)) - pathname = path.join(basepath, storename) - datatype = request.query.dt - - if not path.exists(pathname): - abort(404) - - with open(pathname, 'rb') as f: - try: - tags = exifread.process_file(f) - except: - log("Error reading exif data.") - tags = {} + """Fetch original from S3 and return EXIF metadata.""" + coll = request.query.coll + name = request.query.filename + bucket, key = make_s3_key(coll, False, name) + data, _ = stream_s3_object(bucket, key) + f = BytesIO(data) + try: + tags = exifread.process_file(f) + except Exception as e: + log(f"Error reading EXIF data: {e}") + tags = {} - if datatype == 'date': + if request.query.get('dt') == 'date': try: + response.content_type = 'text/plain; charset=utf-8' return str(tags['EXIF DateTimeOriginal']) except KeyError: abort(404, 'DateTime not found in EXIF') - data = defaultdict(dict) - for key, value in list(tags.items()): - parts = key.split() - if len(parts) < 2: continue - try: - v = str(value).decode('ascii', 'replace').encode('utf-8') - except TypeError: - v = repr(value) - - data[parts[0]][parts[1]] = str(v) - - response.content_type = 'application/json' - data = [OrderedDict((('Name', key), ('Fields', value))) - for key, value in list(data.items())] + out = defaultdict(dict) + for k, v in tags.items(): + parts = k.split() + if len(parts) < 2: + continue + out.setdefault(parts[0], {})[parts[1]] = str(v) - return json.dumps(data, indent=4) + result = [OrderedDict((('Name', k), ('Fields', f))) for k, f in out.items()] + response.content_type = 'application/json; charset=utf-8' + return json.dumps(result, indent=4) @route('/testkey') @require_token('random', always=True) def testkey(): - """If access to this resource succeeds, clients can conclude - that they have a valid access key. - """ response.content_type = 'text/plain; charset=utf-8' return 'Ok.' @@ -356,9 +423,9 @@ def testkey(): @route('/web_asset_store.xml') @include_timestamp def web_asset_store(): - """Serve an XML description of the URLs available here.""" response.content_type = 'text/xml; charset=utf-8' - return template('web_asset_store.xml', host="%s:%d" % (settings.SERVER_NAME, settings.SERVER_PORT)) + protocol = request.headers.get('X-Forwarded-Proto', request.urlparts.scheme) + return template('web_asset_store.xml', host=f"{protocol}://{settings.SERVER_NAME}") @route('/') @@ -368,6 +435,10 @@ def main_page(): if __name__ == '__main__': from bottle import run - - run(host='0.0.0.0', port=settings.PORT, server=settings.SERVER, - debug=settings.DEBUG, reloader=settings.DEBUG) + run( + host='0.0.0.0', + port=settings.PORT, + server=settings.SERVER, + debug=settings.DEBUG, + reloader=settings.DEBUG + ) diff --git a/settings.py b/settings.py index 8c2d989..b04949d 100644 --- a/settings.py +++ b/settings.py @@ -1,8 +1,12 @@ +import os + # Sample Specify web asset server settings. +# Bottle / server settings # Turns on bottle.py debugging, module reloading and printing some # information to console. -DEBUG = True +DEBUG = False +#DEBUG = True # This secret key is used to generate authentication tokens for requests. # The same key must be set in the Web Store Attachment Preferences in Specify. 
@@ -11,15 +15,7 @@
 # will allow anyone on the internet to use the attachment server to store
 # arbitrary files.
 KEY = 'test_attachment_key'
-
-# Auth token timestamp must be within this many seconds of server time
-# in order to be considered valid. This prevents replay attacks.
-# Set to None to disable time validation.
-TIME_TOLERANCE = 150
-
-# Set this to True to require authentication for downloads in addition
-# to uploads and deletes. Static file access, if enabled, is not
-# affected by this setting.
+TIME_TOLERANCE = 600
 REQUIRE_KEY_FOR_GET = False
 
 # This is required for use with the Web Portal.
@@ -30,37 +26,31 @@
 # so the client knows how to talk to the server.
 HOST = 'localhost'
 PORT = 8080
-
 SERVER_NAME = HOST
 SERVER_PORT = PORT
-
-# Port the development test server should listen on.
-DEVELOPMENT_PORT = PORT
+DEVELOPMENT_PORT = PORT  # Port the development test server should listen on.
 
-# Map collection names to directories. Set to None to store
-# everything in the same originals and thumbnail directories. This is
-# recommended unless some provision is made to allow attachments for
-# items scoped above collections to be found.
+# Map collection names to S3 base locations (s3://bucket/prefix).
+# Originals and thumbnails are stored under the ORIG_DIR and THUMB_DIR
+# prefixes beneath each collection's base path.
+COLLECTION_S3_PATHS = {
+    'coll1': 's3://my-bucket/path/to/coll1',
+    'coll2': 's3://my-bucket/path/to/coll2',
+    # ... add all your collections here ...
+}
 
-# COLLECTION_DIRS = {
-#     # 'COLLECTION_NAME': 'DIRECTORY_NAME',
-#     'KUFishvoucher': 'Ichthyology',
-#     'KUFishtissue': 'Ichthyology',
-# }
+# BASE_DIR is no longer used for attachment storage; retained for
+# anything that still expects a local scratch location.
+BASE_DIR = '/home/specify/attachments'
 
-COLLECTION_DIRS = None
-
-# Base directory for all attachments.
-BASE_DIR = '/home/specify/attachments/'
-
-# Originals and thumbnails are stored in separate directories.
 THUMB_DIR = 'thumbnails'
 ORIG_DIR = 'originals'
-
-# Set of mime types that the server will try to thumbnail.
-CAN_THUMBNAIL = {'image/jpeg', 'image/gif', 'image/png', 'image/tiff', 'application/pdf'}
+CAN_THUMBNAIL = {
+    'image/jpeg', 'image/gif', 'image/png',
+    'image/tiff', 'application/pdf',
+}
 
 # What HTTP server to use for stand-alone operation.
 # SERVER = 'paste' # Requires python-paste package. Fast, and seems to work good.
-SERVER = 'wsgiref' # For testing. Requires no extra packages.
-
+# Use wsgiref for testing. Requires no extra packages.
+SERVER = 'paste'  # or 'wsgiref'
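For reference, the reworked management script is driven as follows; `add` now takes collection/URI pairs rather than bare directory names. A usage sketch, where the bucket and collection names are placeholders and credentials come from boto3's default chain (environment variables, shared config, or an instance role):

```bash
# Register two collections, then remove one. The script edits
# COLLECTION_S3_PATHS in settings.py and restarts the service unit.
python manage_collection_dirs.py add \
    geo_swiss s3://my-bucket/path/to/geo_swiss \
    naag s3://my-bucket/path/to/naag
python manage_collection_dirs.py remove naag
```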