Skip to content

Commit 4ac63b5

Browse files
author
Johannes Hötter
committed
solve conflicts
2 parents 98bd2c3 + 288ce77 commit 4ac63b5

File tree

4 files changed

+84
-30
lines changed

4 files changed

+84
-30
lines changed

kern/__init__.py

Lines changed: 46 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
from wasabi import msg
44
import pandas as pd
5-
from kern import authentication, api_calls, settings, exceptions
5+
from kern import authentication, api_calls, settings, exceptions, util
66
from typing import List, Optional, Dict
77
import json
8+
import os.path
89
from tqdm import tqdm
910
import spacy
1011

@@ -127,30 +128,47 @@ def get_record_export(
127128
msg.good(f"Downloaded export to {download_to}")
128129
return df
129130

130-
# TODO: issue #6
131-
# def post_file_import(self, upload_from: str):
132-
# upload_from = f"{upload_from}_SCALE"
133-
# file_type = "records"
134-
# import_file_options = None
135-
# config_url = settings.get_config_url()
136-
# config_api_response = api_calls.get_request(config_url, self.session_token)
137-
# endpoint = config_api_response["KERN_S3_ENDPOINT"]
138-
139-
# import_url = settings.get_import_url(self.project_id)
140-
# import_api_response = api_calls.post_request(
141-
# import_url,
142-
# {
143-
# "file_name": upload_from,
144-
# "file_type": file_type,
145-
# "import_file_options": import_file_options,
146-
# },
147-
# self.session_token,
148-
# )
149-
150-
# credentials = import_api_response["Credentials"]
151-
# access_key = credentials["AccessKeyId"]
152-
# secret_key = credentials["SecretAccessKey"]
153-
# session_token = credentials["SessionToken"]
154-
155-
# upload_task_id = import_api_response["uploadTaskId"]
156-
# return endpoint, access_key, secret_key, session_token, upload_task_id
131+
def post_file_import(self, path: str) -> bool:
    """Upload a local records file to the project's object storage.

    Reads the S3 endpoint from the project's base config, requests temporary
    upload credentials from the import endpoint, and passes the file on to
    ``util.s3_upload``. The uploaded object is named after the last component
    of ``path`` with the suffix ``_SCALE`` appended.

    Args:
        path: Location of the local file to upload.

    Returns:
        True if ``util.s3_upload`` reported success, otherwise False.

    Raises:
        FileNotFoundError: If ``path`` does not point to an existing file.
    """
    # Validate first, so an unusable path never triggers the credentials
    # request. Directories are rejected here as well, because they could not
    # be opened for reading later on.
    if not os.path.isfile(path):
        raise FileNotFoundError(f"Given filepath is not valid. Path: {path}")

    # basename honours the separators of the running platform, whereas
    # splitting on "/" keeps the whole path on Windows.
    file_name = f"{os.path.basename(path)}_SCALE"
    file_type = "records"
    import_file_options = ""

    # config
    config_url = settings.get_base_config(self.project_id)
    config_api_response = api_calls.get_request(
        config_url,
        self.session_token,
    )
    endpoint = config_api_response.get("KERN_S3_ENDPOINT")

    # credentials
    credentials_url = settings.get_import_url(self.project_id)
    credentials_api_response = api_calls.post_request(
        credentials_url,
        {
            "file_name": file_name,
            "file_type": file_type,
            "import_file_options": import_file_options,
        },
        self.session_token,
    )
    credentials = credentials_api_response["Credentials"]
    access_key = credentials["AccessKeyId"]
    secret_key = credentials["SecretAccessKey"]
    session_token = credentials["SessionToken"]
    upload_task_id = credentials_api_response["uploadTaskId"]
    bucket = credentials_api_response["bucket"]

    success = util.s3_upload(
        access_key,
        secret_key,
        session_token,
        bucket,
        endpoint,
        upload_task_id,
        path,
        file_name,
    )
    return bool(success)

kern/settings.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,7 @@ def get_export_url(project_id: str) -> str:
4242

4343
def get_import_url(project_id: str) -> str:
    """Return the project URL of `project_id` extended by the `/import` path."""
    project_url = get_project_url(project_id)
    return f"{project_url}/import"
45+
46+
47+
def get_base_config(project_id: str) -> str:
    """Return the project URL of `project_id` extended by `/import/base_config`."""
    project_url = get_project_url(project_id)
    return f"{project_url}/import/base_config"

kern/util.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import boto3
2+
from botocore.client import Config
3+
4+
5+
def s3_upload(
    access_key: str,
    secret_key: str,
    aws_session_token: str,
    target_bucket: str,
    url: str,
    upload_task_id: str,
    file_path: str,
    file_name: str,
) -> bool:
    """
    Uploads a local file to the object storage using the given temporary
    credentials.

    The file at `file_path` is opened in binary mode and written to
    `target_bucket` under the key `<upload_task_id>/<file_name>`, through the
    storage endpoint `url`. Returns True once the put call has returned;
    errors raised while opening or uploading the file are not caught here.
    """
    connection_settings = {
        "endpoint_url": url,
        "aws_access_key_id": access_key,
        "aws_secret_access_key": secret_key,
        "aws_session_token": aws_session_token,
        "config": Config(signature_version="s3v4"),
        "region_name": "us-east-1",
    }
    storage = boto3.resource("s3", **connection_settings)

    object_key = f"{upload_task_id}/{file_name}"
    target = storage.Object(target_bucket, object_key)

    with open(file_path, "rb") as payload:
        target.put(Body=payload)
    return True

requirements.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ backcall==0.2.0
77
beautifulsoup4==4.11.1
88
black==22.3.0
99
bleach==5.0.0
10-
boto3==1.23.1
11-
botocore==1.26.1
10+
boto3==1.24.26
11+
botocore==1.27.26
1212
certifi==2021.10.8
1313
cffi==1.15.0
1414
charset-normalizer==2.0.12
@@ -73,6 +73,7 @@ s3transfer==0.5.2
7373
Send2Trash==1.8.0
7474
six==1.16.0
7575
soupsieve==2.3.2.post1
76+
spacy==3.3.1
7677
stack-data==0.2.0
7778
terminado==0.15.0
7879
tinycss2==1.1.1

0 commit comments

Comments
 (0)