1313from __future__ import absolute_import
1414
1515import gzip
16- import io
1716import json
1817import os
1918import pickle
2019import sys
2120
22- import boto3
23- import numpy as np
2421import pytest
2522
2623import sagemaker
2724from sagemaker .amazon .amazon_estimator import registry
28- from sagemaker .amazon .common import write_numpy_to_dense_tensor
2925from sagemaker .estimator import Estimator
3026from sagemaker .utils import name_from_base
3127from tests .integ import DATA_DIR
@@ -57,6 +53,7 @@ def test_byo_estimator(sagemaker_session, region):
5753
5854 """
5955 image_name = registry (region ) + "/factorization-machines:1"
56+ training_data_path = os .path .join (DATA_DIR , 'dummy_tensor' )
6057
6158 with timeout (minutes = 15 ):
6259 data_path = os .path .join (DATA_DIR , 'one_p_mnist' , 'mnist.pkl.gz' )
@@ -65,19 +62,11 @@ def test_byo_estimator(sagemaker_session, region):
6562 with gzip .open (data_path , 'rb' ) as f :
6663 train_set , _ , _ = pickle .load (f , ** pickle_args )
6764
68- # take 100 examples for faster execution
69- vectors = np .array ([t .tolist () for t in train_set [0 ][:100 ]]).astype ('float32' )
70- labels = np .where (np .array ([t .tolist () for t in train_set [1 ][:100 ]]) == 0 , 1.0 , 0.0 ).astype ('float32' )
71-
72- buf = io .BytesIO ()
73- write_numpy_to_dense_tensor (buf , vectors , labels )
74- buf .seek (0 )
75-
76- bucket = sagemaker_session .default_bucket ()
7765 prefix = 'test_byo_estimator'
7866 key = 'recordio-pb-data'
79- boto3 .resource ('s3' ).Bucket (bucket ).Object (os .path .join (prefix , 'train' , key )).upload_fileobj (buf )
80- s3_train_data = 's3://{}/{}/train/{}' .format (bucket , prefix , key )
67+
68+ s3_train_data = sagemaker_session .upload_data (path = training_data_path ,
69+ key_prefix = os .path .join (prefix , 'train' , key ))
8170
8271 estimator = Estimator (image_name = image_name ,
8372 role = 'SageMakerRole' , train_instance_count = 1 ,
@@ -111,6 +100,7 @@ def test_byo_estimator(sagemaker_session, region):
111100def test_async_byo_estimator (sagemaker_session , region ):
112101 image_name = registry (region ) + "/factorization-machines:1"
113102 endpoint_name = name_from_base ('byo' )
103+ training_data_path = os .path .join (DATA_DIR , 'dummy_tensor' )
114104 training_job_name = ""
115105
116106 with timeout (minutes = 5 ):
@@ -120,19 +110,11 @@ def test_async_byo_estimator(sagemaker_session, region):
120110 with gzip .open (data_path , 'rb' ) as f :
121111 train_set , _ , _ = pickle .load (f , ** pickle_args )
122112
123- # take 100 examples for faster execution
124- vectors = np .array ([t .tolist () for t in train_set [0 ][:100 ]]).astype ('float32' )
125- labels = np .where (np .array ([t .tolist () for t in train_set [1 ][:100 ]]) == 0 , 1.0 , 0.0 ).astype ('float32' )
126-
127- buf = io .BytesIO ()
128- write_numpy_to_dense_tensor (buf , vectors , labels )
129- buf .seek (0 )
130-
131- bucket = sagemaker_session .default_bucket ()
132113 prefix = 'test_byo_estimator'
133114 key = 'recordio-pb-data'
134- boto3 .resource ('s3' ).Bucket (bucket ).Object (os .path .join (prefix , 'train' , key )).upload_fileobj (buf )
135- s3_train_data = 's3://{}/{}/train/{}' .format (bucket , prefix , key )
115+
116+ s3_train_data = sagemaker_session .upload_data (path = training_data_path ,
117+ key_prefix = os .path .join (prefix , 'train' , key ))
136118
137119 estimator = Estimator (image_name = image_name ,
138120 role = 'SageMakerRole' , train_instance_count = 1 ,
0 commit comments