Skip to content

Commit 32761ee

Browse files
author
Ziqun Ye
committed
rebase to main
2 parents 509a052 + a531cb2 commit 32761ee

21 files changed

+217
-544
lines changed

ads/opctl/conda/cmds.py

Lines changed: 68 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
from ads.opctl.config.base import ConfigProcessor
4747
from ads.opctl.config.merger import ConfigMerger
4848
from ads.opctl.conda.multipart_uploader import MultiPartUploader
49+
import tempfile
4950

5051

5152
def _fetch_manifest_template() -> Dict:
@@ -108,6 +109,7 @@ def _create(
108109
conda_pack_folder: str,
109110
gpu: bool,
110111
overwrite: bool,
112+
prepare_publish: bool = False,
111113
) -> str:
112114
"""Create a conda pack given an environment yaml file under conda pack folder specified.
113115
@@ -123,6 +125,8 @@ def _create(
123125
whether to build against GPU image
124126
overwrite : bool
125127
whether to overwrite existing pack of the same slug
128+
prepare_publish : bool
129+
whether to create conda pack archive after conda pack is created
126130
127131
Raises
128132
------
@@ -180,6 +184,11 @@ def _create(
180184
manifest["manifest"]["manifest_version"] = "1.0"
181185

182186
logger.info(f"Creating conda environment {slug}")
187+
conda_dep = None
188+
with open(env_file) as mfile:
189+
conda_dep = yaml.safe_load(mfile.read())
190+
conda_dep["manifest"] = manifest["manifest"]
191+
183192
if is_in_notebook_session() or NO_CONTAINER:
184193
command = f"conda env create --prefix {pack_folder_path} --file {os.path.abspath(os.path.expanduser(env_file))}"
185194
run_command(command, shell=True)
@@ -191,35 +200,56 @@ def _create(
191200
)
192201

193202
create_command = f"conda env create --prefix {docker_pack_folder_path} --file {docker_env_file_path}"
194-
203+
195204
volumes = {
196205
pack_folder_path: {"bind": docker_pack_folder_path},
197206
os.path.abspath(os.path.expanduser(env_file)): {
198207
"bind": docker_env_file_path
199208
},
209+
200210
}
211+
201212
if gpu:
202213
image = ML_JOB_GPU_IMAGE
203214
else:
204215
image = ML_JOB_IMAGE
205216
try:
206-
run_container(
207-
image=image, bind_volumes=volumes, env_vars={}, command=create_command
208-
)
217+
if prepare_publish:
218+
tmp_file = tempfile.NamedTemporaryFile(suffix=".yaml")
219+
# Save the manifest in the temp file that can be mounted inside the container so that archiving will work
220+
with open(tmp_file.name, 'w') as f:
221+
yaml.safe_dump(conda_dep, f)
222+
223+
pack_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pack.py")
224+
pack_command = f"python {os.path.join(DEFAULT_IMAGE_HOME_DIR, 'pack.py')} --conda-path {docker_pack_folder_path} --manifest-location {os.path.join(DEFAULT_IMAGE_HOME_DIR, 'manifest.yaml')}"
225+
226+
# add pack script and manifest file to the mount so that archive can be created in the same container run
227+
condapack_script = {
228+
pack_script: {"bind": os.path.join(DEFAULT_IMAGE_HOME_DIR, "pack.py")},
229+
tmp_file.name: {"bind": os.path.join(DEFAULT_IMAGE_HOME_DIR, "manifest.yaml")}
230+
}
231+
volumes = {**volumes, **condapack_script} # the dict union operator `|` requires Python 3.9+, so unpacking is used to stay compatible with Python 3.8
232+
233+
run_container(
234+
image=image, bind_volumes=volumes, entrypoint="/bin/bash -c ", env_vars={}, command=f" '{create_command} && {pack_command}'"
235+
)
236+
else:
237+
run_container(
238+
image=image, bind_volumes=volumes, env_vars={}, command=create_command
239+
)
209240
except Exception:
210241
if os.path.exists(pack_folder_path):
211242
shutil.rmtree(pack_folder_path)
212243
raise RuntimeError(f"Could not create environment {slug}.")
213244

214-
conda_dep = None
215-
with open(env_file) as mfile:
216-
conda_dep = yaml.safe_load(mfile.read())
217-
conda_dep["manifest"] = manifest["manifest"]
218-
with open(f"{os.path.join(pack_folder_path, slug)}_manifest.yaml", "w") as mfile:
245+
# Save the manifest file on the host machine, in the folder where the conda environment is saved.
246+
manifest_location = f"{os.path.join(pack_folder_path, slug)}_manifest.yaml"
247+
with open(manifest_location, "w") as mfile:
219248
yaml.safe_dump(conda_dep, mfile)
220249

221250
logger.info(f"Environment `{slug}` setup complete.")
222251
print(f"Pack {slug} created under {pack_folder_path}.")
252+
223253
return slug
224254

225255

@@ -467,6 +497,7 @@ def _install(
467497
def publish(**kwargs) -> None:
468498
p = ConfigProcessor().step(ConfigMerger, **kwargs)
469499
exec_config = p.config["execution"]
500+
skip_archive = False
470501
if exec_config.get("environment_file", None):
471502
name = _get_name(exec_config.get("name"), exec_config.get("environment_file"))
472503
slug = _create(
@@ -476,7 +507,9 @@ def publish(**kwargs) -> None:
476507
conda_pack_folder=exec_config["conda_pack_folder"],
477508
gpu=exec_config.get("gpu", False),
478509
overwrite=exec_config["overwrite"],
510+
prepare_publish=True
479511
)
512+
skip_archive = True # The conda pack archive is already created during create process.
480513
else:
481514
slug = exec_config.get("slug")
482515
if not slug:
@@ -493,9 +526,10 @@ def publish(**kwargs) -> None:
493526
oci_profile=exec_config.get("oci_profile"),
494527
overwrite=exec_config["overwrite"],
495528
auth_type=exec_config["auth"],
529+
skip_archive=skip_archive
496530
)
497531

498-
532+
499533
def _publish(
500534
conda_slug: str,
501535
conda_uri_prefix: str,
@@ -504,6 +538,7 @@ def _publish(
504538
oci_profile: str,
505539
overwrite: bool,
506540
auth_type: str,
541+
skip_archive: bool = False
507542
) -> None:
508543
"""Publish a local conda pack to object storage location
509544
@@ -579,29 +614,30 @@ def _publish(
579614
publish_slug = "_".join(ans.lower().split(" "))
580615

581616
pack_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pack.py")
582-
if is_in_notebook_session() or NO_CONTAINER:
583-
command = f"python {pack_script} {pack_folder_path}"
584-
run_command(command, shell=True)
585-
else:
586-
volumes = {
587-
pack_folder_path: {
588-
"bind": os.path.join(DEFAULT_IMAGE_HOME_DIR, conda_slug)
589-
},
590-
pack_script: {"bind": os.path.join(DEFAULT_IMAGE_HOME_DIR, "pack.py")},
591-
}
592-
command = f"python {os.path.join(DEFAULT_IMAGE_HOME_DIR, 'pack.py')} {os.path.join(DEFAULT_IMAGE_HOME_DIR, conda_slug)}"
593-
gpu = env["manifest"]["arch_type"] == "GPU"
594-
_check_job_image_exists(gpu)
595-
if gpu:
596-
image = ML_JOB_GPU_IMAGE
617+
if not skip_archive:
618+
if is_in_notebook_session() or NO_CONTAINER:
619+
command = f"python {pack_script} --conda-path {pack_folder_path}"
620+
run_command(command, shell=True)
597621
else:
598-
image = ML_JOB_IMAGE
599-
try:
600-
run_container(
601-
image=image, bind_volumes=volumes, env_vars={}, command=command
602-
)
603-
except Exception:
604-
raise RuntimeError(f"Could not pack environment {conda_slug}.")
622+
volumes = {
623+
pack_folder_path: {
624+
"bind": os.path.join(DEFAULT_IMAGE_HOME_DIR, conda_slug)
625+
},
626+
pack_script: {"bind": os.path.join(DEFAULT_IMAGE_HOME_DIR, "pack.py")},
627+
}
628+
command = f"python {os.path.join(DEFAULT_IMAGE_HOME_DIR, 'pack.py')} --conda-path {os.path.join(DEFAULT_IMAGE_HOME_DIR, conda_slug)}"
629+
gpu = env["manifest"]["arch_type"] == "GPU"
630+
_check_job_image_exists(gpu)
631+
if gpu:
632+
image = ML_JOB_GPU_IMAGE
633+
else:
634+
image = ML_JOB_IMAGE
635+
try:
636+
run_container(
637+
image=image, bind_volumes=volumes, env_vars={}, command=command
638+
)
639+
except Exception:
640+
raise RuntimeError(f"Could not pack environment {conda_slug}.")
605641
if "/" in conda_slug:
606642
raise ValueError("Invalid conda_slug. found `/` in slug name. Please use a different slug name.")
607643
pack_file = os.path.join(pack_folder_path, f"{conda_slug}.tar.gz")

ads/opctl/conda/pack.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,14 @@
1616
import conda_pack
1717

1818
import yaml
19+
import argparse
1920

2021

21-
def main(pack_folder_path):
22+
def main(pack_folder_path, manifest_file=None):
2223
slug = os.path.basename(pack_folder_path)
23-
manifest_path = glob.glob(os.path.join(pack_folder_path, "*_manifest.yaml"))[0]
24+
manifest_path = (
25+
manifest_file or glob.glob(os.path.join(pack_folder_path, "*_manifest.yaml"))[0]
26+
)
2427
with open(manifest_path) as f:
2528
env = yaml.safe_load(f.read())
2629

@@ -60,8 +63,10 @@ def main(pack_folder_path):
6063
raise RuntimeError(
6164
"Error creating the pack file using `conda_pack.pack()`."
6265
)
66+
print(f"Copy {pack_file} to {pack_folder_path}")
6367
shutil.copy(pack_file, pack_folder_path)
6468
file_path = os.path.join(pack_folder_path, os.path.basename(pack_file))
69+
print(f"Pack built at {file_path}")
6570
print(
6671
f"changing permission for {file_path}",
6772
flush=True,
@@ -70,4 +75,14 @@ def main(pack_folder_path):
7075

7176

7277
if __name__ == "__main__":
73-
main(sys.argv[1])
78+
parser = argparse.ArgumentParser(
79+
prog="Prepare conda archive",
80+
description="Uses conda_pack library to pack the conda environment.",
81+
)
82+
parser.add_argument("--conda-path", type=str, help="Path to the conda environment")
83+
parser.add_argument(
84+
"--manifest-location", type=str, default=None, help="Path to manifest location"
85+
)
86+
args = parser.parse_args()
87+
88+
main(args.conda_path, args.manifest_location)

ads/opctl/docker/Dockerfile.job

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ ENV DATASCIENCE_USER datascience
88
ENV DATASCIENCE_UID 1000
99
ENV HOME /home/$DATASCIENCE_USER
1010
ENV DATASCIENCE_INSTALL_DIR /etc/datascience
11-
ENV LOGS_DIRECTORY /logs
1211

1312
ARG release=19
1413
ARG update=13
@@ -47,9 +46,7 @@ RUN \
4746
chown -R $DATASCIENCE_USER /home/$DATASCIENCE_USER && \
4847
chown -R $DATASCIENCE_USER:users /usr/local/ && \
4948
touch /etc/sudoers.d/$DATASCIENCE_USER && echo "$DATASCIENCE_USER ALL=(ALL:ALL) NOPASSWD: ALL" >> /etc/sudoers.d/$DATASCIENCE_USER && \
50-
mkdir -p $DATASCIENCE_INSTALL_DIR && chown $DATASCIENCE_USER $DATASCIENCE_INSTALL_DIR && \
51-
mkdir -p $LOGS_DIRECTORY && chown -R $DATASCIENCE_USER:users $LOGS_DIRECTORY && \
52-
mkdir -p $LOGS_DIRECTORY/harness && chown -R $DATASCIENCE_USER:users $LOGS_DIRECTORY/harness
49+
mkdir -p $DATASCIENCE_INSTALL_DIR && chown $DATASCIENCE_USER $DATASCIENCE_INSTALL_DIR
5350

5451
RUN mkdir -p /etc/datascience/build
5552
RUN mkdir -p $DATASCIENCE_INSTALL_DIR/{pre-build-ds,post-build-ds,pre-run-ds,pre-run-user}
@@ -65,9 +62,10 @@ RUN chown -R $DATASCIENCE_USER /opt
6562

6663
USER $DATASCIENCE_USER
6764
WORKDIR /home/datascience
68-
RUN wget -nv https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /home/datascience/miniconda.sh \
69-
&& /bin/bash /home/datascience/miniconda.sh -f -b -p /opt/conda \
70-
&& rm /home/datascience/miniconda.sh \
65+
ARG MINICONDA_VER=23.5.2-0
66+
RUN wget -nv https://repo.anaconda.com/miniconda/Miniconda3-py38_${MINICONDA_VER}-Linux-x86_64.sh -O /home/datascience/Miniconda3.sh \
67+
&& /bin/bash /home/datascience/Miniconda3.sh -f -b -p /opt/conda \
68+
&& rm /home/datascience/Miniconda3.sh \
7169
&& /opt/conda/bin/conda clean -yaf
7270

7371
WORKDIR /
@@ -90,11 +88,8 @@ WORKDIR /
9088

9189
RUN conda list
9290

93-
USER root
94-
95-
ARG PIP_INDEX_URL
96-
9791
############# Setup Conda environment tools ###########################
92+
USER root
9893
ARG RAND=1
9994

10095
ARG RUN_WORKING_DIR="/home/datascience"
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# Copyright (c) 2023 Oracle and/or its affiliates.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
3+
4+
# Used OL8 because miniconda required a higher version of glibc that was unavoidable
5+
FROM ghcr.io/oracle/oraclelinux:8-slim
6+
7+
# Configure environment
8+
ENV DATASCIENCE_USER datascience
9+
ENV DATASCIENCE_UID 1000
10+
ENV HOME /home/$DATASCIENCE_USER
11+
ENV DATASCIENCE_INSTALL_DIR /etc/datascience
12+
13+
ARG release=19
14+
ARG update=10
15+
16+
RUN microdnf install yum yum-utils && yum clean all && rm -rf /var/cache/yum
17+
18+
RUN \
19+
yum -y -q install \
20+
oracle-release-el8 && \
21+
yum-config-manager --enable ol8_addons --enable ol8_oracle_instantclient > /dev/null && \
22+
yum groupinstall -y -q 'Development Tools' && \
23+
yum update -y && \
24+
yum install -y --setopt=skip_missing_names_on_install=False \
25+
bzip2 \
26+
curl \
27+
git \
28+
gcc-gfortran \
29+
libcurl-devel \
30+
libxml2-devel \
31+
oracle-instantclient${release}.${update}-basic \
32+
oracle-instantclient${release}.${update}-sqlplus \
33+
openssl \
34+
openssl-devel \
35+
patch \
36+
sudo \
37+
unzip \
38+
zip \
39+
gcc-c++ \
40+
wget \
41+
gcc \
42+
&& yum clean all \
43+
&& rm -rf /var/cache/yum/*
44+
45+
# setup user
46+
RUN \
47+
mkdir -p /home/$DATASCIENCE_USER && \
48+
useradd -m -s /bin/bash -N -u $DATASCIENCE_UID $DATASCIENCE_USER && \
49+
chown -R $DATASCIENCE_USER /home/$DATASCIENCE_USER && \
50+
chown -R $DATASCIENCE_USER:users /usr/local/ && \
51+
touch /etc/sudoers.d/$DATASCIENCE_USER && echo "$DATASCIENCE_USER ALL=(ALL:ALL) NOPASSWD: ALL" >> /etc/sudoers.d/$DATASCIENCE_USER && \
52+
mkdir -p $DATASCIENCE_INSTALL_DIR && chown $DATASCIENCE_USER $DATASCIENCE_INSTALL_DIR
53+
54+
RUN mkdir -p /etc/datascience/build
55+
RUN mkdir -p $DATASCIENCE_INSTALL_DIR/{pre-build-ds,post-build-ds,pre-run-ds,pre-run-user}
56+
57+
#conda
58+
# set a default language for localization. necessary for oci cli
59+
ARG LANG=en_US.utf8
60+
ENV LANG=$LANG
61+
ENV SHELL=/bin/bash
62+
63+
# set /opt folder permissions for $DATASCIENCE_USER. Conda is going to live in this folder.
64+
RUN chown -R $DATASCIENCE_USER /opt
65+
66+
USER $DATASCIENCE_USER
67+
WORKDIR /home/datascience
68+
# Note: in order to run sudo commands as a non-root user, you must specify `--credential yes` if using qemu static to build the image.
69+
ARG MINICONDA_VER=23.5.2-0
70+
RUN wget -nv https://repo.anaconda.com/miniconda/Miniconda3-py38_${MINICONDA_VER}-Linux-aarch64.sh -O /home/datascience/Miniconda3.sh \
71+
&& /bin/bash /home/datascience/Miniconda3.sh -f -b -p /opt/conda \
72+
&& rm /home/datascience/Miniconda3.sh \
73+
&& /opt/conda/bin/conda clean -yaf
74+
75+
WORKDIR /
76+
USER root
77+
RUN printf "#!/bin/bash\nsource /opt/conda/bin/activate\n" > /etc/profile.d/enableconda.sh \
78+
&& chmod +x /etc/profile.d/enableconda.sh
79+
80+
USER $DATASCIENCE_USER
81+
ENV PATH="/opt/conda/bin:${PATH}"
82+
WORKDIR /home/datascience
83+
84+
COPY docker/base-env.yaml /opt/base-env.yaml
85+
RUN conda env update -q -n root -f /opt/base-env.yaml && conda clean -yaf && rm -rf /home/datascience/.cache/pip
86+
87+
USER $DATASCIENCE_USER
88+
89+
####### WRAP UP ###############################
90+
RUN python -c 'import sys; assert(sys.version_info[:2]) == (3, 8), "Python 3.8 is not detected"'
91+
WORKDIR /
92+
93+
RUN conda list
94+
95+
############# Setup Conda environment tools ###########################
96+
USER root
97+
ARG RAND=1
98+
99+
ARG RUN_WORKING_DIR="/home/datascience"
100+
WORKDIR $RUN_WORKING_DIR
101+
102+
# clean tmp folder
103+
RUN rm -rf /tmp/*
104+
105+
RUN mkdir -p /etc/datascience/operators
106+
107+
USER datascience

0 commit comments

Comments
 (0)