Skip to content

Commit 319f000

Browse files
authored
Merge pull request #87 from NimbleBoxAI/v1
0.13.0rc18
2 parents 9d03779 + 697d08f commit 319f000

37 files changed

+551
-4205
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,6 @@ notebooks/
146146
# no need to add / in symlink
147147
.nbx
148148
ex_jobs
149-
scripts
149+
scripts
150+
.nboxignore
151+
stories/requirements.txt

README.md

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,32 @@ Version](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%2
66

77
## 🧐 What is Nbox?
88

9-
`nbox` provides first class API support for all NimbleBox.ai infrastructure (NBX-Build, Jobs, Deploy) and services (NBX-Workspaces) components. Write jobs using `nbox.Operators`
9+
`nbox` provides first-class CLI and Python package support for all NimbleBox infrastructure and services. You can install it with `pip`:
1010

11-
## Installation
1211

1312
```bash
1413
# on macos find the correct wheel file based on python version: https://github.com/pietrodn/grpcio-mac-arm-build/releases/tag/1.51.1
1514
pip install <wheel_url>
16-
pip install nbox
1715

18-
# on linux
1916
pip install nbox
2017
```
2118

19+
Next you need to authenticate yourself with the CLI:
20+
21+
```bash
22+
nbx login
23+
```
24+
2225
# Stability and Compatibility
2326

2427
Status: The library is currently undergoing heavy development.
2528

26-
☝️ Important Note: Current major version is zero (v0.x.x) to accommodate rapid development and fast iteration while getting early feedback from users (feedback on APIs are appreciated!). **The public API need not change** without a major version update before v1.0.0 release.
29+
- `nbx projects`:
30+
- `nbx projects - artifacts --help` stable ✅
31+
- `nbx projects - run --help` stable ✅
32+
- `nbx jobs --help`: mostly stable 🟡
33+
- `nbx serve --help`: mostly stable 🟡
34+
2735

2836
# 🤷Why NimbleBox
2937

nbox/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from nbox.jobs import Job, Serve, Schedule
99
from nbox.instance import Instance
1010
from nbox.relics import Relics
11-
from nbox.lmao import Lmao, LmaoLive
1211
from nbox.network import zip_to_nbox_folder
1312
from nbox.version import __version__
1413
from nbox.hyperloop.common.common_pb2 import Resource

nbox/cli.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,11 @@
2626
from nbox.instance import Instance
2727
from nbox.sub_utils.ssh import tunnel
2828
from nbox.relics import Relics
29-
from nbox.lmao import LmaoCLI
3029
from nbox.lmao_v4 import LmaoCLI as Lmaov4CLI
3130
from nbox.version import __version__ as V
3231
from nbox.projects import Project
3332
from nbox.utils import logger, lo
34-
from nbox.plugins.base import PluginCLI
33+
# from nbox.plugins.base import PluginCLI
3534

3635
# from nbox.jobs import Job, Serve
3736
from nbox.jd_core import JobsCli, ServeCli
@@ -181,16 +180,13 @@ def main():
181180
"build" : Instance,
182181
"config" : Config,
183182
"get" : get,
184-
# "jobs" : Job,
185183
"jobs" : JobsCli,
186-
# "lmao" : LmaoCLI,
187184
"lmao" : Lmaov4CLI,
188185
"login" : login,
189186
"open" : open_home,
190-
"plugins" : PluginCLI,
187+
# "plugins" : PluginCLI,
191188
"projects" : Project,
192189
"relics" : Relics,
193-
# "serve" : Serve,
194190
"serve" : ServeCli,
195191
"tunnel" : tunnel,
196192
"version" : version,

nbox/instance.py

Lines changed: 48 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -83,21 +83,25 @@ class Instance():
8383
status = staticmethod(print_status)
8484

8585
# each instance has a lot of data against it, we need to store only a few as attributes
86-
useful_keys = ["project_id", "project_name", "size_used", "size", "state"]
86+
useful_keys = ["instance_id", "project_name", "size_used", "size", "state"]
8787

8888
def __init__(self, i: str, *, workspace_id: str = ""):
8989
"""NBX-Build Instance class manages an individual instance and also provides webserver functionality using
9090
`nbox_ws_v1`, such as starting and stopping, deletion and more.
9191
9292
Args:
93-
i (str): name or `project_id` of the instance
93+
i (str): name or `instance_id` of the instance
9494
"""
9595
if not i:
9696
raise ValueError("Instance id must be provided, try --i='1023'")
97+
98+
# if user provided a number we assume that they gave an instance ID, this is a weak assumption because
99+
# people usually use names.
100+
_instance_id = isinstance(i, int)
97101
i = str(i)
98102

99103
# simply add useful keys to the instance
100-
self.project_id: str = None
104+
self.instance_id: str = None
101105
self.project_name: str = None
102106
self.workspace_id: str = workspace_id or secret.workspace_id
103107
self.size_used: float = None
@@ -113,31 +117,36 @@ def __init__(self, i: str, *, workspace_id: str = ""):
113117
stub_ws_instance = create_webserver_subway("v1", sess)
114118
stub_projects = stub_ws_instance.instances
115119

116-
# filter and get the data
117-
project_details = stub_projects()["project_details"]
118-
# print(project_details)
119-
if i not in project_details:
120-
by_name = list(filter(lambda x: x[1]['project_name'] == i, list(project_details.items())))
121-
if len(by_name) == 0:
122-
raise ValueError(f"Instance '{i}' not found")
123-
elif len(by_name) > 1:
124-
raise ValueError(f"Multiple instances with name '{i}' found")
125-
data = by_name[0]
126-
project_id = data[0]
127-
data = data[1]
120+
if _instance_id:
121+
# if user provided an instance id, we can directly get the data
122+
data = stub_projects.u(i)()
123+
instance_id = i
128124
else:
129-
data = project_details[i]
130-
project_id = i
131-
data["project_id"] = project_id
132-
logger.info(f"Found instance '{data['project_name']}' ({data['project_id']})")
125+
# else filter and get the data
126+
project_details = stub_projects()["project_details"]
127+
if i not in project_details:
128+
by_name = list(filter(lambda x: x[1]['project_name'] == i, list(project_details.items())))
129+
if len(by_name) == 0:
130+
raise ValueError(f"Instance '{i}' not found")
131+
elif len(by_name) > 1:
132+
raise ValueError(f"Multiple instances with name '{i}' found")
133+
data = by_name[0]
134+
instance_id = data[0]
135+
data = data[1]
136+
else:
137+
data = project_details[i]
138+
instance_id = i
139+
140+
data["instance_id"] = instance_id
141+
logger.info(f"Found instance '{data['project_name']}' ({data['instance_id']})")
133142
# print(data)
134-
for x in self.useful_keys:
143+
for x in Instance.useful_keys:
135144
setattr(self, x, data[x])
136145

137146
# some data points require extra processing before usage
138147
self.custom_ports: Dict[str, int] = loads(data["custom_ports"]) if data["custom_ports"] is not None else {}
139148
self.exposed_ports: Dict[str, int] = loads(data["exposed_ports"]) if data["exposed_ports"] is not None else {}
140-
self.stub_ws_instance = stub_projects.u(self.project_id)
149+
self.stub_ws_instance = stub_projects.u(self.instance_id)
141150
logger.debug(f"WS: {self.stub_ws_instance}")
142151

143152
# set values
@@ -257,13 +266,13 @@ def is_running(self) -> bool:
257266

258267
def refresh(self):
259268
"""Update the data, get latest state"""
260-
self.data = self.stub_ws_instance() # GET /user/projects/{project_id}
269+
self.data = self.stub_ws_instance() # GET /user/projects/{instance_id}
261270
for k in self.useful_keys:
262271
setattr(self, k, self.data[k])
263272

264273
def _start(self, cpu, gpu, gpu_count, auto_shutdown, dedicated_hw, zone):
265274
"""Turn on the underlying compute"""
266-
logger.info(f"Starting instance {self.project_name} ({self.project_id})")
275+
logger.info(f"Starting instance {self.project_name} ({self.instance_id})")
267276
hw_config = {
268277
"cpu":f"n1-standard-{cpu}"
269278
}
@@ -283,21 +292,21 @@ def _start(self, cpu, gpu, gpu_count, auto_shutdown, dedicated_hw, zone):
283292
region = zone
284293
)
285294

286-
logger.info(f"Waiting for instance {self.project_name} ({self.project_id}) to start ...")
295+
logger.info(f"Waiting for instance {self.project_name} ({self.instance_id}) to start ...")
287296
_i = 0
288297
while self.state != "RUNNING":
289298
time.sleep(5)
290299
self.refresh()
291300
_i += 1
292301
if _i > TIMEOUT_CALLS:
293302
raise TimeoutError("Instance did not start within timeout, please check dashboard")
294-
logger.info(f"Instance {self.project_name} ({self.project_id}) started")
303+
logger.info(f"Instance {self.project_name} ({self.instance_id}) started")
295304

296305
def _open(self):
297306
# now the instance is running, we can open it, opening will assign a bunch of cookies and
298307
# then get us the exact location of the instance
299308
if not self.__opened:
300-
logger.debug(f"Opening instance {self.project_name} ({self.project_id})")
309+
logger.debug(f"Opening instance {self.project_name} ({self.instance_id})")
301310
launch_data = self.stub_ws_instance.launch(_method = "post")
302311
base_domain = launch_data['base_domain']
303312
self.open_data = {
@@ -340,7 +349,7 @@ def start(
340349
self._start(cpu, gpu, gpu_count, auto_shutdown, dedicated_hw, zone)
341350
else:
342351
# TODO: @yashbonde: inform user in case of hardware mismatch?
343-
logger.info(f"Instance {self.project_name} ({self.project_id}) is already running")
352+
logger.info(f"Instance {self.project_name} ({self.instance_id}) is already running")
344353

345354
# prevent rate limiting
346355
if not self.__opened:
@@ -349,31 +358,34 @@ def start(
349358
def stop(self):
350359
"""Stop the Instance"""
351360
if self.state == "STOPPED":
352-
logger.info(f"Instance {self.project_name} ({self.project_id}) is already stopped")
361+
logger.info(f"Instance {self.project_name} ({self.instance_id}) is already stopped")
353362
return
354363

355-
logger.debug(f"Stopping instance {self.project_name} ({self.project_id})")
356-
message = self.stub_ws_instance.stop_instance("post", data = {"instance_id":self.project_id})["msg"]
364+
logger.debug(f"Stopping instance {self.project_name} ({self.instance_id})")
365+
message = self.stub_ws_instance.stop(
366+
"post",
367+
data = {"workspace_id": secret.workspace_id, "instance_id": self.instance_id}
368+
)["msg"]
357369
if not message == "success":
358370
raise ValueError(message)
359371

360-
logger.debug(f"Waiting for instance {self.project_name} ({self.project_id}) to stop")
372+
logger.debug(f"Waiting for instance {self.project_name} ({self.instance_id}) to stop")
361373
_i = 0 # timeout call counter
362374
while self.state != "STOPPED":
363375
time.sleep(5)
364376
self.refresh()
365377
_i += 1
366378
if _i > TIMEOUT_CALLS:
367379
raise TimeoutError("Instance did not stop within timeout, please check dashboard")
368-
logger.debug(f"Instance {self.project_name} ({self.project_id}) stopped")
380+
logger.debug(f"Instance {self.project_name} ({self.instance_id}) stopped")
369381

370382
self.__opened = False
371383

372384
def delete(self, force = False):
373385
"""With great power comes great responsibility."""
374386
if self.__opened and not force:
375387
raise ValueError("Instance is still opened, please call .stop() first")
376-
logger.warning(f"Deleting instance {self.project_name} ({self.project_id})")
388+
logger.warning(f"Deleting instance {self.project_name} ({self.instance_id})")
377389
if input(f"> Are you sure you want to delete '{self.project_name}'? (y/N): ") == "y":
378390
self.stub_ws_instance("delete")
379391
else:
@@ -390,8 +402,8 @@ def delete(self, force = False):
390402
def _unopened_error(self):
391403
if not self.__opened:
392404
logger.error(f"You are trying to move files to a {self.state} instance, you will have to start the instance first:")
393-
logger.error(f' - nbox.Instance("{self.project_id}", "{self.workspace_id}").start(...)')
394-
logger.error(f' - python3 -m nbox build --i "{self.project_id}" --workspace_id "{self.workspace_id}" start --help')
405+
logger.error(f' - nbox.Instance("{self.instance_id}", "{self.workspace_id}").start(...)')
406+
logger.error(f' - python3 -m nbox build --i "{self.instance_id}" --workspace_id "{self.workspace_id}" start --help')
395407
raise ValueError("Instance is not opened, please call .open() first")
396408

397409
def __create_connection(self, *, port: int = 6174):
@@ -401,7 +413,7 @@ def __create_connection(self, *, port: int = 6174):
401413
# create logging for RSock
402414
folder = U.join(U.env.NBOX_HOME_DIR(), "tunnel_logs")
403415
os.makedirs(folder, exist_ok=True)
404-
filepath = U.join(folder, f"tunnel_{self.project_id}.log") # consistency with IDs instead of names
416+
filepath = U.join(folder, f"tunnel_{self.instance_id}.log") # consistency with IDs instead of names
405417
file_logger = FileLogger(filepath)
406418
logger.debug(f"Logging RSock server to {filepath}")
407419

nbox/jd_core/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from nbox.jd_core.schedule import Schedule
2-
from nbox.jd_core.jobs import Job, _get_job_data, get_job_list
2+
from nbox.jd_core.jobs import Job, _get_job_data, print_job_list, new_job
33
from nbox.jd_core.serving import Serve, _get_deployment_data, print_serving_list
44
from nbox.jd_core.upload import upload_job_folder
55
from nbox.jd_core.cli import JobsCli, ServeCli

nbox/jd_core/cli.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
from functools import partial
22

33
from nbox.jd_core.upload import upload_job_folder
4-
from nbox.jd_core.jobs import get_job_list, Job
4+
from nbox.jd_core.jobs import print_job_list, Job, new_job, get_job_list
55
from nbox.jd_core.serving import print_serving_list, Serve
66

77
JobsCli = {
8-
"status": get_job_list,
8+
"status": print_job_list,
9+
"list": print_job_list,
910
"upload": partial(upload_job_folder, "job"),
10-
"get": Job,
11+
"pick": Job,
12+
"new": new_job,
1113
}
1214

1315

1416
ServeCli = {
1517
"status": print_serving_list,
1618
"upload": partial(upload_job_folder, "serving"),
17-
"get": Serve,
19+
"pick": Serve,
1820
}

0 commit comments

Comments
 (0)