Skip to content

Commit 5fe617c

Browse files
committed
Add docstrings.
1 parent b09f843 commit 5fe617c

File tree

1 file changed

+100
-0
lines changed

1 file changed

+100
-0
lines changed

ads/jobs/builders/runtimes/pytorch_runtime.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,23 +48,75 @@ def with_git(
4848

4949
@property
def git(self) -> str:
    """The Git repository specification for the source code."""
    source_spec = self.get_spec(self.CONST_GIT)
    return source_spec
5253

5354
def with_inputs(self, mappings: dict):
    """Sets the input files that will be copied into the job run.

    Parameters
    ----------
    mappings : dict
        Maps each source path (URI) to its destination path.
        A source can be an http/ftp link or an OCI object storage URI.
        The destination is a path in the job run, relative to the working directory.

    Returns
    -------
    self
        The runtime instance.

    Examples
    --------
    >>> pt_runtime.with_inputs({"oci://bucket@namespace/path/to/file.txt": "data/input.txt"})

    """
    spec_key = self.CONST_INPUT
    return self.set_spec(spec_key, mappings)
5574

5675
@property
def inputs(self) -> dict:
    """The mapping of input files to be copied into the job run."""
    input_mappings = self.get_spec(self.CONST_INPUT)
    return input_mappings
5979

6080
def with_replica(self, count: int):
    """Sets how many nodes (job runs) the job will use.

    Parameters
    ----------
    count : int
        Number of nodes (job runs).

    Returns
    -------
    self
        The runtime instance.
    """
    spec_key = self.CONST_REPLICA
    return self.set_spec(spec_key, count)
6294

6395
@property
def replica(self) -> int:
    """The configured number of nodes (job runs)."""
    node_count = self.get_spec(self.CONST_REPLICA)
    return node_count
6699

67100
def with_dependency(self, pip_req=None, pip_pkg=None):
101+
"""Specifies additional dependencies to be installed using pip.
102+
103+
Parameters
104+
----------
105+
pip_req : str, optional
106+
Path of the requirements.txt file, relative to the working directory, by default None
107+
pip_pkg : str, optional
108+
Command line args for `pip install`, by default None.
109+
Packages with version specification need to be quoted.
110+
111+
Returns
112+
-------
113+
self
114+
The runtime instance.
115+
116+
Examples
117+
--------
118+
>>> pt_runtime.with_dependency(pip_pkg='"package>1.0"')
119+
"""
68120
dep = {}
69121
if pip_req:
70122
dep[self.CONST_PIP_REQ] = pip_req
@@ -76,24 +128,72 @@ def with_dependency(self, pip_req=None, pip_pkg=None):
76128

77129
@property
def dependencies(self) -> dict:
    """The additional pip dependencies specified for the job."""
    dependency_spec = self.get_spec(self.CONST_DEP)
    return dependency_spec
80133

81134
def with_command(self, command: str, use_deepspeed=False):
    """Sets the command used to launch the workload.

    Parameters
    ----------
    command : str
        The command for launching the workload.
        It should start with `torchrun`, `deepspeed` or `accelerate launch`.

        For `torchrun`,
        ADS will set `--nnode`, `--nproc_per_node`, `--rdzv_backend` and `--rdzv_endpoint` automatically.
        The default `rdzv_backend` will be `c10d`.
        The default port for `rdzv_endpoint` is 29400.

        For `deepspeed`,
        ADS will generate the hostfile automatically and set up the SSH configurations.

        For `accelerate launch`,
        you can add your config YAML to the source code and specify it using the `--config_file` argument.
        In your config, please use `LOCAL_MACHINE` as the compute environment.
        The same config file will be used by all nodes in a multi-node workload.
        ADS will set `--num_processes`, `--num_machines`, `--machine_rank`, `--main_process_ip`
        and `--main_process_port` automatically. These values will override the ones from your config YAML.
        The default `main_process_port` is 29400.

        If you don't want to use the options set by ADS automatically,
        you can specify them explicitly in the command.

    use_deepspeed : bool, optional
        Indicates whether to configure deepspeed for a multi-node workload, by default False.
        If your command starts with "deepspeed" or contains the argument "--use_deepspeed",
        your job runs will be configured for deepspeed regardless of this setting.
        Make sure to set use_deepspeed to `True` here
        if you are using `accelerate launch` with a deepspeed setting in the config YAML.

    Returns
    -------
    self
        The runtime instance.

    Examples
    --------
    >>> pt_runtime.with_command("torchrun train.py")
    """
    # The deepspeed flag is only ever switched on here; a falsy
    # ``use_deepspeed`` leaves any previously stored value untouched.
    if use_deepspeed:
        self.set_spec(self.CONST_DEEPSPEED, True)
    launch_command = command
    return self.set_spec(self.CONST_COMMAND, launch_command)
85181

86182
@property
def command(self):
    """The command used to launch the workload."""
    launch_command = self.get_spec(self.CONST_COMMAND)
    return launch_command
89186

90187
@property
def use_deepspeed(self):
    """Indicates whether to configure deepspeed for a multi-node workload."""
    # Collapse whatever is stored under the deepspeed key to a strict bool.
    return bool(self.get_spec(self.CONST_DEEPSPEED))
95193

96194
def run(self, dsc_job, **kwargs):
195+
"""Starts the job runs
196+
"""
97197
replicas = self.replica if self.replica else 1
98198
main_run = None
99199
for i in range(replicas):

0 commit comments

Comments
 (0)