@@ -25,16 +25,32 @@ def get_onnx_model_path(
2525 local_model_dir : Optional [str ] = None ,
2626):
2727 """
28- exports the model to onnx if pre-exported file is not found and returns onnx_model_path
29-
30- ``Mandatory`` Args:
31- :model_name (str): Hugging Face Model Card name, Example: ``gpt2``.
32- ``Optional`` Args:
33- :cache_dir (str): Cache dir where downloaded HuggingFace files are stored. ``Defaults to None.``
34- :tokenizer (Union[PreTrainedTokenizer, PreTrainedTokenizerFast]): Pass model tokenizer. ``Defaults to None.``
35- :hf_token (str): HuggingFace login token to access private repos. ``Defaults to None.``
36- :local_model_dir (str): Path to custom model weights and config files. ``Defaults to None.``
37- :full_batch_size (int): Set full batch size to enable continuous batching mode. ``Defaults to None.``
28+ Exports the PyTorch model to ONNX format if a pre-exported file is not found,
29+ and returns the path to the ONNX model.
30+
31+ This function loads a Hugging Face model via QEFFCommonLoader, then calls
32+ its export method to generate the ONNX graph.
33+
34+ Parameters
35+ ----------
36+ model_name : str
37+ Hugging Face Model Card name (e.g., ``gpt2``).
38+
39+ Other Parameters
40+ ----------------
41+ cache_dir : str, optional
42+ Cache directory where downloaded HuggingFace files are stored. Default is None.
43+ hf_token : str, optional
44+ HuggingFace login token to access private repositories. Default is None.
45+ full_batch_size : int, optional
46+ Sets the full batch size to enable continuous batching mode. Default is None.
47+ local_model_dir : str, optional
48+ Path to custom model weights and config files. Default is None.
49+
50+ Returns
51+ -------
52+ str
53+ Path of the generated ONNX graph file.
3854 """
3955 logger .info (f"Exporting Pytorch { model_name } model to ONNX..." )
4056
@@ -58,20 +74,35 @@ def main(
5874 full_batch_size : Optional [int ] = None ,
5975) -> None :
6076 """
61- Helper function used by export CLI app for exporting to ONNX Model.
62-
63- ``Mandatory`` Args:
64- :model_name (str): Hugging Face Model Card name, Example: ``gpt2``.
65-
66- ``Optional`` Args:
67- :cache_dir (str): Cache dir where downloaded HuggingFace files are stored. ``Defaults to None.``
68- :hf_token (str): HuggingFace login token to access private repos. ``Defaults to None.``
69- :local_model_dir (str): Path to custom model weights and config files. ``Defaults to None.``
70- :full_batch_size (int): Set full batch size to enable continuous batching mode. ``Defaults to None.``
77+ Main function for the QEfficient ONNX export CLI application.
78+
79+ This function serves as the entry point for exporting a PyTorch model, loaded
80+ via QEFFCommonLoader, to the ONNX format. It prepares the necessary
81+ paths and calls `get_onnx_model_path`.
82+
83+ Parameters
84+ ----------
85+ model_name : str
86+ Hugging Face Model Card name (e.g., ``gpt2``).
87+
88+ Other Parameters
89+ ----------------
90+ cache_dir : str, optional
91+ Cache directory where downloaded HuggingFace files are stored. Default is None.
92+ hf_token : str, optional
93+ HuggingFace login token to access private repositories. Default is None.
94+ local_model_dir : str, optional
95+ Path to custom model weights and config files. Default is None.
96+ full_batch_size : int, optional
97+ Sets the full batch size to enable continuous batching mode. Default is None.
98+
99+ Examples
100+ --------
101+ To export a model from the command line:
71102
72103 .. code-block:: bash
73104
74- python -m QEfficient.cloud.export OPTIONS
105+ python -m QEfficient.cloud.export --model-name gpt2 --cache-dir /path/to/cache
75106
76107 """
77108 cache_dir = check_and_assign_cache_dir (local_model_dir , cache_dir )