|
1736 | 1736 | } |
1737 | 1737 | } |
1738 | 1738 | }, |
| 1739 | + "opt_2.7b_rtn_gpu": { |
| 1740 | + "working_dir": "huggingface/pytorch/text-generation/quantization", |
| 1741 | + "hf_model_name": "facebook/opt-2.7b", |
| 1742 | + "tune": { |
| 1743 | + "cmd": "python run_generation_gpu_woq.py", |
| 1744 | + "params": { |
| 1745 | + "model": "facebook/opt-2.7b", |
| 1746 | + "output_dir": "saved_results", |
| 1747 | + "weight_dtype": "int4_fullrange" |
| 1748 | + } |
| 1749 | + }, |
| 1750 | + "benchmark": { |
| 1751 | + "cmd": "python run_generation_gpu_woq.py", |
| 1752 | + "params": { |
| 1753 | + "model": "/tf_dataset2/models/nlp_toolkit/opt_rtn", |
| 1754 | + "mode": "benchmark", |
| 1755 | + "output_dir": "saved_results" |
| 1756 | + } |
| 1757 | + } |
| 1758 | + }, |
| 1759 | + "chatglm3_6b_rtn_gpu": { |
| 1760 | + "working_dir": "huggingface/pytorch/text-generation/quantization", |
| 1761 | + "hf_model_name": "THUDM/chatglm3-6b", |
| 1762 | + "tune": { |
| 1763 | + "cmd": "python run_generation_gpu_woq.py", |
| 1764 | + "params": { |
| 1765 | + "model": "THUDM/chatglm3-6b", |
| 1766 | + "output_dir": "saved_results", |
| 1767 | + "weight_dtype": "int4_fullrange" |
| 1768 | + } |
| 1769 | + }, |
| 1770 | + "benchmark": { |
| 1771 | + "cmd": "python run_generation_gpu_woq.py", |
| 1772 | + "params": { |
| 1773 | + "model": "/tf_dataset2/models/nlp_toolkit/chatglm3_rtn", |
| 1774 | + "mode": "benchmark", |
| 1775 | + "output_dir": "saved_results" |
| 1776 | + } |
| 1777 | + } |
| 1778 | + }, |
| 1779 | + "bloom_7b1_rtn_gpu": { |
| 1780 | + "working_dir": "huggingface/pytorch/text-generation/quantization", |
| 1781 | + "hf_model_name": "bigscience/bloom-7b1", |
| 1782 | + "tune": { |
| 1783 | + "cmd": "python run_generation_gpu_woq.py", |
| 1784 | + "params": { |
| 1785 | + "model": "bigscience/bloom-7b1", |
| 1786 | + "output_dir": "saved_results", |
| 1787 | + "weight_dtype": "int4_fullrange" |
| 1788 | + } |
| 1789 | + }, |
| 1790 | + "benchmark": { |
| 1791 | + "cmd": "python run_generation_gpu_woq.py", |
| 1792 | + "params": { |
| 1793 | + "model": "/tf_dataset2/models/nlp_toolkit/bloom_rtn", |
| 1794 | + "mode": "benchmark", |
| 1795 | + "output_dir": "saved_results" |
| 1796 | + } |
| 1797 | + } |
| 1798 | + }, |
| 1799 | + "baichuan_7b_rtn_gpu": { |
| 1800 | + "working_dir": "huggingface/pytorch/text-generation/quantization", |
| 1801 | + "hf_model_name": "baichuan-inc/Baichuan-7B", |
| 1802 | + "tune": { |
| 1803 | + "cmd": "python run_generation_gpu_woq.py", |
| 1804 | + "params": { |
| 1805 | + "model": "baichuan-inc/Baichuan-7B", |
| 1806 | + "output_dir": "saved_results", |
| 1807 | + "weight_dtype": "int4_fullrange" |
| 1808 | + } |
| 1809 | + }, |
| 1810 | + "benchmark": { |
| 1811 | + "cmd": "python run_generation_gpu_woq.py", |
| 1812 | + "params": { |
| 1813 | + "model": "/tf_dataset2/models/nlp_toolkit/baichuan_rtn", |
| 1814 | + "mode": "benchmark", |
| 1815 | + "output_dir": "saved_results" |
| 1816 | + } |
| 1817 | + } |
| 1818 | + }, |
| 1819 | + "baichuan2_7b_rtn_gpu": { |
| 1820 | + "working_dir": "huggingface/pytorch/text-generation/quantization", |
| 1821 | + "hf_model_name": "baichuan-inc/Baichuan2-7B-Chat", |
| 1822 | + "tune": { |
| 1823 | + "cmd": "python run_generation_gpu_woq.py", |
| 1824 | + "params": { |
| 1825 | + "model": "baichuan-inc/Baichuan2-7B-Chat", |
| 1826 | + "output_dir": "saved_results", |
| 1827 | + "weight_dtype": "int4_fullrange" |
| 1828 | + } |
| 1829 | + }, |
| 1830 | + "benchmark": { |
| 1831 | + "cmd": "python run_generation_gpu_woq.py", |
| 1832 | + "params": { |
| 1833 | + "model": "/tf_dataset2/models/nlp_toolkit/baichuan2_rtn", |
| 1834 | + "mode": "benchmark", |
| 1835 | + "output_dir": "saved_results" |
| 1836 | + } |
| 1837 | + } |
| 1838 | + }, |
| 1839 | + "codestral_rtn_gpu": { |
| 1840 | + "working_dir": "huggingface/pytorch/text-generation/quantization", |
| 1841 | + "hf_model_name": "mistralai/Codestral-22B-v0.1", |
| 1842 | + "tune": { |
| 1843 | + "cmd": "python run_generation_gpu_woq.py", |
| 1844 | + "params": { |
| 1845 | + "model": "mistralai/Codestral-22B-v0.1", |
| 1846 | + "output_dir": "saved_results", |
| 1847 | + "weight_dtype": "int4_fullrange" |
| 1848 | + } |
| 1849 | + }, |
| 1850 | + "benchmark": { |
| 1851 | + "cmd": "python run_generation_gpu_woq.py", |
| 1852 | + "params": { |
| 1853 | + "model": "/tf_dataset2/models/nlp_toolkit/codestral_rtn", |
| 1854 | + "mode": "benchmark", |
| 1855 | + "output_dir": "saved_results" |
| 1856 | + } |
| 1857 | + } |
| 1858 | + }, |
1739 | 1859 | "mistral_7b_rtn_gpu": { |
1740 | 1860 | "working_dir": "huggingface/pytorch/text-generation/quantization", |
1741 | 1861 | "hf_model_name": "mistralai/Mistral-7B-v0.1", |
|
0 commit comments