1111import dspy
1212from dspy .clients .cache import request_cache
1313from dspy .clients .openai import OpenAIProvider
14- from dspy .clients .provider import Provider , TrainingJob
14+ from dspy .clients .provider import Provider , TrainingJob , ReinforceJob
1515from dspy .clients .utils_finetune import TrainDataFormat
1616from dspy .dsp .utils .settings import settings
1717from dspy .utils .callback import BaseCallback
@@ -188,10 +188,6 @@ def finetune(
188188 ) -> TrainingJob :
189189 from dspy import settings as settings
190190
191- err = "Fine-tuning is an experimental feature."
192- err += " Set `dspy.settings.experimental` to `True` to use it."
193- assert settings .experimental , err
194-
195191 err = f"Provider { self .provider } does not support fine-tuning."
196192 assert self .provider .finetunable , err
197193
@@ -212,6 +208,17 @@ def thread_function_wrapper():
212208
213209 return job
214210
def reinforce(self, train_kwargs) -> ReinforceJob:
    """Start a reinforcement-learning (e.g. GRPO) job for this LM via its provider.

    Args:
        train_kwargs: Provider-specific training configuration, forwarded
            unchanged to the provider's ``ReinforceJob`` constructor.

    Returns:
        ReinforceJob: An initialized job object (``initialize()`` has already
            been called on it before it is returned).

    Raises:
        AssertionError: If the provider does not declare ``reinforceable``.
            (Matches the ``assert``-based validation style used by
            ``finetune`` in this class.)
    """
    # TODO(GRPO Team): Should we return an initialized job here?
    # NOTE(review): dropped the unused `from dspy import settings` import that
    # was carried over from `finetune` — nothing in this body reads settings.

    err = f"Provider {self.provider} does not implement the reinforcement learning interface."
    assert self.provider.reinforceable, err

    job = self.provider.ReinforceJob(lm=self, train_kwargs=train_kwargs)
    job.initialize()
    return job
221+
215222 def _run_finetune_job (self , job : TrainingJob ):
216223 # TODO(enhance): We should listen for keyboard interrupts somewhere.
217224 # Requires TrainingJob.cancel() to be implemented for each provider.
0 commit comments