Completes OPEN-4820: Add a timestamp column for each prediction returned by the model runners

gustavocidornelas · whoseoyster · commit c80e41c5917f · 2023-08-23T00:41:43.000-07:00
diff --git a/examples/llms/general-llm/product-names.ipynb b/examples/llms/general-llm/product-names.ipynb
@@ -356,7 +356,7 @@
    "outputs": [],
    "source": [
     "# There are costs in running this cell!\n",
-    "dataset[\"model_output\"] = llm_runner.run(dataset)"
+    "dataset[\"model_output\"] = llm_runner.run(dataset)[\"predictions\"]"
    ]
   },
   {
diff --git a/examples/llms/ner/entity-extraction.ipynb b/examples/llms/ner/entity-extraction.ipynb
@@ -389,7 +389,7 @@
    "outputs": [],
    "source": [
     "# There are costs in running this cell!\n",
-    "dataset[\"model_output\"] = llm_runner.run(dataset)"
+    "dataset[\"model_output\"] = llm_runner.run(dataset)[\"predictions\"]"
    ]
   },
   {
diff --git a/openlayer/model_runners/ll_model_runners.py b/openlayer/model_runners/ll_model_runners.py
@@ -4,6 +4,7 @@
 """
 
 import logging
+import time
 import warnings
 from abc import ABC, abstractmethod
 from typing import Any, Dict, List, Optional, Union
@@ -67,10 +68,11 @@ def _run_in_memory(
     ) -> pd.DataFrame:
         """Runs the input data through the model in memory."""
         self.logger.info("Running LLM in memory...")
+
         model_outputs = []
+        timestamps = []
         run_exceptions = set()
         run_cost = 0
-
         for input_data_row in input_data_df.iterrows():
             # Check if output column already has a value to avoid re-running
             if (
@@ -96,6 +98,7 @@ def _run_in_memory(
             except Exception as exc:
                 model_outputs.append(None)
                 run_exceptions.add(exc)
+            timestamps.append(time.time())
 
         self.logger.info("Successfully ran data through the model!")
 
@@ -110,7 +113,7 @@ def _run_in_memory(
             )
         self.cost_estimates.append(run_cost)
 
-        return pd.DataFrame({"predictions": model_outputs})
+        return pd.DataFrame({"predictions": model_outputs, "timestamps": timestamps})
 
     def _inject_prompt(self, input_variables_dict: dict) -> List[Dict[str, str]]:
         """Injects the input variables into the prompt template.
diff --git a/openlayer/model_runners/traditional_ml_model_runners.py b/openlayer/model_runners/traditional_ml_model_runners.py
@@ -7,6 +7,7 @@
 import os
 import shutil
 import tempfile
+import time
 from abc import ABC, abstractmethod
 
 import pandas as pd
@@ -71,6 +72,7 @@ def _run_in_conda(self, input_data: pd.DataFrame) -> pd.DataFrame:
             output_data = pd.read_csv(f"{temp_dir}/output_data.csv")
 
             output_data = self._post_process_output(output_data)
+            output_data["timestamps"] = time.time()
 
         return output_data
 

Original file line number	Diff line number	Diff line change
`@@ -356,7 +356,7 @@`
`356`	`356`	`"outputs": [],`
`357`	`357`	`"source": [`
`358`	`358`	`"# There are costs in running this cell!\n",`
`359`		`- "dataset[\"model_output\"] = llm_runner.run(dataset)"`
	`359`	`+ "dataset[\"model_output\"] = llm_runner.run(dataset)[\"predictions\"]"`
`360`	`360`	`]`
`361`	`361`	`},`
`362`	`362`	`{`
Original file line number	Diff line number	Diff line change
`@@ -389,7 +389,7 @@`
`389`	`389`	`"outputs": [],`
`390`	`390`	`"source": [`
`391`	`391`	`"# There are costs in running this cell!\n",`
`392`		`- "dataset[\"model_output\"] = llm_runner.run(dataset)"`
	`392`	`+ "dataset[\"model_output\"] = llm_runner.run(dataset)[\"predictions\"]"`
`393`	`393`	`]`
`394`	`394`	`},`
`395`	`395`	`{`