projectmesa
Lines changed: 83 additions & 93 deletions
diff --git a/mesa_frames/concrete/datacollector.py b/mesa_frames/concrete/datacollector.py
--- a/mesa_frames/concrete/datacollector.py
+++ b/mesa_frames/concrete/datacollector.py
@@ -54,7 +54,6 @@ def step(self):
             self.dc.flush()
 """
 
-from unittest import result
 import polars as pl
 import boto3
 from urllib.parse import urlparse
@@ -65,14 +64,15 @@ def step(self):
 from collections.abc import Callable
 from mesa_frames import Model
 from psycopg2.extensions import connection
+import logging
 
 
 class DataCollector(AbstractDataCollector):
     def __init__(
         self,
         model: Model,
         model_reporters: dict[str, Callable] | None = None,
-        agent_reporters: dict[str, str | Callable] | None = None,
+        agent_reporters: dict[str, str] | None = None,
         trigger: Callable[[Any], bool] | None = None,
         reset_memory: bool = True,
         storage: Literal[
@@ -106,6 +106,14 @@ def __init__(
         max_worker : int
             Maximum number of worker threads used for flushing collected data asynchronously
         """
+        if agent_reporters:
+            for key, value in agent_reporters.items():
+                if not isinstance(value, str):
+                    raise TypeError(
+                        f"Agent reporter for '{key}' must be a string (the column name), "
+                        f"not a {type(value)}. Callable reporters are not supported for agents."
+                    )
+        
         super().__init__(
             model=model,
             model_reporters=model_reporters,
@@ -173,99 +181,73 @@ def _collect_model_reporters(self, current_model_step: int, batch_id: int):
 
     def _collect_agent_reporters(self, current_model_step: int, batch_id: int):
         """
-        Collect agent-level data using the agent_reporters, including unique agent IDs.
+        Collect agent-level data using the agent_reporters.
 
-        Constructs a LazyFrame with one column per reporter and includes:
-        - agent_id : unique identifier for each agent
-        - step, seed, and batch columns for context
-        - Columns for all requested agent reporters
+        For every AgentSet in the model, select `unique_id` plus the requested
+        reporter columns that exist in its DataFrame, tag rows with `agent_type`,
+        and concatenate everything into one "long" LazyFrame with `step`, `seed`
+        and `batch` columns. Sets lacking `df`, `unique_id` or any reporter are skipped.
         """
         all_agent_frames = []
+        reporter_map = self._agent_reporters
+
+        try:
+            agent_sets_list = self._model.sets._agentsets
+        except AttributeError:
+            logging.error(
+                "DataCollector could not find '_agentsets' attribute on model.sets. "
+                "Agent data collection will be skipped."
+            )
+            return
 
-        for col_name, reporter in self._agent_reporters.items():
-            if isinstance(reporter, str):
-                agent_set = self._model.sets[reporter]
+        for agent_set in agent_sets_list:
+            if not hasattr(agent_set, "df"):
+                logging.warning(
+                    f"AgentSet {agent_set.__class__.__name__} has no 'df' attribute. Skipping."
+                )
+                continue
 
-                if hasattr(agent_set, "df"):
-                    df = agent_set.df.select(["id", col_name]).rename(
-                        {"id": "agent_id"}
-                    )
-                elif hasattr(agent_set, "to_polars"):
-                    df = (
-                        agent_set.to_polars()
-                        .select(["id", col_name])
-                        .rename({"id": "agent_id"})
-                    )
-                else:
-                    records = []
-                    for agent in agent_set.values():
-                        agent_id = getattr(
-                            agent, "unique_id", getattr(agent, "id", None)
-                        )
-                        records.append(
-                            {
-                                "agent_id": agent_id,
-                                col_name: getattr(agent, col_name, None),
-                            }
-                        )
-                    df = pl.DataFrame(records)
+            ## Read names from the eager frame: LazyFrame.columns must resolve the schema
+            available_cols = agent_set.df.columns
+            agent_df = agent_set.df.lazy()
+            agent_type = agent_set.__class__.__name__
 
-            else:
-                result = reporter(self._model)
-
-                ## Case 1: already a DataFrame
-                if isinstance(result, pl.DataFrame):
-                    df = result
-                ## Case 2: dict or list -> convert
-                elif isinstance(result, dict):
-                    df = pl.DataFrame([result])
-                elif isinstance(result, list):
-                    df = pl.DataFrame(result)
-                else:
-                    ## Case 3: scalar or callable reporter
-                    if hasattr(self._model, "agents"):
-                        records = []
-                        for agent in self._model.agents:
-                            agent_id = getattr(
-                                agent, "unique_id", getattr(agent, "id", None)
-                            )
-                            value = getattr(
-                                agent,
-                                col_name,
-                                result if not callable(result) else None,
-                            )
-                            records.append({"agent_id": agent_id, col_name: value})
-                        df = pl.DataFrame(records)
-                    else:
-                        df = pl.DataFrame([{col_name: result}])
-
-                ## Ensure agent_id exists
-                if "agent_id" not in df.columns:
-                    df = df.with_columns(pl.lit(None).alias("agent_id"))
-
-            ## Add meta columns
-            df = df.with_columns(
-                [
-                    pl.lit(current_model_step).alias("step"),
-                    pl.lit(str(self.seed)).alias("seed"),
-                    pl.lit(batch_id).alias("batch"),
-                ]
-            )
-            all_agent_frames.append(df)
-
-        if all_agent_frames:
-            merged_df = all_agent_frames[0]
-            for next_df in all_agent_frames[1:]:
-                if "agent_id" not in next_df.columns:
-                    continue
-                merged_df = merged_df.join(
-                    next_df, on=["agent_id", "step", "seed", "batch"], how="outer"
+            if "unique_id" not in available_cols:
+                logging.warning(
+                    f"AgentSet {agent_type} 'df' has no 'unique_id' column. Skipping."
                 )
+                continue
+
+            cols_to_select = [pl.col("unique_id")]
+
+            for final_name, source_col in reporter_map.items():
+                if source_col in available_cols:
+                    ## Add the column, aliasing it if the key is different
+                    cols_to_select.append(pl.col(source_col).alias(final_name))
+            ## Only proceed if we have more than just unique_id
+            if len(cols_to_select) > 1:
+                set_frame = agent_df.select(cols_to_select)
+                ## Add the agent_type column
+                set_frame = set_frame.with_columns(
+                    pl.lit(agent_type).alias("agent_type")
+                )
+                all_agent_frames.append(set_frame)
 
-            agent_lazy_frame = merged_df.lazy()
-            self._frames.append(
-                ("agent", current_model_step, batch_id, agent_lazy_frame)
-            )
+        if not all_agent_frames:
+            return
+
+        ## Combine all agent set DataFrames into one
+        final_agent_frame = pl.concat(all_agent_frames, how="diagonal_relaxed")
+
+        ## Add metadata and append
+        final_agent_frame = final_agent_frame.with_columns(
+            [
+                pl.lit(current_model_step).alias("step"),
+                pl.lit(str(self.seed)).alias("seed"),
+                pl.lit(batch_id).alias("batch"),
+            ]
+        )
+        self._frames.append(("agent", current_model_step, batch_id, final_agent_frame))
 
     @property
     def data(self) -> dict[str, pl.DataFrame]:
@@ -534,13 +516,20 @@ def _validate_reporter_table_columns(
             If any expected columns are missing from the table.
         """
         expected_columns = set()
+        
+        ## Add columns required for the new long agent format
+        if table_name == "agent_data":
+            expected_columns.add("unique_id")
+            expected_columns.add("agent_type")
+
+        ## Add all keys from the reporter dict
         for col_name, required_column in reporter.items():
-            if isinstance(required_column, str):
-                for k, v in self._model.sets[required_column].items():
-                    expected_columns.add(
-                        (col_name + "_" + str(k.__class__.__name__)).lower()
-                    )
+            if table_name == "agent_data":
+                if isinstance(required_column, str):
+                    expected_columns.add(col_name.lower())
+                ## Callables are not supported for agents
             else:
+                ## For model, all reporters are callable
                 expected_columns.add(col_name.lower())
 
         query = f"""
@@ -560,6 +549,7 @@ def _validate_reporter_table_columns(
         required_columns = {
             "step": "Integer",
             "seed": "Varchar",
+            "batch": "Integer"
         }
 
         missing_required = {
@@ -606,4 +596,4 @@ def _execute_query_with_result(self, conn: connection, query: str) -> list[tuple
         """
         with conn.cursor() as cur:
             cur.execute(query)
-            return cur.fetchall()
+            return cur.fetchall()