Merge pull request #133 from kibitzing/update_tutorial

init27 · web-flow · commit db31b64a4364 · 2025-11-02T11:30:21.000-08:00
Update OpenEnv tutorial
diff --git a/examples/OpenEnv_Tutorial.ipynb b/examples/OpenEnv_Tutorial.ipynb
@@ -691,9 +691,14 @@
     "\n",
     "```\n",
     "⬜ ⬜ 🔴 ⬜ ⬜\n",
+    "⬜ ⬜ ⬜ ⬜ ⬜\n",
     "⬜ ⬜ ⬜ ⬜ ⬜   Ball\n",
+    "⬜ ⬜ ⬜ ⬜ ⬜\n",
     "⬜ ⬜ ⬜ ⬜ ⬜   falls\n",
+    "⬜ ⬜ ⬜ ⬜ ⬜\n",
     "⬜ ⬜ ⬜ ⬜ ⬜   down\n",
+    "⬜ ⬜ ⬜ ⬜ ⬜\n",
+    "⬜ ⬜ ⬜ ⬜ ⬜\n",
     "⬜ ⬜ 🏓 ⬜ ⬜\n",
     "     Paddle\n",
     "```\n",
@@ -702,7 +707,7 @@
     "<td width=\"60%\">\n",
     "\n",
     "**Rules:**\n",
-    "- 5×5 grid\n",
+    "- 10×5 grid (10 rows × 5 columns)\n",
     "- Ball falls from random column\n",
     "- Move paddle left/right to catch it\n",
     "\n",
@@ -817,8 +822,8 @@
     "         \"OPENSPIEL_GAME\": \"catch\",\n",
     "         \"OPENSPIEL_AGENT_PLAYER\": \"0\",\n",
     "         \"OPENSPIEL_OPPONENT_POLICY\": \"random\"},\n",
-    "    stdout=subprocess.PIPE,\n",
-    "    stderr=subprocess.PIPE,\n",
+    "    stdout=subprocess.DEVNULL,\n",
+    "    stderr=subprocess.DEVNULL,\n",
     "    text=True,\n",
     "    cwd=work_dir\n",
     ")\n",
@@ -895,6 +900,7 @@
     "\n",
     "print(\"📥 Received OpenSpielObservation:\")\n",
     "print(f\"   • info_state: {result.observation.info_state[:10]}... (first 10 values)\")\n",
+    "print(f\"   • number of info_state: {len(result.observation.info_state)}\")\n",
     "print(f\"   • legal_actions: {result.observation.legal_actions}\")\n",
     "print(f\"   • game_phase: {result.observation.game_phase}\")\n",
     "print(f\"   • done: {result.done}\")\n",
@@ -1006,23 +1012,25 @@
     "\n",
     "    def select_action(self, obs: OpenSpielObservation) -> int:\n",
     "        # Parse OpenSpiel observation\n",
-    "        # For Catch: info_state is a flattened 5x5 grid\n",
+    "        # For Catch: info_state is a flattened 10x5 grid\n",
     "        # Ball position and paddle position encoded in the vector\n",
     "        info_state = obs.info_state\n",
     "\n",
     "        # Find ball and paddle positions from info_state\n",
-    "        # Catch uses a 5x5 grid, so 25 values\n",
+    "        # Catch uses a 10x5 grid, so 50 values\n",
     "        grid_size = 5\n",
     "\n",
-    "        # Find positions (ball = 1.0, paddle = 0.5 in the flattened grid)\n",
+    "        # Find positions (ball = 1.0 in the flattened grid, paddle = 1.0 in the last row of the flattened grid)\n",
     "        ball_col = None\n",
     "        paddle_col = None\n",
     "\n",
     "        for idx, val in enumerate(info_state):\n",
     "            if abs(val - 1.0) < 0.01:  # Ball\n",
     "                ball_col = idx % grid_size\n",
-    "            elif abs(val - 0.5) < 0.01:  # Paddle\n",
-    "                paddle_col = idx % grid_size\n",
+    "                break\n",
+    "\n",
+    "        last_row = info_state[-grid_size:]\n",
+    "        paddle_col = last_row.index(1.0) # Paddle\n",
     "\n",
     "        if ball_col is not None and paddle_col is not None:\n",
     "            if paddle_col < ball_col:\n",