Skip to content

Commit fc5ef85

Browse files
Merge pull request #2371 from AI-Hypercomputer:wstcliyu/pw-unit
PiperOrigin-RevId: 827685629
2 parents 83b3519 + d9cbcb2 commit fc5ef85

File tree

3 files changed

+138
-3
lines changed

3 files changed

+138
-3
lines changed

.github/workflows/RunTests.yml

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,19 @@ jobs:
9595
container_resource_option: "--privileged"
9696
is_scheduled_run: ${{ github.event_name == 'schedule' }}
9797

98+
tpu_pathways_unit_tests:
99+
needs: tpu_image
100+
uses: ./.github/workflows/run_pathways_tests_internal.yml
101+
with:
102+
device_type: tpu
103+
device_name: v4-8
104+
cloud_runner: linux-x86-ct4p-240-4tpu
105+
pytest_marker: 'not cpu_only and not gpu_only and not integration_test'
106+
xla_python_client_mem_fraction: 0.75
107+
tf_force_gpu_allow_growth: false
108+
container_resource_option: "--privileged"
109+
is_scheduled_run: ${{ github.event_name == 'schedule' }}
110+
98111
tpu_integration_tests:
99112
needs: tpu_image
100113
uses: ./.github/workflows/run_tests_internal.yml
@@ -138,7 +151,7 @@ jobs:
138151

139152
clean_up:
140153
if: ${{ always() }}
141-
needs: [cpu_unit_tests, gpu_unit_tests, gpu_integration_tests, tpu_unit_tests, tpu_integration_tests]
154+
needs: [cpu_unit_tests, gpu_unit_tests, gpu_integration_tests, tpu_unit_tests, tpu_integration_tests, tpu_pathways_unit_tests]
142155
name: "Clean up"
143156
runs-on: ["self-hosted"]
144157
permissions:
@@ -157,7 +170,7 @@ jobs:
157170

158171
notify_failure:
159172
name: Notify failed build # creates an issue or modifies last open existing issue for failed build
160-
needs: [cpu_unit_tests, gpu_unit_tests, gpu_integration_tests, tpu_unit_tests, tpu_integration_tests]
173+
needs: [cpu_unit_tests, gpu_unit_tests, gpu_integration_tests, tpu_unit_tests, tpu_integration_tests, tpu_pathways_unit_tests]
161174
if: ${{ always() }}
162175
runs-on: ubuntu-latest
163176
permissions:
@@ -189,7 +202,7 @@ jobs:
189202
name: Close issue after 3 successful builds
190203
# This job runs only if all the preceding test jobs succeeded
191204
if: ${{ success() && github.event.pull_request == null && github.event_name != 'workflow_dispatch' }}
192-
needs: [cpu_unit_tests, gpu_unit_tests, gpu_integration_tests, tpu_unit_tests, tpu_integration_tests]
205+
needs: [cpu_unit_tests, gpu_unit_tests, gpu_integration_tests, tpu_unit_tests, tpu_integration_tests, tpu_pathways_unit_tests]
193206
runs-on: ubuntu-latest
194207
permissions:
195208
issues: write
.github/workflows/run_pathways_tests_internal.yml

Lines changed: 114 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,114 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# This file runs unit tests with the Pathways backend.
16+
17+
name: Run Pathways Tests
18+
19+
on:
20+
workflow_call:
21+
inputs:
22+
device_type:
23+
required: true
24+
type: string
25+
device_name:
26+
required: true
27+
type: string
28+
image_type:
29+
required: false
30+
type: string
31+
pytest_marker:
32+
required: true
33+
type: string
34+
pytest_addopts:
35+
required: false
36+
type: string
37+
default: ''
38+
is_scheduled_run:
39+
required: true
40+
type: string
41+
xla_python_client_mem_fraction:
42+
required: true
43+
type: string
44+
tf_force_gpu_allow_growth:
45+
required: true
46+
type: string
47+
container_resource_option:
48+
required: true
49+
type: string
50+
cloud_runner:
51+
required: false
52+
type: string
53+
54+
jobs:
55+
run:
56+
runs-on: ${{ inputs.cloud_runner != '' && inputs.cloud_runner || fromJson(format('["self-hosted", "{0}", "{1}"]', inputs.device_type, inputs.device_name)) }}
57+
container:
58+
image: gcr.io/tpu-prod-env-multipod/maxtext_${{ github.run_id }}:${{ inputs.image_type != '' && inputs.image_type || inputs.device_type }}
59+
volumes:
60+
- ${{ github.workspace }}:/tmp
61+
env:
62+
XLA_PYTHON_CLIENT_MEM_FRACTION: ${{ inputs.xla_python_client_mem_fraction }}
63+
TF_FORCE_GPU_ALLOW_GROWTH: ${{ inputs.tf_force_gpu_allow_growth }}
64+
IFRT_PROXY_USE_INSECURE_GRPC_CREDENTIALS: true
65+
JAX_PLATFORMS: "proxy"
66+
JAX_BACKEND_TARGET: "grpc://localhost:29000"
67+
options: ${{ inputs.container_resource_option }}
68+
steps:
69+
- uses: actions/checkout@v4
70+
- name: Run
71+
run: |
72+
if [ "${{ inputs.is_scheduled_run }}" = "true" ]; then
73+
FINAL_PYTEST_MARKER="${{ inputs.pytest_marker }}"
74+
else
75+
FINAL_PYTEST_MARKER="${{ inputs.pytest_marker }} and not scheduled_only"
76+
fi
77+
python3 -m pip install -e . --no-dependencies &&
78+
python3 -m pip uninstall -y libtpu &&
79+
# TODO(b/454659463): Enable test_default_hlo_match after volume mount is supported.
80+
python3 -m pytest ${{ inputs.pytest_addopts }} -v -m "${FINAL_PYTEST_MARKER}" -k "not test_default_hlo_match" --durations=0
81+
82+
services:
83+
resource_manager:
84+
image: us-docker.pkg.dev/cloud-tpu-v2-images/pathways/server:latest
85+
ports:
86+
- "29001:29001"
87+
- "29002:29002"
88+
options:
89+
--entrypoint=[/usr/pathways/run/cloud_pathways_server_sanitized, --server_port=29001, --node_type=resource_manager, --instance_count=1, --instance_type=tpuv4:2x2x1, --gcs_scratch_location=gs://cloud-pathways-staging/tmp]
90+
env:
91+
TPU_SKIP_MDS_QUERY: true
92+
93+
worker:
94+
image: us-docker.pkg.dev/cloud-tpu-v2-images/pathways/server:latest
95+
ports:
96+
- "29005:29005"
97+
- "29006:29006"
98+
- "8471:8471"
99+
- "8080:8080"
100+
options:
101+
--entrypoint=[/usr/pathways/run/cloud_pathways_server_sanitized, --server_port=29005, --resource_manager_address=localhost:29001, --gcs_scratch_location=gs://cloud-pathways-staging/tmp]
102+
--tpu=4
103+
104+
proxy:
105+
image: us-docker.pkg.dev/cloud-tpu-v2-images/pathways/proxy_server:latest
106+
volumes:
107+
- ${{ github.workspace }}:/tmp
108+
ports:
109+
- "29000:29000"
110+
env:
111+
IFRT_PROXY_USE_INSECURE_GRPC_CREDENTIALS: true
112+
XLA_FLAGS: "--xla_dump_to=/tmp/aot_test_dump --xla_dump_hlo_as_text --xla_dump_hlo_module_re=jit_train_step"
113+
options:
114+
--entrypoint=[/usr/pathways/run/cloud_proxy_server_sanitized, --server_port=29000, --resource_manager_address=localhost:29001, --gcs_scratch_location=gs://cloud-pathways-staging/tmp, --xla_tpu_scoped_vmem_limit_kib=65536, --xla_tpu_spmd_rng_bit_generator_unsafe=true]

tests/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,3 +11,11 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
15+
"""
16+
Test initialization
17+
"""
18+
19+
import pathwaysutils
20+
21+
pathwaysutils.initialize()

0 commit comments

Comments
 (0)