kubeflow · gmfrasca · Nov 8, 2025 · Nov 13, 2025 · Nov 13, 2025 · Nov 14, 2025
diff --git a/.github/scripts/find-changed-components-and-pipelines.sh b/.github/scripts/find-changed-components-and-pipelines.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+# Extract unique component and pipeline directories from a list of changed files
+# Usage: ./find-changed-components-and-pipelines.sh <file1> <file2> ...
+# Output: Space-separated list of directories
+# Note: If the generator script is changed, returns all components and pipelines
+
+set -e
+
+components=""
+pipelines=""
+generator_changed=false
+
+# Helper function to find all component directories
+# Scans both components/ and third_party/components/
+find_all_components() {
+    for base_dir in "components" "third_party/components"; do
+        if [ -d "$base_dir" ]; then
+            for category_dir in "$base_dir"/*/; do
+                if [ -d "$category_dir" ]; then
+                    for comp_dir in "$category_dir"*/; do
+                        if [ -d "$comp_dir" ] && [ -f "${comp_dir}component.py" ]; then
+                            comp_path="${comp_dir%/}"
+                            components="$components $comp_path"
+                        fi
+                    done
+                fi
+            done
+        fi
+    done
+}
+
+# Helper function to find all pipeline directories
+# Scans both pipelines/ and third_party/pipelines/
+find_all_pipelines() {
+    for base_dir in "pipelines" "third_party/pipelines"; do
+        if [ -d "$base_dir" ]; then
+            for category_dir in "$base_dir"/*/; do
+                if [ -d "$category_dir" ]; then
+                    for pipe_dir in "$category_dir"*/; do
+                        if [ -d "$pipe_dir" ] && [ -f "${pipe_dir}pipeline.py" ]; then
+                            pipe_path="${pipe_dir%/}"
+                            pipelines="$pipelines $pipe_path"
+                        fi
+                    done
+                fi
+            done
+        fi
+    done
+}
+
+# Helper function to extract directory from file path and add to variable
+# Args: $1 = file path, $2 = pattern to match
+# Automatically detects third_party prefix, cut depth, and output variable
+extract_dir_from_file() {
+    local file=$1
+    local pattern=$2
+
+    if [[ "$file" == $pattern ]]; then
+        # Determine output variable based on pattern
+        local var_name
+        if [[ "$pattern" == *components* ]]; then
+            var_name="components"
+        elif [[ "$pattern" == *pipelines* ]]; then
+            var_name="pipelines"
+        else
+            return
+        fi
+
+        # Default to 3 fields, increment by 1 if third_party is in the pattern
+        local cut_fields=3
+        if [[ "$pattern" == third_party/* ]]; then
+            cut_fields=4
+        fi
+
+        local dir=$(echo "$file" | cut -d'/' -f1-"$cut_fields")
+        local current_value
+        eval "current_value=\$$var_name"
+        if [[ ! " $current_value " =~ " $dir " ]]; then
+            eval "$var_name=\"\${$var_name} \${dir}\""
+        fi
+    fi
+}
+
+# Check if the generator script itself was changed
+for file in "$@"; do
+    if [[ "$file" == scripts/generate_readme/* ]]; then
+        generator_changed=true
+        break
+    fi
+done
+
+# If generator changed, find all components and pipelines
+if [ "$generator_changed" = true ]; then
+    echo "Generator script changed, checking all components and pipelines" >&2
+
+    find_all_components
+    find_all_pipelines
+
+    # Trim whitespace and output
+    all_targets="$(echo "$components $pipelines" | xargs)"
+    echo "$all_targets"
+    exit 0
+fi
+
+# Normal operation: extract directories from changed files
+for file in "$@"; do
+    extract_dir_from_file "$file" "components/*/*/*"
+    extract_dir_from_file "$file" "third_party/components/*/*/*"
+    extract_dir_from_file "$file" "pipelines/*/*/*"
+    extract_dir_from_file "$file" "third_party/pipelines/*/*/*"
+done
+
+# Trim whitespace and output
+all_targets="$(echo "$components $pipelines" | xargs)"
+echo "$all_targets"
+
diff --git a/.github/scripts/test-readme-check.sh b/.github/scripts/test-readme-check.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+# Test script to simulate README validation workflow locally
+# Usage: ./test-readme-check.sh [--ci] [component_dir|pipeline_dir]
+#
+# Options:
+#   --ci    Run in CI mode (non-interactive, always restore original README on failure)
+
+set -e
+
+# Parse --ci flag
+CI_MODE=false
+if [ "$1" == "--ci" ]; then
+    CI_MODE=true
+    shift
+fi
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+print_success() {
+    echo -e "${GREEN}✅ $1${NC}"
+}
+
+print_error() {
+    echo -e "${RED}❌ $1${NC}"
+}
+
+print_warning() {
+    echo -e "${YELLOW}⚠️  $1${NC}"
+}
+
+print_header() {
+    echo "=================================================="
+    echo "$1"
+    echo "=================================================="
+}
+
+# Check if target directory is provided
+if [ $# -eq 0 ]; then
+    echo "Usage: $0 [--ci] <component_dir|pipeline_dir>"
+    echo ""
+    echo "Options:"
+    echo "  --ci    Run in CI mode (non-interactive)"
+    echo ""
+    echo "Examples:"
+    echo "  $0 components/dev/hello_world"
+    echo "  $0 pipelines/training/my_pipeline"
+    echo "  $0 third_party/components/dev/external_component"
+    echo "  $0 --ci components/dev/hello_world  # CI mode"
+    exit 1
+fi
+
+TARGET_DIR=$1
+
+# Determine if it's a component or pipeline
+if [[ "$TARGET_DIR" == components/* ]] || [[ "$TARGET_DIR" == third_party/components/* ]]; then
+    TYPE_FLAG="--component"
+elif [[ "$TARGET_DIR" == pipelines/* ]] || [[ "$TARGET_DIR" == third_party/pipelines/* ]]; then
+    TYPE_FLAG="--pipeline"
+else
+    print_error "Invalid directory. Must be in components/, pipelines/, third_party/components/, or third_party/pipelines/"
+    exit 1
+fi
+
+print_header "Testing README validation for: $TARGET_DIR"
+
+# Backup existing README
+if [ -f "$TARGET_DIR/README.md" ]; then
+    print_warning "Backing up existing README..."
+    cp "$TARGET_DIR/README.md" "$TARGET_DIR/README.md.backup"
+else
+    print_warning "No existing README found"
+fi
+
+# Generate new README
+echo "Generating README..."
+uv run python -m scripts.generate_readme $TYPE_FLAG "$TARGET_DIR" --overwrite
+
+# Compare READMEs (ignore custom content section)
+if [ -f "$TARGET_DIR/README.md.backup" ]; then
+    echo "Comparing generated README with existing version..."
+
+    # Extract content before custom-content marker from both files
+    awk '/<!-- custom-content -->/{exit} 1' "$TARGET_DIR/README.md.backup" > /tmp/old_readme.txt
+    awk '/<!-- custom-content -->/{exit} 1' "$TARGET_DIR/README.md" > /tmp/new_readme.txt
+
+    if ! diff -u /tmp/old_readme.txt /tmp/new_readme.txt; then
+        print_error "README is out of sync for $TARGET_DIR"
+        echo ""
+        echo "The generated README differs from the committed version."
+        echo "Command to fix: uv run python -m scripts.generate_readme $TYPE_FLAG $TARGET_DIR --overwrite"
+        echo ""
+        echo "Diff saved to: /tmp/readme_diff.txt"
+        diff -u /tmp/old_readme.txt /tmp/new_readme.txt > /tmp/readme_diff.txt || true
+
+        # In CI mode, always restore original README; otherwise ask user
+        if [ "$CI_MODE" = true ]; then
+            mv "$TARGET_DIR/README.md.backup" "$TARGET_DIR/README.md"
+            print_warning "Restored original README (CI mode)"
+        else
+            # Ask if user wants to keep the new README
+            read -p "Keep the newly generated README? (y/N): " -n 1 -r
+            echo
+            if [[ ! $REPLY =~ ^[Yy]$ ]]; then
+                mv "$TARGET_DIR/README.md.backup" "$TARGET_DIR/README.md"
+                print_warning "Restored original README"
+            else
+                rm "$TARGET_DIR/README.md.backup"
+                print_success "Kept new README. Don't forget to commit it!"
+            fi
+        fi
+
+        exit 1
+    else
+        print_success "README is up-to-date for $TARGET_DIR"
+        # Restore backup to maintain custom content
+        mv "$TARGET_DIR/README.md.backup" "$TARGET_DIR/README.md"
+        exit 0
+    fi
+else
+    print_warning "No existing README to compare against"
+    print_success "A new README has been generated. Review and commit it."
+    exit 0
+fi
+
diff --git a/.github/workflows/readme-check.yml b/.github/workflows/readme-check.yml
@@ -0,0 +1,109 @@
+name: README Validation
+
+on:
+  pull_request:
+    paths:
+      - 'components/**'
+      - 'pipelines/**'
+      - 'third_party/components/**'
+      - 'third_party/pipelines/**'
+      - 'scripts/generate_readme/**'
+      - '.github/workflows/readme-check.yml'
+      - '.github/scripts/**'
+
+jobs:
+  check-readme-sync:
+    name: Check README files are up-to-date
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Get changed files
+        id: changed-files
+        uses: tj-actions/changed-files@v45
+        with:
+          files: |
+            components/**
+            pipelines/**
+            third_party/components/**
+            third_party/pipelines/**
+            scripts/generate_readme/**
+            .github/scripts/**
+          files_ignore: |
+            **/*.md
+
+      - name: Check if only README files changed
+        id: check-skip
+        run: |
+          if [ "${{ steps.changed-files.outputs.any_changed }}" != "true" ]; then
+            echo "skip=true" >> $GITHUB_OUTPUT
+            echo "Only README files changed, skipping validation"
+          else
+            echo "skip=false" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Install uv
+        if: steps.check-skip.outputs.skip != 'true'
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
+      - name: Set up Python
+        if: steps.check-skip.outputs.skip != 'true'
+        uses: actions/setup-python@v5
+        with:
+          python-version-file: pyproject.toml
+
+      - name: Install dependencies
+        if: steps.check-skip.outputs.skip != 'true'
+        run: uv sync --locked --all-extras --dev
+
+      - name: Extract component and pipeline directories
+        if: steps.check-skip.outputs.skip != 'true'
+        id: find-targets
+        run: |
+          echo "Changed files:"
+          echo "${{ steps.changed-files.outputs.all_changed_files }}"
+
+          # Extract unique component/pipeline directories from changed files
+          targets=$(./.github/scripts/find-changed-components-and-pipelines.sh ${{ steps.changed-files.outputs.all_changed_files }})
+
+          echo "targets=$targets" >> $GITHUB_OUTPUT
+          echo "Targets to check: $targets"
+
+      - name: Validate READMEs
+        if: steps.check-skip.outputs.skip != 'true' && steps.find-targets.outputs.targets != ''
+        run: |
+          set -e
+          failed_targets=""
+
+          for target_dir in ${{ steps.find-targets.outputs.targets }}; do
+            echo ""
+            # Run the validation script in CI mode
+            if ./.github/scripts/test-readme-check.sh --ci "$target_dir"; then
+              echo ""
+            else
+              failed_targets="$failed_targets $target_dir"
+            fi
+          done
+
+          if [ -n "$failed_targets" ]; then
+            echo "=================================================="
+            echo "❌ README validation failed for:"
+            for target in $failed_targets; do
+              echo "  - $target"
+            done
+            echo "=================================================="
+            exit 1
+          fi
+
+      - name: Summary
+        if: steps.check-skip.outputs.skip != 'true'
+        run: |
+          echo "=================================================="
+          echo "✅ All README files are up-to-date!"
+          echo "=================================================="
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,13 @@
+[project]
+name = "pipelines-components"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.9"
+dependencies = [
+    "docstring-parser>=0.17.0",
+    "jinja2>=3.1.6",
+    "kfp>=2.14.6",
+    "pytest>=8.4.2",
+    "pyyaml>=6.0.3",
+]
-dependencies = [
-    "docstring-parser>=0.17.0",
-    "jinja2>=3.1.6",
-    "kfp>=2.14.6",
-    "pytest>=8.4.2",
-    "pyyaml>=6.0.3",
-]
+dependencies = [
+    "kfp>=2.5.0",
+]
+
+[project.optional-dependencies]
+# CI
+ci = [
+    "pyyaml",
+    "pytest",
+    "docstring-parser",
+    "jinja2",
+]
+# Local development (extends ci with additional tools)
+dev = [
+    "kfp-components[ci]",
+    # Add linting/formatting tools here as needed:
+]
-dependencies = [
-    "docstring-parser>=0.17.0",
-    "jinja2>=3.1.6",
-    "kfp>=2.14.6",
-    "pytest>=8.4.2",
-    "pyyaml>=6.0.3",
-]
+dependencies = [
+    "kfp>=2.5.0",
+]
+
+[project.optional-dependencies]
+# CI
+ci = [
+    "pyyaml",
+    "pytest",
+    "docstring-parser",
+    "jinja2",
+]
+# Local development (extends ci with additional tools)
+dev = [
+    "kfp-components[ci]",
+    # Add linting/formatting tools here as needed:
+]