Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
7bd8c29
feat(scripts): Init README Generator Script
gmfrasca Nov 8, 2025
400a146
feat(scripts): Init generate_readme.py script
gmfrasca Nov 13, 2025
6278177
feat(scripts): Add ComponentMetadataParser
gmfrasca Nov 13, 2025
582a35c
feat(scripts): Parse component metadata for generate_readme.md
gmfrasca Nov 14, 2025
e33482c
feat(scripts): Improve generate_readme.py logging
gmfrasca Nov 14, 2025
0ab94a5
feat(scripts): Reorganize main readme_generator code
gmfrasca Nov 14, 2025
e0b7419
feat(scripts): Add ContentGenerator to generate_readme.py
gmfrasca Nov 14, 2025
b1abd94
feat(scripts): Include component metadata in generated READMEs
gmfrasca Nov 14, 2025
4b4dd2a
feat(scripts): Make generated READMEs more concise
gmfrasca Nov 14, 2025
240bf33
feat(scripts): Add PipelineMetadataParser to readme generator
gmfrasca Nov 14, 2025
6398c29
feat(scripts): Preserve Custom Content in readme generation
gmfrasca Nov 14, 2025
505bff6
feat(scripts): Modularize README Generator script
gmfrasca Nov 14, 2025
805a016
test: Add Readme Generator tests
gmfrasca Nov 15, 2025
70c1faf
test: Add README diff check to CI
gmfrasca Nov 15, 2025
548c4be
chore: Add uv files
gmfrasca Nov 15, 2025
7f5100e
chore: Update README generator to use jinja templating
gmfrasca Nov 17, 2025
bf083be
chore: Make generated metadata human-readable
gmfrasca Nov 17, 2025
49442d5
chore: Rename ReadmeGenerator to ReadmeWriter for clairty
gmfrasca Nov 18, 2025
610d60f
chore: Simplify docstring parsing for readme generator
gmfrasca Nov 18, 2025
bbf7ea8
chore: Add Example Usage if example_pipeline.py file exists
gmfrasca Nov 18, 2025
d8c0664
chore: Include OWNERS info in generated readme
gmfrasca Nov 18, 2025
a821872
chore: Move links into Additonal Resources section of generated readmes
gmfrasca Nov 19, 2025
fdc9901
chore: Source component/pipeline name from decorator params
gmfrasca Nov 20, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 116 additions & 0 deletions .github/scripts/find-changed-components-and-pipelines.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/bin/bash
# Extract unique component and pipeline directories from a list of changed files
# Usage: ./find-changed-components-and-pipelines.sh <file1> <file2> ...
# Output: Space-separated list of directories
# Note: If the generator script is changed, returns all components and pipelines

set -e

components=""
pipelines=""
generator_changed=false

# Helper function to find all component directories
# Scans both components/ and third_party/components/
find_all_components() {
for base_dir in "components" "third_party/components"; do
if [ -d "$base_dir" ]; then
for category_dir in "$base_dir"/*/; do
if [ -d "$category_dir" ]; then
for comp_dir in "$category_dir"*/; do
if [ -d "$comp_dir" ] && [ -f "${comp_dir}component.py" ]; then
comp_path="${comp_dir%/}"
components="$components $comp_path"
fi
done
fi
done
fi
done
}

# Helper function to find all pipeline directories
# Scans both pipelines/ and third_party/pipelines/
find_all_pipelines() {
for base_dir in "pipelines" "third_party/pipelines"; do
if [ -d "$base_dir" ]; then
for category_dir in "$base_dir"/*/; do
if [ -d "$category_dir" ]; then
for pipe_dir in "$category_dir"*/; do
if [ -d "$pipe_dir" ] && [ -f "${pipe_dir}pipeline.py" ]; then
pipe_path="${pipe_dir%/}"
pipelines="$pipelines $pipe_path"
fi
done
fi
done
fi
done
}

# Helper function to extract directory from file path and add to variable
# Args: $1 = file path, $2 = pattern to match
# Automatically detects third_party prefix, cut depth, and output variable
extract_dir_from_file() {
local file=$1
local pattern=$2

if [[ "$file" == $pattern ]]; then
# Determine output variable based on pattern
local var_name
if [[ "$pattern" == *components* ]]; then
var_name="components"
elif [[ "$pattern" == *pipelines* ]]; then
var_name="pipelines"
else
return
fi

# Default to 3 fields, increment by 1 if third_party is in the pattern
local cut_fields=3
if [[ "$pattern" == third_party/* ]]; then
cut_fields=4
fi

local dir=$(echo "$file" | cut -d'/' -f1-"$cut_fields")
local current_value
eval "current_value=\$$var_name"
if [[ ! " $current_value " =~ " $dir " ]]; then
eval "$var_name=\"\${$var_name} \${dir}\""
Comment on lines +77 to +79
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using eval here can be dangerous. It allows attackers to inject code to seal secrets.
You can use arrays, which store strings literally without execution, instead of eval.

components=()
pipelines=()

# Instead of eval, use:
components+=("$dir")

fi
fi
}

# Check if the generator script itself was changed
for file in "$@"; do
if [[ "$file" == scripts/generate_readme/* ]]; then
generator_changed=true
break
fi
done

# If generator changed, find all components and pipelines
if [ "$generator_changed" = true ]; then
echo "Generator script changed, checking all components and pipelines" >&2

find_all_components
find_all_pipelines

# Trim whitespace and output
all_targets="$(echo "$components $pipelines" | xargs)"
echo "$all_targets"
exit 0
fi

# Normal operation: extract directories from changed files
for file in "$@"; do
extract_dir_from_file "$file" "components/*/*/*"
extract_dir_from_file "$file" "third_party/components/*/*/*"
extract_dir_from_file "$file" "pipelines/*/*/*"
extract_dir_from_file "$file" "third_party/pipelines/*/*/*"
done

# Trim whitespace and output
all_targets="$(echo "$components $pipelines" | xargs)"
echo "$all_targets"

128 changes: 128 additions & 0 deletions .github/scripts/test-readme-check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/bin/bash
# Test script to simulate README validation workflow locally
# Usage: ./test-readme-check.sh [--ci] [component_dir|pipeline_dir]
#
# Options:
# --ci Run in CI mode (non-interactive, always restore original README on failure)

set -e

# Parse --ci flag
CI_MODE=false
if [ "$1" == "--ci" ]; then
CI_MODE=true
shift
fi

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

print_success() {
echo -e "${GREEN}✅ $1${NC}"
}

print_error() {
echo -e "${RED}❌ $1${NC}"
}

print_warning() {
echo -e "${YELLOW}⚠️ $1${NC}"
}

print_header() {
echo "=================================================="
echo "$1"
echo "=================================================="
}

# Check if target directory is provided
if [ $# -eq 0 ]; then
echo "Usage: $0 [--ci] <component_dir|pipeline_dir>"
echo ""
echo "Options:"
echo " --ci Run in CI mode (non-interactive)"
echo ""
echo "Examples:"
echo " $0 components/dev/hello_world"
echo " $0 pipelines/training/my_pipeline"
echo " $0 third_party/components/dev/external_component"
echo " $0 --ci components/dev/hello_world # CI mode"
exit 1
fi

TARGET_DIR=$1

# Determine if it's a component or pipeline
if [[ "$TARGET_DIR" == components/* ]] || [[ "$TARGET_DIR" == third_party/components/* ]]; then
TYPE_FLAG="--component"
elif [[ "$TARGET_DIR" == pipelines/* ]] || [[ "$TARGET_DIR" == third_party/pipelines/* ]]; then
TYPE_FLAG="--pipeline"
else
print_error "Invalid directory. Must be in components/, pipelines/, third_party/components/, or third_party/pipelines/"
exit 1
fi

print_header "Testing README validation for: $TARGET_DIR"

# Backup existing README
if [ -f "$TARGET_DIR/README.md" ]; then
print_warning "Backing up existing README..."
cp "$TARGET_DIR/README.md" "$TARGET_DIR/README.md.backup"
else
print_warning "No existing README found"
fi

# Generate new README
echo "Generating README..."
uv run python -m scripts.generate_readme $TYPE_FLAG "$TARGET_DIR" --overwrite

# Compare READMEs (ignore custom content section)
if [ -f "$TARGET_DIR/README.md.backup" ]; then
echo "Comparing generated README with existing version..."

# Extract content before custom-content marker from both files
awk '/<!-- custom-content -->/{exit} 1' "$TARGET_DIR/README.md.backup" > /tmp/old_readme.txt
awk '/<!-- custom-content -->/{exit} 1' "$TARGET_DIR/README.md" > /tmp/new_readme.txt

if ! diff -u /tmp/old_readme.txt /tmp/new_readme.txt; then
print_error "README is out of sync for $TARGET_DIR"
echo ""
echo "The generated README differs from the committed version."
echo "Command to fix: uv run python -m scripts.generate_readme $TYPE_FLAG $TARGET_DIR --overwrite"
echo ""
echo "Diff saved to: /tmp/readme_diff.txt"
diff -u /tmp/old_readme.txt /tmp/new_readme.txt > /tmp/readme_diff.txt || true

# In CI mode, always restore original README; otherwise ask user
if [ "$CI_MODE" = true ]; then
mv "$TARGET_DIR/README.md.backup" "$TARGET_DIR/README.md"
print_warning "Restored original README (CI mode)"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gmfrasca From the KEP, my understanding is that we should restore the original README and fail the job, prompting contributors to update or regenerate the README file. Could you update to fail the job in this case and update the message.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The script actually does this - CI_MODE is just to ignore the prompt to overwrite so that CI Checks don't perpetually wait for user input. The script then fails if it hits this if clause on line 116 (exit 1), so i the behavior matches what is specified

else
# Ask if user wants to keep the new README
read -p "Keep the newly generated README? (y/N): " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
mv "$TARGET_DIR/README.md.backup" "$TARGET_DIR/README.md"
print_warning "Restored original README"
else
rm "$TARGET_DIR/README.md.backup"
print_success "Kept new README. Don't forget to commit it!"
fi
fi

exit 1
else
print_success "README is up-to-date for $TARGET_DIR"
# Restore backup to maintain custom content
mv "$TARGET_DIR/README.md.backup" "$TARGET_DIR/README.md"
exit 0
fi
else
print_warning "No existing README to compare against"
print_success "A new README has been generated. Review and commit it."
exit 0
fi

109 changes: 109 additions & 0 deletions .github/workflows/readme-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
name: README Validation

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@HumairAK suggested this on my metadata validation PR and I thought it was a good idea - can you add a GH workflows file to run the generate_readme unit tests? Maybe all scripts directory unit tests can be run with a single workflow


on:
pull_request:
paths:
- 'components/**'
- 'pipelines/**'
- 'third_party/components/**'
- 'third_party/pipelines/**'
- 'scripts/generate_readme/**'
- '.github/workflows/readme-check.yml'
- '.github/scripts/**'

jobs:
check-readme-sync:
name: Check README files are up-to-date
runs-on: ubuntu-latest
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I created the first workflow, I pinned the runner version to ubuntu-24.04 to avoid unpredictable behavior due to new versions.
I actually don't have a strong preference on pinning or not, but I think we should be consistent on all workflows.


steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v45
with:
files: |
components/**
pipelines/**
third_party/components/**
third_party/pipelines/**
scripts/generate_readme/**
.github/scripts/**
files_ignore: |
**/*.md

- name: Check if only README files changed
id: check-skip
run: |
if [ "${{ steps.changed-files.outputs.any_changed }}" != "true" ]; then
echo "skip=true" >> $GITHUB_OUTPUT
echo "Only README files changed, skipping validation"
else
echo "skip=false" >> $GITHUB_OUTPUT
fi
Comment on lines +39 to +47
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This workflow should run even when only a README file changes, otherwise we may end up with a README manually edited.


- name: Install uv
if: steps.check-skip.outputs.skip != 'true'
uses: astral-sh/setup-uv@v5
with:
enable-cache: true

- name: Set up Python
if: steps.check-skip.outputs.skip != 'true'
uses: actions/setup-python@v5
with:
python-version-file: pyproject.toml

- name: Install dependencies
if: steps.check-skip.outputs.skip != 'true'
run: uv sync --locked --all-extras --dev

- name: Extract component and pipeline directories
if: steps.check-skip.outputs.skip != 'true'
id: find-targets
run: |
echo "Changed files:"
echo "${{ steps.changed-files.outputs.all_changed_files }}"

# Extract unique component/pipeline directories from changed files
targets=$(./.github/scripts/find-changed-components-and-pipelines.sh ${{ steps.changed-files.outputs.all_changed_files }})

echo "targets=$targets" >> $GITHUB_OUTPUT
echo "Targets to check: $targets"

- name: Validate READMEs
if: steps.check-skip.outputs.skip != 'true' && steps.find-targets.outputs.targets != ''
run: |
set -e
failed_targets=""

for target_dir in ${{ steps.find-targets.outputs.targets }}; do
echo ""
# Run the validation script in CI mode
if ./.github/scripts/test-readme-check.sh --ci "$target_dir"; then
echo ""
else
failed_targets="$failed_targets $target_dir"
fi
done

if [ -n "$failed_targets" ]; then
echo "=================================================="
echo "❌ README validation failed for:"
for target in $failed_targets; do
echo " - $target"
done
echo "=================================================="
exit 1
fi
Comment on lines +81 to +102
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about making all of this part of ./.github/scripts/test-readme-check.sh and pass ${{ steps.find-targets.outputs.targets }} as argument to that script?


- name: Summary
if: steps.check-skip.outputs.skip != 'true'
run: |
echo "=================================================="
echo "✅ All README files are up-to-date!"
echo "=================================================="
Comment on lines +104 to +109
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This step is redundant and can be misleading. It prints All README files are up-to-date! even when targets was empty and no validation actually ran. Consider moving the success message to the end of the Validate READMEs step instead, so it only logs when validation actually passed.

13 changes: 13 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[project]
name = "pipelines-components"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.9"
dependencies = [
"docstring-parser>=0.17.0",
"jinja2>=3.1.6",
"kfp>=2.14.6",
"pytest>=8.4.2",
"pyyaml>=6.0.3",
]
Comment on lines +7 to +13
Copy link
Contributor

@hbelmiro hbelmiro Nov 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • The KEP states kfp>=2.5.0
  • Do we really need those specific versions for the other dependencies?
  • We can use dependency groups to separate CI and dev dependencies from the rest.
Suggested change
dependencies = [
"docstring-parser>=0.17.0",
"jinja2>=3.1.6",
"kfp>=2.14.6",
"pytest>=8.4.2",
"pyyaml>=6.0.3",
]
dependencies = [
"kfp>=2.5.0",
]
[project.optional-dependencies]
# CI
ci = [
"pyyaml",
"pytest",
"docstring-parser",
"jinja2",
]
# Local development (extends ci with additional tools)
dev = [
"kfp-components[ci]",
# Add linting/formatting tools here as needed:
]

Loading