-
Notifications
You must be signed in to change notification settings - Fork 7
feat(scripts): Add README Generator script and accompanying README Sync Check #7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
7bd8c29
400a146
6278177
582a35c
e33482c
0ab94a5
e0b7419
b1abd94
4b4dd2a
240bf33
6398c29
505bff6
805a016
70c1faf
548c4be
7f5100e
bf083be
49442d5
610d60f
bbf7ea8
d8c0664
a821872
fdc9901
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,116 @@ | ||
| #!/bin/bash | ||
| # Extract unique component and pipeline directories from a list of changed files | ||
| # Usage: ./find-changed-components-and-pipelines.sh <file1> <file2> ... | ||
| # Output: Space-separated list of directories | ||
| # Note: If the generator script is changed, returns all components and pipelines | ||
|
|
||
| set -e | ||
|
|
||
| components="" | ||
| pipelines="" | ||
| generator_changed=false | ||
|
|
||
| # Helper function to find all component directories | ||
| # Scans both components/ and third_party/components/ | ||
| find_all_components() { | ||
| for base_dir in "components" "third_party/components"; do | ||
| if [ -d "$base_dir" ]; then | ||
| for category_dir in "$base_dir"/*/; do | ||
| if [ -d "$category_dir" ]; then | ||
| for comp_dir in "$category_dir"*/; do | ||
| if [ -d "$comp_dir" ] && [ -f "${comp_dir}component.py" ]; then | ||
| comp_path="${comp_dir%/}" | ||
| components="$components $comp_path" | ||
| fi | ||
| done | ||
| fi | ||
| done | ||
| fi | ||
| done | ||
| } | ||
|
|
||
| # Helper function to find all pipeline directories | ||
| # Scans both pipelines/ and third_party/pipelines/ | ||
| find_all_pipelines() { | ||
| for base_dir in "pipelines" "third_party/pipelines"; do | ||
| if [ -d "$base_dir" ]; then | ||
| for category_dir in "$base_dir"/*/; do | ||
| if [ -d "$category_dir" ]; then | ||
| for pipe_dir in "$category_dir"*/; do | ||
| if [ -d "$pipe_dir" ] && [ -f "${pipe_dir}pipeline.py" ]; then | ||
| pipe_path="${pipe_dir%/}" | ||
| pipelines="$pipelines $pipe_path" | ||
| fi | ||
| done | ||
| fi | ||
| done | ||
| fi | ||
| done | ||
| } | ||
|
|
||
| # Helper function to extract directory from file path and add to variable | ||
| # Args: $1 = file path, $2 = pattern to match | ||
| # Automatically detects third_party prefix, cut depth, and output variable | ||
| extract_dir_from_file() { | ||
| local file=$1 | ||
| local pattern=$2 | ||
|
|
||
| if [[ "$file" == $pattern ]]; then | ||
| # Determine output variable based on pattern | ||
| local var_name | ||
| if [[ "$pattern" == *components* ]]; then | ||
| var_name="components" | ||
| elif [[ "$pattern" == *pipelines* ]]; then | ||
| var_name="pipelines" | ||
| else | ||
| return | ||
| fi | ||
|
|
||
| # Default to 3 fields, increment by 1 if third_party is in the pattern | ||
| local cut_fields=3 | ||
| if [[ "$pattern" == third_party/* ]]; then | ||
| cut_fields=4 | ||
| fi | ||
|
|
||
| local dir=$(echo "$file" | cut -d'/' -f1-"$cut_fields") | ||
| local current_value | ||
| eval "current_value=\$$var_name" | ||
| if [[ ! " $current_value " =~ " $dir " ]]; then | ||
| eval "$var_name=\"\${$var_name} \${dir}\"" | ||
| fi | ||
| fi | ||
| } | ||
|
|
||
| # Check if the generator script itself was changed | ||
| for file in "$@"; do | ||
| if [[ "$file" == scripts/generate_readme/* ]]; then | ||
| generator_changed=true | ||
| break | ||
| fi | ||
| done | ||
|
|
||
| # If generator changed, find all components and pipelines | ||
| if [ "$generator_changed" = true ]; then | ||
| echo "Generator script changed, checking all components and pipelines" >&2 | ||
|
|
||
| find_all_components | ||
| find_all_pipelines | ||
|
|
||
| # Trim whitespace and output | ||
| all_targets="$(echo "$components $pipelines" | xargs)" | ||
| echo "$all_targets" | ||
| exit 0 | ||
| fi | ||
|
|
||
| # Normal operation: extract directories from changed files | ||
| for file in "$@"; do | ||
| extract_dir_from_file "$file" "components/*/*/*" | ||
| extract_dir_from_file "$file" "third_party/components/*/*/*" | ||
| extract_dir_from_file "$file" "pipelines/*/*/*" | ||
| extract_dir_from_file "$file" "third_party/pipelines/*/*/*" | ||
| done | ||
|
|
||
| # Trim whitespace and output | ||
| all_targets="$(echo "$components $pipelines" | xargs)" | ||
| echo "$all_targets" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,128 @@ | ||
| #!/bin/bash | ||
| # Test script to simulate README validation workflow locally | ||
| # Usage: ./test-readme-check.sh [--ci] [component_dir|pipeline_dir] | ||
| # | ||
| # Options: | ||
| # --ci Run in CI mode (non-interactive, always restore original README on failure) | ||
|
|
||
| set -e | ||
|
|
||
| # Parse --ci flag | ||
| CI_MODE=false | ||
| if [ "$1" == "--ci" ]; then | ||
| CI_MODE=true | ||
| shift | ||
| fi | ||
|
|
||
| # Colors for output | ||
| RED='\033[0;31m' | ||
| GREEN='\033[0;32m' | ||
| YELLOW='\033[1;33m' | ||
| NC='\033[0m' # No Color | ||
|
|
||
| print_success() { | ||
| echo -e "${GREEN}✅ $1${NC}" | ||
| } | ||
|
|
||
| print_error() { | ||
| echo -e "${RED}❌ $1${NC}" | ||
| } | ||
|
|
||
| print_warning() { | ||
| echo -e "${YELLOW}⚠️ $1${NC}" | ||
| } | ||
|
|
||
| print_header() { | ||
| echo "==================================================" | ||
| echo "$1" | ||
| echo "==================================================" | ||
| } | ||
|
|
||
| # Check if target directory is provided | ||
| if [ $# -eq 0 ]; then | ||
| echo "Usage: $0 [--ci] <component_dir|pipeline_dir>" | ||
| echo "" | ||
| echo "Options:" | ||
| echo " --ci Run in CI mode (non-interactive)" | ||
| echo "" | ||
| echo "Examples:" | ||
| echo " $0 components/dev/hello_world" | ||
| echo " $0 pipelines/training/my_pipeline" | ||
| echo " $0 third_party/components/dev/external_component" | ||
| echo " $0 --ci components/dev/hello_world # CI mode" | ||
| exit 1 | ||
| fi | ||
|
|
||
| TARGET_DIR=$1 | ||
|
|
||
| # Determine if it's a component or pipeline | ||
| if [[ "$TARGET_DIR" == components/* ]] || [[ "$TARGET_DIR" == third_party/components/* ]]; then | ||
| TYPE_FLAG="--component" | ||
| elif [[ "$TARGET_DIR" == pipelines/* ]] || [[ "$TARGET_DIR" == third_party/pipelines/* ]]; then | ||
| TYPE_FLAG="--pipeline" | ||
| else | ||
| print_error "Invalid directory. Must be in components/, pipelines/, third_party/components/, or third_party/pipelines/" | ||
| exit 1 | ||
| fi | ||
|
|
||
| print_header "Testing README validation for: $TARGET_DIR" | ||
|
|
||
| # Backup existing README | ||
| if [ -f "$TARGET_DIR/README.md" ]; then | ||
| print_warning "Backing up existing README..." | ||
| cp "$TARGET_DIR/README.md" "$TARGET_DIR/README.md.backup" | ||
| else | ||
| print_warning "No existing README found" | ||
| fi | ||
|
|
||
| # Generate new README | ||
| echo "Generating README..." | ||
| uv run python -m scripts.generate_readme $TYPE_FLAG "$TARGET_DIR" --overwrite | ||
|
|
||
| # Compare READMEs (ignore custom content section) | ||
| if [ -f "$TARGET_DIR/README.md.backup" ]; then | ||
| echo "Comparing generated README with existing version..." | ||
|
|
||
| # Extract content before custom-content marker from both files | ||
| awk '/<!-- custom-content -->/{exit} 1' "$TARGET_DIR/README.md.backup" > /tmp/old_readme.txt | ||
| awk '/<!-- custom-content -->/{exit} 1' "$TARGET_DIR/README.md" > /tmp/new_readme.txt | ||
|
|
||
| if ! diff -u /tmp/old_readme.txt /tmp/new_readme.txt; then | ||
| print_error "README is out of sync for $TARGET_DIR" | ||
| echo "" | ||
| echo "The generated README differs from the committed version." | ||
| echo "Command to fix: uv run python -m scripts.generate_readme $TYPE_FLAG $TARGET_DIR --overwrite" | ||
| echo "" | ||
| echo "Diff saved to: /tmp/readme_diff.txt" | ||
| diff -u /tmp/old_readme.txt /tmp/new_readme.txt > /tmp/readme_diff.txt || true | ||
|
|
||
| # In CI mode, always restore original README; otherwise ask user | ||
| if [ "$CI_MODE" = true ]; then | ||
| mv "$TARGET_DIR/README.md.backup" "$TARGET_DIR/README.md" | ||
| print_warning "Restored original README (CI mode)" | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gmfrasca From the KEP, my understanding is that we should restore the original README and fail the job, prompting contributors to update or regenerate the README file. Could you update to fail the job in this case and update the message.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The script actually does this - CI_MODE is just to ignore the prompt to overwrite so that CI Checks don't perpetually wait for user input. The script then fails if it hits this if clause on line 116 ( |
||
| else | ||
| # Ask if user wants to keep the new README | ||
| read -p "Keep the newly generated README? (y/N): " -n 1 -r | ||
| echo | ||
| if [[ ! $REPLY =~ ^[Yy]$ ]]; then | ||
| mv "$TARGET_DIR/README.md.backup" "$TARGET_DIR/README.md" | ||
| print_warning "Restored original README" | ||
| else | ||
| rm "$TARGET_DIR/README.md.backup" | ||
| print_success "Kept new README. Don't forget to commit it!" | ||
| fi | ||
| fi | ||
|
|
||
| exit 1 | ||
| else | ||
| print_success "README is up-to-date for $TARGET_DIR" | ||
| # Restore backup to maintain custom content | ||
| mv "$TARGET_DIR/README.md.backup" "$TARGET_DIR/README.md" | ||
| exit 0 | ||
| fi | ||
| else | ||
| print_warning "No existing README to compare against" | ||
| print_success "A new README has been generated. Review and commit it." | ||
| exit 0 | ||
| fi | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,109 @@ | ||
| name: README Validation | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @HumairAK suggested this on my metadata validation PR and I thought it was a good idea - can you add a GH workflows file to run the generate_readme unit tests? Maybe all scripts directory unit tests can be run with a single workflow |
||
|
|
||
| on: | ||
| pull_request: | ||
| paths: | ||
| - 'components/**' | ||
| - 'pipelines/**' | ||
| - 'third_party/components/**' | ||
| - 'third_party/pipelines/**' | ||
| - 'scripts/generate_readme/**' | ||
| - '.github/workflows/readme-check.yml' | ||
| - '.github/scripts/**' | ||
|
|
||
| jobs: | ||
| check-readme-sync: | ||
| name: Check README files are up-to-date | ||
| runs-on: ubuntu-latest | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When I created the first workflow, I pinned the runner version to |
||
|
|
||
| steps: | ||
| - name: Checkout code | ||
| uses: actions/checkout@v4 | ||
| with: | ||
| fetch-depth: 0 | ||
|
|
||
| - name: Get changed files | ||
| id: changed-files | ||
| uses: tj-actions/changed-files@v45 | ||
| with: | ||
| files: | | ||
| components/** | ||
| pipelines/** | ||
| third_party/components/** | ||
| third_party/pipelines/** | ||
| scripts/generate_readme/** | ||
| .github/scripts/** | ||
| files_ignore: | | ||
| **/*.md | ||
|
|
||
| - name: Check if only README files changed | ||
| id: check-skip | ||
| run: | | ||
| if [ "${{ steps.changed-files.outputs.any_changed }}" != "true" ]; then | ||
| echo "skip=true" >> $GITHUB_OUTPUT | ||
| echo "Only README files changed, skipping validation" | ||
| else | ||
| echo "skip=false" >> $GITHUB_OUTPUT | ||
| fi | ||
|
Comment on lines
+39
to
+47
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This workflow should run even when only a README file changes, otherwise we may end up with a README manually edited. |
||
|
|
||
| - name: Install uv | ||
| if: steps.check-skip.outputs.skip != 'true' | ||
| uses: astral-sh/setup-uv@v5 | ||
| with: | ||
| enable-cache: true | ||
|
|
||
| - name: Set up Python | ||
| if: steps.check-skip.outputs.skip != 'true' | ||
| uses: actions/setup-python@v5 | ||
| with: | ||
| python-version-file: pyproject.toml | ||
|
|
||
| - name: Install dependencies | ||
| if: steps.check-skip.outputs.skip != 'true' | ||
| run: uv sync --locked --all-extras --dev | ||
|
|
||
| - name: Extract component and pipeline directories | ||
| if: steps.check-skip.outputs.skip != 'true' | ||
| id: find-targets | ||
| run: | | ||
| echo "Changed files:" | ||
| echo "${{ steps.changed-files.outputs.all_changed_files }}" | ||
|
|
||
| # Extract unique component/pipeline directories from changed files | ||
| targets=$(./.github/scripts/find-changed-components-and-pipelines.sh ${{ steps.changed-files.outputs.all_changed_files }}) | ||
|
|
||
| echo "targets=$targets" >> $GITHUB_OUTPUT | ||
| echo "Targets to check: $targets" | ||
|
|
||
| - name: Validate READMEs | ||
| if: steps.check-skip.outputs.skip != 'true' && steps.find-targets.outputs.targets != '' | ||
| run: | | ||
| set -e | ||
| failed_targets="" | ||
|
|
||
| for target_dir in ${{ steps.find-targets.outputs.targets }}; do | ||
| echo "" | ||
| # Run the validation script in CI mode | ||
| if ./.github/scripts/test-readme-check.sh --ci "$target_dir"; then | ||
| echo "" | ||
| else | ||
| failed_targets="$failed_targets $target_dir" | ||
| fi | ||
| done | ||
|
|
||
| if [ -n "$failed_targets" ]; then | ||
| echo "==================================================" | ||
| echo "❌ README validation failed for:" | ||
| for target in $failed_targets; do | ||
| echo " - $target" | ||
| done | ||
| echo "==================================================" | ||
| exit 1 | ||
| fi | ||
|
Comment on lines
+81
to
+102
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about making all of this part of |
||
|
|
||
| - name: Summary | ||
| if: steps.check-skip.outputs.skip != 'true' | ||
| run: | | ||
| echo "==================================================" | ||
| echo "✅ All README files are up-to-date!" | ||
| echo "==================================================" | ||
|
Comment on lines
+104
to
+109
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This step is redundant and can be misleading. It prints |
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,13 @@ | ||||||||||||||||||||||||||||||||||||||||||||||||||
| [project] | ||||||||||||||||||||||||||||||||||||||||||||||||||
| name = "pipelines-components" | ||||||||||||||||||||||||||||||||||||||||||||||||||
| version = "0.1.0" | ||||||||||||||||||||||||||||||||||||||||||||||||||
| description = "Add your description here" | ||||||||||||||||||||||||||||||||||||||||||||||||||
| readme = "README.md" | ||||||||||||||||||||||||||||||||||||||||||||||||||
| requires-python = ">=3.9" | ||||||||||||||||||||||||||||||||||||||||||||||||||
| dependencies = [ | ||||||||||||||||||||||||||||||||||||||||||||||||||
| "docstring-parser>=0.17.0", | ||||||||||||||||||||||||||||||||||||||||||||||||||
| "jinja2>=3.1.6", | ||||||||||||||||||||||||||||||||||||||||||||||||||
| "kfp>=2.14.6", | ||||||||||||||||||||||||||||||||||||||||||||||||||
| "pytest>=8.4.2", | ||||||||||||||||||||||||||||||||||||||||||||||||||
| "pyyaml>=6.0.3", | ||||||||||||||||||||||||||||||||||||||||||||||||||
| ] | ||||||||||||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+7
to
+13
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using
evalhere can be dangerous. It allows attackers to inject code to seal secrets.You can use arrays, which store strings literally without execution, instead of
eval.