1+ #! /bin/bash
2+
3+ # Script to generate S3 signed URLs for image files in JSONL format
4+ # Usage: ./generateS3SignedUrls.sh <s3-path> [output-file] [expiration-seconds] [parallel-jobs]
5+ # Or with curl:
6+ # curl -fsSL https://gist.githubusercontent.com/tonylampada/20b7bc984a455f53e2d07f88b33bf43c/raw/generateS3SignedUrls.sh | bash -s -- s3://bucket/path output.jsonl
7+
8+ set -e
9+
10+ # Check if S3 path is provided
11+ if [ -z " $1 " ]; then
12+ echo " Error: S3 path is required"
13+ echo " Usage: $0 <s3-path> [output-file] [expiration-seconds] [parallel-jobs]"
14+ echo " Example: $0 s3://my-bucket/images/ output.jsonl 3600 8"
15+ exit 1
16+ fi
17+
18+ S3_PATH=" $1 "
19+ OUTPUT_FILE=" ${2:- signed_urls.jsonl} "
20+ EXPIRATION=" ${3:- 21600} " # Default: 6 hours
21+ PARALLEL_JOBS=" ${4:- 20} " # Default: 20 parallel jobs
22+
23+ # Remove trailing slash from S3 path if present
24+ S3_PATH=" ${S3_PATH%/ } "
25+
26+ # Extract bucket name from S3_PATH
27+ BUCKET=$( echo " $S3_PATH " | sed ' s|s3://||' | cut -d' /' -f1)
28+
29+ # Image file extensions to include (regex pattern for grep)
30+ IMAGE_PATTERN=' \.(jpg|jpeg|png|gif|bmp|webp|tiff|tif|svg)$'
31+
32+ # Function to process a single file
33+ process_file () {
34+ local file_path=" $1 "
35+ local bucket=" $2 "
36+ local expiration=" $3 "
37+
38+ # Construct full S3 URI
39+ local s3_uri=" s3://${bucket} /${file_path} "
40+
41+ # Generate signed URL
42+ local signed_url=$( aws s3 presign " $s3_uri " --expires-in " $expiration " 2> /dev/null)
43+
44+ if [ $? -eq 0 ]; then
45+ # Create name with full path using double underscores instead of slashes
46+ local name_with_path=$( echo " $file_path " | sed ' s|/|__|g' )
47+
48+ # Output JSONL
49+ echo " {\" name\" : \" $name_with_path \" , \" url\" : \" $signed_url \" }"
50+ fi
51+ }
52+
53+ # Export function and variables for xargs
54+ export -f process_file
55+ export BUCKET
56+ export EXPIRATION
57+
58+ echo " Listing files from $S3_PATH ..."
59+
60+ # Get list of all files, filter for images, and process in parallel
61+ aws s3 ls " $S3_PATH /" --recursive | \
62+ awk ' {print $4}' | \
63+ grep -iE " $IMAGE_PATTERN " | \
64+ xargs -I {} -P " $PARALLEL_JOBS " bash -c ' process_file "$@"' _ {} " $BUCKET " " $EXPIRATION " | \
65+ tee " $OUTPUT_FILE "
66+
67+ echo " "
68+ echo " Done! Signed URLs written to $OUTPUT_FILE "
69+ echo " Total images processed: $( wc -l < " $OUTPUT_FILE " ) "
0 commit comments