#!/bin/bash

# Script to generate GCS signed URLs for image files in JSONL format.
# Usage: ./listgcs.sh <gcs-path> [output-file] [expiration-seconds] [parallel-jobs]

# Abort on the first unhandled command failure.
set -e

# The GCS path is the only mandatory argument. "${1:-}" makes the test
# well-defined when no argument is given at all, and the quotes contain
# nothing but the expansion so an empty value is actually detected.
if [[ -z "${1:-}" ]]; then
  {
    echo "Error: GCS path is required"
    echo "Usage: $0 <gcs-path> [output-file] [expiration-seconds] [parallel-jobs]"
    echo "Example: $0 gs://my-bucket/images/ output.jsonl 21600 8"
  } >&2  # diagnostics belong on stderr
  exit 1
fi
15+
# --- Configuration taken from the positional arguments -----------------------
GCS_PATH="${1:-}"
OUTPUT_FILE="${2:-signed_urls.jsonl}"
EXPIRATION_SECONDS="${3:-21600}"  # Default: 6 hours
PARALLEL_JOBS="${4:-20}"          # Default: 20 parallel jobs

# Both numeric arguments are handed to other tools verbatim; reject anything
# that is not a plain non-negative integer up front instead of letting every
# downstream command fail later.
if ! [[ "$EXPIRATION_SECONDS" =~ ^[0-9]+$ ]]; then
  echo "Error: expiration-seconds must be a non-negative integer, got '$EXPIRATION_SECONDS'" >&2
  exit 1
fi
if ! [[ "$PARALLEL_JOBS" =~ ^[0-9]+$ ]]; then
  echo "Error: parallel-jobs must be a non-negative integer, got '$PARALLEL_JOBS'" >&2
  exit 1
fi

# Remove a single trailing slash from the GCS path if present.
GCS_PATH="${GCS_PATH%/}"

# Convert seconds to the duration format used by gcloud (e.g., 21600s).
EXPIRATION="${EXPIRATION_SECONDS}s"

# Image file extensions to include (extended regex; applied case-insensitively
# by the caller via grep -iE).
IMAGE_PATTERN='\.(jpg|jpeg|png|gif|bmp|webp|tiff|tif|svg)$'
29+
#######################################
# Pick a service account that can be impersonated for URL signing.
#
# Lookup order:
#   1. The project's default Compute Engine service account, if it exists.
#   2. The first service account returned by
#      `gcloud iam service-accounts list`.
#
# Arguments: none
# Outputs:   the service account email on stdout (nothing when none is found)
# Returns:   0 when an account was found, 1 otherwise
#######################################
find_service_account() {
  local project_id project_number compute_sa sa_list

  # Declaration and assignment are separate so the gcloud status is not masked
  # by `local`; "|| var=''" keeps a failing lookup from tripping `set -e`.
  project_id=$(gcloud config get-value project 2>/dev/null) || project_id=""
  if [[ -n "$project_id" ]]; then
    # The default compute service account is named after the project NUMBER
    # (<number>-compute@developer.gserviceaccount.com), not the project ID,
    # so resolve the number first.
    project_number=$(gcloud projects describe "$project_id" \
      --format='value(projectNumber)' 2>/dev/null) || project_number=""
    if [[ -n "$project_number" ]]; then
      compute_sa="${project_number}-compute@developer.gserviceaccount.com"
      if gcloud iam service-accounts describe "$compute_sa" >/dev/null 2>&1; then
        printf '%s\n' "$compute_sa"
        return 0
      fi
    fi
  fi

  # If that doesn't work, fall back to any service account in the project.
  sa_list=$(gcloud iam service-accounts list \
    --format='value(email)' --limit=1 2>/dev/null) || sa_list=""
  if [[ -n "$sa_list" ]]; then
    printf '%s\n' "$sa_list" | head -n 1
    return 0
  fi

  return 1
}
51+
# Try to find a service account to use. find_service_account returns non-zero
# when nothing is found; the "|| ..." fallback keeps `set -e` from aborting the
# script at the assignment so the warning below is actually reachable.
SERVICE_ACCOUNT=$(find_service_account) || SERVICE_ACCOUNT=""
if [[ -z "$SERVICE_ACCOUNT" ]]; then
  {
    echo "Warning: No service account found. Attempting to sign URLs without impersonation."
    echo "If this fails, you may need to:"
    echo "  1. Authenticate with a service account: gcloud auth activate-service-account --key-file=key.json"
    echo "  2. Or ensure you have appropriate service accounts in your project"
    echo ""
  } >&2  # keep warnings out of stdout, which carries the JSONL records
fi
61+
#######################################
# Sign one GCS object and print a JSONL record for it.
#
# Arguments:
#   $1 - object URL (gs://bucket/path/to/file)
#   $2 - service account email to impersonate; empty to sign without
#        impersonation
#   $3 - signed URL lifetime in gcloud duration format (e.g. 21600s)
# Outputs:
#   stdout: {"name": "<path with / replaced by __>", "url": "<signed url>"}
#   stderr: a one-line warning when the object could not be signed
# Returns:
#   0 always; a failure for one object must not stop the whole batch.
#
# The function is self-contained (no helper functions) because it is exported
# with `export -f` and run in separate bash processes.
#######################################
process_file() {
  local object="$1"
  local service_account="$2"
  local expiration="$3"
  local -a sign_cmd
  local signed_url_output signed_url path_part name_with_path
  local bs='\' dq='"'
  local json_name json_url

  # Build the command as an array so every argument stays a single word.
  sign_cmd=(gcloud storage sign-url --http-verb=GET "--duration=${expiration}")
  if [[ -n "$service_account" ]]; then
    sign_cmd+=("--impersonate-service-account=${service_account}")
  fi
  sign_cmd+=("$object")

  if ! signed_url_output=$("${sign_cmd[@]}" 2>/dev/null); then
    printf 'Warning: failed to sign %s\n' "$object" >&2
    return 0
  fi

  # Extract just the signed_url field from gcloud's YAML-style output.
  signed_url=$(printf '%s\n' "$signed_url_output" \
    | sed -n 's/^signed_url:[[:space:]]*//p' \
    | head -n 1)
  if [[ -z "$signed_url" ]]; then
    printf 'Warning: no signed URL returned for %s\n' "$object" >&2
    return 0
  fi

  # Drop the "gs://<bucket>/" prefix, then flatten the remaining path by
  # turning every "/" into "__".
  path_part="$object"
  if [[ "$object" == gs://*/* ]]; then
    path_part="${object#gs://*/}"
  fi
  name_with_path="${path_part//\//__}"

  # Escape backslashes first, then double quotes, so the record stays valid
  # JSON even for unusual object names.
  json_name=${name_with_path//"$bs"/"$bs$bs"}
  json_name=${json_name//"$dq"/"$bs$dq"}
  json_url=${signed_url//"$bs"/"$bs$bs"}
  json_url=${json_url//"$dq"/"$bs$dq"}

  # Output JSONL
  printf '{"name": "%s", "url": "%s"}\n' "$json_name" "$json_url"
}
90+
# Export the function and variables so the bash processes started by xargs
# can see them.
export -f process_file
export SERVICE_ACCOUNT
export EXPIRATION

# Status messages go to stderr; stdout carries only the JSONL records.
echo "Listing files from ${GCS_PATH}..." >&2

# List all objects, keep only images, and sign them in parallel.
#   - grep -vE '[/:]$' drops directory entries ("…/") and the "…:" section
#     headers printed by `gsutil ls -r`.
#   - Names are handed to xargs NUL-delimited so quotes, backslashes or
#     blanks in object names are passed through untouched.
gsutil ls -r "$GCS_PATH" \
  | grep -vE '[/:]$' \
  | grep -iE "$IMAGE_PATTERN" \
  | tr '\n' '\0' \
  | xargs -0 -I {} -P "$PARALLEL_JOBS" \
      bash -c 'process_file "$@"' _ {} "$SERVICE_ACCOUNT" "$EXPIRATION" \
  | tee "$OUTPUT_FILE"
# Must be read immediately: the pipeline's own status is only tee's.
pipeline_status=("${PIPESTATUS[@]}")

# A failed listing would otherwise look like "0 images found".
if (( pipeline_status[0] != 0 )); then
  echo "Error: failed to list ${GCS_PATH} (gsutil exit status ${pipeline_status[0]})" >&2
  exit 1
fi

echo "" >&2
echo "Done! Signed URLs written to $OUTPUT_FILE" >&2
echo "Total images processed: $(wc -l < "$OUTPUT_FILE")" >&2
# (end of script)