From 3dcdb5a190235adba2d9c5d3ad1cf900e40dac2b Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Thu, 6 Oct 2022 16:45:54 -0500 Subject: [PATCH 1/6] prune empty dirs before upload. observed a case where file by file clean up started failing. (possibly due to too many directories existing) --- Dockerfile | 2 +- main.sh | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 024cb82..cc668da 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM alpine:3.12 +FROM alpine:3.16.2 WORKDIR /app RUN apk add --no-cache openssh-client rsync bash COPY . . diff --git a/main.sh b/main.sh index 5f57ae9..1f1f1b0 100755 --- a/main.sh +++ b/main.sh @@ -216,9 +216,12 @@ while true; do fatal "failed to resolve upload server and update /etc/hosts." fi - echo "scanning and uploading files..." cd /uploads + echo "cleaning up empty upload dirs..." + find . -type d -empty -delete + + echo "scanning and uploading files..." find_uploads_in_cwd | while read -r dir; do if ! ls "${dir}" | grep -q .; then echo "skipping dir with no uploads: ${dir}" From b96b248467a67babdfa32d2c4f00b25cb894b9fe Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Thu, 6 Oct 2022 16:51:57 -0500 Subject: [PATCH 2/6] added mindepth 1 to ensure we don't accidentally delete working directory --- main.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.sh b/main.sh index 1f1f1b0..04cb28f 100755 --- a/main.sh +++ b/main.sh @@ -219,7 +219,7 @@ while true; do cd /uploads echo "cleaning up empty upload dirs..." - find . -type d -empty -delete + find . -mindepth 1 -type d -empty -delete echo "scanning and uploading files..." find_uploads_in_cwd | while read -r dir; do From ce6cd7acc39d493c3f63258b1b1dca06026443e7 Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Thu, 6 Oct 2022 17:16:34 -0500 Subject: [PATCH 3/6] leave all directory cleanup to pre-step to avoid mutating tree during walk --- main.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/main.sh b/main.sh index 04cb28f..8624d8c 100755 --- a/main.sh +++ b/main.sh @@ -225,7 +225,6 @@ while true; do find_uploads_in_cwd | while read -r dir; do if ! ls "${dir}" | grep -q .; then echo "skipping dir with no uploads: ${dir}" - attempt_to_cleanup_dir "${dir}" continue fi @@ -233,9 +232,6 @@ while true; do upload_dir "${dir}" touch /tmp/rsync_healthy - echo "cleaning up: ${dir}" - attempt_to_cleanup_dir "${dir}" - # indicate that we are healthy and making progress after each transfer completes touch /tmp/healthy From 4650c6bd1c0215beb5171bbe84fbfde85e0b4261 Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Thu, 6 Oct 2022 17:20:41 -0500 Subject: [PATCH 4/6] removed duplicate find_uploads_in_cwd func --- main.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/main.sh b/main.sh index 8624d8c..656d04c 100755 --- a/main.sh +++ b/main.sh @@ -148,10 +148,6 @@ attempt_to_cleanup_dir() { # * shared pid sidecar (wolfgang shared this with me: https://kubernetes.io/docs/tasks/configure-pod-container/share-process-namespace/) rsync_supervisor & -find_uploads_in_cwd() { - find . -mindepth 3 -maxdepth 3 -type d -} - find_uploads_in_cwd() { # NOTE(sean) upload data is mounted at /uploads with leaf files like: # From be77707df71b088d5cb445ee50c49ee1e7dcee4b Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Thu, 6 Oct 2022 17:26:15 -0500 Subject: [PATCH 5/6] only find nonempty dirs in find_uploads_in_cwd to eliminate need for additional check --- main.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/main.sh b/main.sh index 656d04c..a739c05 100755 --- a/main.sh +++ b/main.sh @@ -158,7 +158,7 @@ find_uploads_in_cwd() { # with job # path: ./Pluginctl/test-pipeline/0.2.8/1649746359093671949-a31446e4291ac3a04a3c331e674252a63ee95604/data # depth: 1 2 3 4 5 files - find . -mindepth 3 -maxdepth 4 -type d | awk -F/ ' + find . -mindepth 3 -maxdepth 4 -type d ! -empty | awk -F/ ' # match paths which ends in version/timestamp-shasum $3 ~ /[0-9]+\.[0-9]+\.[0-9]+/ && $4 ~ /[0-9]+-[0-9a-f]+/ $4 ~ /[0-9]+\.[0-9]+\.[0-9]+/ && $5 ~ /[0-9]+-[0-9a-f]+/ @@ -219,11 +219,6 @@ while true; do echo "scanning and uploading files..." find_uploads_in_cwd | while read -r dir; do - if ! ls "${dir}" | grep -q .; then - echo "skipping dir with no uploads: ${dir}" - continue - fi - echo "uploading: ${dir}" upload_dir "${dir}" touch /tmp/rsync_healthy From 66e43e2cd6f59277edf84d39783b72e54d8fe1c7 Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Thu, 6 Oct 2022 17:30:03 -0500 Subject: [PATCH 6/6] combined liveness touch --- main.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/main.sh b/main.sh index a739c05..93fa240 100755 --- a/main.sh +++ b/main.sh @@ -221,11 +221,8 @@ while true; do find_uploads_in_cwd | while read -r dir; do echo "uploading: ${dir}" upload_dir "${dir}" - touch /tmp/rsync_healthy - # indicate that we are healthy and making progress after each transfer completes - touch /tmp/healthy - + touch /tmp/rsync_healthy /tmp/healthy echo "done: ${dir}" done