Skip to content

Commit b8e3e29

Browse files
committed
retry on cluster startup failure
1 parent 479358a commit b8e3e29

9 files changed

+103
-16
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,3 +248,6 @@ fabric.properties
248248
.idea
249249

250250
arangodb-spring-data.iml
251+
252+
/docker/jwtHeader
253+
/docker/jwtSecret

docker/clean_cluster.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/bash

# Force-remove every container that belongs to the test cluster.
# The name is quoted so each one reaches docker as exactly one argument.
for c in agent1 \
  agent2 \
  agent3 \
  dbserver1 \
  dbserver2 \
  coordinator1 \
  coordinator2; do
  docker rm -f "$c"
done

docker/clean_single.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/bin/bash
2+
3+
# Force-remove the container named "arangodb".
docker rm -f arangodb

docker/debug_containers_cluster.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#######################################
# Print a banner for one container, flag it when it is not running, and
# dump its logs.
# Arguments: $1 - container name
# Outputs:   banner, status line and logs on stdout;
#            "container not found" on stderr
# Returns:   1 when `docker inspect` fails for $1, otherwise the status of
#            `docker logs`
#######################################
debug_container() {
  local running

  echo "=== === === ==="
  echo "=== $1"

  # docker inspect fails for an unknown container; report that once here
  # instead of falling through to a second error from docker logs.
  if ! running=$(docker inspect -f '{{.State.Running}}' "$1"); then
    echo "=== $1 container not found!" >&2
    echo "=== === === ==="
    return 1
  fi

  if [ "$running" = false ]; then
    echo "=== $1 IS NOT RUNNING!"
  fi

  echo "=== === === ==="

  docker logs "$1"
}
15+
16+
# Dump diagnostics for every cluster member: agents first, then DB servers,
# then coordinators.
for container in \
  agent1 agent2 agent3 \
  dbserver1 dbserver2 \
  coordinator1 coordinator2; do
  debug_container "$container"
done

docker/debug_containers_single.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Name of the single-server container to inspect.
container_name="arangodb"

#######################################
# Print a banner for one container, flag it when it is not running, and
# dump its logs.
# Arguments: $1 - container name
# Outputs:   banner, status line and logs on stdout;
#            "container not found" on stderr
# Returns:   1 when `docker inspect` fails for $1, otherwise the status of
#            `docker logs`
#######################################
debug_container() {
  local running

  echo "=== === === ==="
  echo "=== $1"

  # docker inspect fails for an unknown container; report that once here
  # instead of falling through to a second error from docker logs.
  if ! running=$(docker inspect -f '{{.State.Running}}' "$1"); then
    echo "=== $1 container not found!" >&2
    echo "=== === === ==="
    return 1
  fi

  if [ "$running" = false ]; then
    echo "=== $1 IS NOT RUNNING!"
  fi

  echo "=== === === ==="

  docker logs "$1"
}

debug_container "$container_name"

docker/start_db_cluster.sh

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# ./start_db_cluster.sh <dockerImage>
66

77
# EXAMPLE:
8-
# ./start_db_cluster.sh docker.io/arangodb/arangodb:3.6.2
8+
# ./start_db_cluster.sh docker.io/arangodb/arangodb:3.7.1
99

1010
docker pull "$1"
1111

@@ -20,25 +20,21 @@ AUTHORIZATION_HEADER=$(cat "$LOCATION"/jwtHeader)
2020
echo "Starting containers..."
2121

2222
docker run -d -v "$LOCATION"/jwtSecret:/jwtSecret -e ARANGO_LICENSE_KEY="$ARANGO_LICENSE_KEY" --network arangodb --ip 172.28.1.1 --name agent1 "$1" arangodb --cluster.start-dbserver false --cluster.start-coordinator false --auth.jwt-secret /jwtSecret
23-
sleep 1
2423
docker run -d -v "$LOCATION"/jwtSecret:/jwtSecret -e ARANGO_LICENSE_KEY="$ARANGO_LICENSE_KEY" --network arangodb --ip 172.28.1.2 --name agent2 "$1" arangodb --cluster.start-dbserver false --cluster.start-coordinator false --starter.join agent1 --auth.jwt-secret /jwtSecret
25-
sleep 1
2624
docker run -d -v "$LOCATION"/jwtSecret:/jwtSecret -e ARANGO_LICENSE_KEY="$ARANGO_LICENSE_KEY" --network arangodb --ip 172.28.1.3 --name agent3 "$1" arangodb --cluster.start-dbserver false --cluster.start-coordinator false --starter.join agent1 --auth.jwt-secret /jwtSecret
27-
sleep 1
2825

2926
docker run -d -v "$LOCATION"/jwtSecret:/jwtSecret -e ARANGO_LICENSE_KEY="$ARANGO_LICENSE_KEY" --network arangodb --ip 172.28.2.1 --name dbserver1 "$1" arangodb --cluster.start-dbserver true --cluster.start-coordinator false --starter.join agent1 --auth.jwt-secret /jwtSecret
30-
sleep 1
3127
docker run -d -v "$LOCATION"/jwtSecret:/jwtSecret -e ARANGO_LICENSE_KEY="$ARANGO_LICENSE_KEY" --network arangodb --ip 172.28.2.2 --name dbserver2 "$1" arangodb --cluster.start-dbserver true --cluster.start-coordinator false --starter.join agent1 --auth.jwt-secret /jwtSecret
32-
sleep 1
33-
docker run -d -v "$LOCATION"/jwtSecret:/jwtSecret -e ARANGO_LICENSE_KEY="$ARANGO_LICENSE_KEY" --network arangodb --ip 172.28.2.3 --name dbserver3 "$1" arangodb --cluster.start-dbserver true --cluster.start-coordinator false --starter.join agent1 --auth.jwt-secret /jwtSecret
34-
sleep 1
3528

3629
docker run -d -v "$LOCATION"/jwtSecret:/jwtSecret -e ARANGO_LICENSE_KEY="$ARANGO_LICENSE_KEY" --network arangodb --ip 172.28.3.1 --name coordinator1 -p 8529:8529 "$1" arangodb --cluster.start-dbserver false --cluster.start-coordinator true --starter.join agent1 --auth.jwt-secret /jwtSecret
37-
sleep 1
3830
docker run -d -v "$LOCATION"/jwtSecret:/jwtSecret -e ARANGO_LICENSE_KEY="$ARANGO_LICENSE_KEY" --network arangodb --ip 172.28.3.2 --name coordinator2 "$1" arangodb --cluster.start-dbserver false --cluster.start-coordinator true --starter.join agent1 --auth.jwt-secret /jwtSecret
39-
sleep 1
4031

4132
debug_container() {
33+
if [ ! "$(docker ps -aqf name="$1")" ]; then
34+
echo "$1 container not found!"
35+
exit 1
36+
fi
37+
4238
running=$(docker inspect -f '{{.State.Running}}' "$1")
4339

4440
if [ "$running" = false ]
@@ -57,7 +53,6 @@ debug() {
5753
agent3 \
5854
dbserver1 \
5955
dbserver2 \
60-
dbserver3 \
6156
coordinator1 \
6257
coordinator2 ; do
6358
debug_container $c
@@ -81,17 +76,16 @@ for a in 172.28.1.1:8531 \
8176
172.28.1.3:8531 \
8277
172.28.2.1:8530 \
8378
172.28.2.2:8530 \
84-
172.28.2.3:8530 \
8579
172.28.3.1:8529 \
8680
172.28.3.2:8529 ; do
8781
wait_server $a
8882
done
8983

9084
# wait for port mappings
91-
wait_server 127.0.0.1:8529
85+
wait_server localhost:8529
9286

9387
docker exec coordinator1 arangosh --server.authentication=false --javascript.execute-string='require("org/arangodb/users").update("root", "test")'
9488

95-
rm "$LOCATION"/jwtHeader "$LOCATION"/jwtSecret
89+
#rm "$LOCATION"/jwtHeader "$LOCATION"/jwtSecret
9690

9791
echo "Done, your cluster is ready."
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/bin/bash

# Start the cluster and, whenever startup fails, remove the cluster
# containers and try again.

# USAGE:
#   export ARANGO_LICENSE_KEY=<arangodb-enterprise-license>
#   ./docker/start_db_cluster_retry_fail.sh <dockerImage>

# EXAMPLE:
#   ./docker/start_db_cluster_retry_fail.sh docker.io/arangodb/arangodb:3.7.1

# OPTIONAL:
#   ARANGO_START_MAX_RETRIES - give up after this many failed retries;
#                              0 or unset keeps retrying forever.

if [ -z "$1" ]; then
  echo "usage: $0 <dockerImage>" >&2
  exit 2
fi

max_retries="${ARANGO_START_MAX_RETRIES:-0}"
retries=0

# Test the start script's status directly instead of reading $? afterwards,
# so a command inserted between the call and the check cannot clobber it.
until ./docker/start_db_cluster.sh "$1"; do
  if [ "$max_retries" -gt 0 ] && [ "$retries" -ge "$max_retries" ]; then
    echo "cluster startup failed after $retries retries, giving up" >&2
    exit 1
  fi
  retries=$((retries + 1))
  echo "=== === ==="
  echo "cluster startup failed, retrying ..."
  ./docker/clean_cluster.sh
done

# An until loop returns the status of the last body command (the cleanup),
# so report success explicitly once startup has succeeded.
exit 0

docker/start_db_single.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55
# ./start_db_single.sh <dockerImage>
66

77
# EXAMPLE:
8-
# ./start_db_single.sh docker.io/arangodb/arangodb:3.6.2
8+
# ./start_db_single.sh docker.io/arangodb/arangodb:3.7.1
99

1010
docker pull "$1"
1111

12-
docker run -d -e ARANGO_ROOT_PASSWORD=test -e ARANGO_LICENSE_KEY="$ARANGO_LICENSE_KEY" -p 8529:8529 "$1"
12+
docker network create arangodb --subnet 172.28.0.0/16
13+
14+
docker run -d -p 8529:8529 -e ARANGO_ROOT_PASSWORD=test -e ARANGO_LICENSE_KEY="$ARANGO_LICENSE_KEY" --network arangodb --ip 172.28.3.1 --name arangodb "$1"
1315

1416
echo "waiting for arangodb ..."
1517

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/bin/bash

# Start the single server and, whenever startup fails, remove its container
# and try again.

# USAGE:
#   export ARANGO_LICENSE_KEY=<arangodb-enterprise-license>
#   ./docker/start_db_single_retry_fail.sh <dockerImage>

# EXAMPLE:
#   ./docker/start_db_single_retry_fail.sh docker.io/arangodb/arangodb:3.7.1

# OPTIONAL:
#   ARANGO_START_MAX_RETRIES - give up after this many failed retries;
#                              0 or unset keeps retrying forever.

if [ -z "$1" ]; then
  echo "usage: $0 <dockerImage>" >&2
  exit 2
fi

max_retries="${ARANGO_START_MAX_RETRIES:-0}"
retries=0

# Test the start script's status directly instead of reading $? afterwards,
# so a command inserted between the call and the check cannot clobber it.
until ./docker/start_db_single.sh "$1"; do
  if [ "$max_retries" -gt 0 ] && [ "$retries" -ge "$max_retries" ]; then
    echo "single startup failed after $retries retries, giving up" >&2
    exit 1
  fi
  retries=$((retries + 1))
  echo "=== === ==="
  echo "single startup failed, retrying ..."
  ./docker/clean_single.sh
done

# An until loop returns the status of the last body command (the cleanup),
# so report success explicitly once startup has succeeded.
exit 0

0 commit comments

Comments
 (0)