diff --git a/.gitignore b/.gitignore index 07c3ea6..58870f6 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,8 @@ Session.vim .terraform terraform.tfstate* build.log + +**/*.tar.7z +infra/v?/terraform +**/.env_vars +**/osc_config.tf \ No newline at end of file diff --git a/images/consul/v1/files/bin/consul-config.sh b/images/consul/v1/files/_scripts/consul-config.sh similarity index 99% rename from images/consul/v1/files/bin/consul-config.sh rename to images/consul/v1/files/_scripts/consul-config.sh index 2a0e7b0..047bf65 100755 --- a/images/consul/v1/files/bin/consul-config.sh +++ b/images/consul/v1/files/_scripts/consul-config.sh @@ -34,4 +34,3 @@ else } EOF fi - diff --git a/images/consul/v1/files/_scripts/format_EBS.sh b/images/consul/v1/files/_scripts/format_EBS.sh new file mode 100755 index 0000000..32a7883 --- /dev/null +++ b/images/consul/v1/files/_scripts/format_EBS.sh @@ -0,0 +1,38 @@ +#!/bin/bash +set -e + +DEVICE="/dev/xvdb" +logFile="/var/log/$(basename ${0}).log" + +function error() { + DATE=`date '+%Y-%m-%d %H:%M:%S'` + if [[ -n "$logFile" ]]; then echo "$DATE -- $1" >> $logFile; fi + echo $1 + exit 1 +} + +MY_INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) +REGION=$(curl -s http://169.254.169.254/latest/dynamic/instance-identity/document | grep region | sed 's/ "region" : "\(.*\)",\?/\1/') + +# 60 seconds / 12 attempts +cnt=12 +while [[ ! 
-b /dev/xvdb ]] && [[ "$cnt" -gt 0 ]] ; do + cnt=$(($cnt-1)) + sleep 5 +done + +[[ -b "$DEVICE" ]] || error "$DEVICE not available" + +# cnt=12 +# while [[ -z "$STATUS" || "$STATUS" != "attached" ]] && [[ "$cnt" -gt 0 ]]; do +# STATUS=$(aws ec2 describe-instances --region ${REGION} --instance ${MY_INSTANCE_ID}|jq -r --arg DEVICE "$DEVICE" '.Reservations[].Instances[].BlockDeviceMappings[]|select(.DeviceName==$DEVICE)|.Ebs.Status') +# cnt=$(($cnt-1)) +# sleep 5 +# done + +if blkid ${DEVICE}; then + echo "$DEVICE setup is fine" +else + wipefs -fa $DEVICE && mkfs.ext4 $DEVICE + # parted -s /dev/xvdb mklabel msdos && parted -s /dev/xvdb mkpart primary ext4 1MiB 100% && mkfs.ext4 /dev/xvdb1 +fi diff --git a/images/consul/v1/files/systemd/consul.service b/images/consul/v1/files/systemd/consul.service index 7e9fcd3..8394a23 100644 --- a/images/consul/v1/files/systemd/consul.service +++ b/images/consul/v1/files/systemd/consul.service @@ -10,11 +10,10 @@ After=generate-consul-config.service [Service] Environment=GOMAXPROCS=2 Restart=on-failure -ExecStartPre=/usr/bin/mkdir -p /mnt/data/consul +ExecStartPre=/bin/sh -c 'if [ ! -d /mnt/data/consul ]; then /usr/bin/mkdir -p /mnt/data/consul; fi' ExecStart=/usr/local/bin/consul agent -data-dir=/mnt/data/consul -config-dir=/etc/consul.d -ui ExecReload=/bin/kill -HUP $MAINPID KillSignal=SIGINT [Install] WantedBy=multi-user.target - diff --git a/images/consul/v1/files/systemd/format-data-volume.service b/images/consul/v1/files/systemd/format-data-volume.service index 6f619a1..ca7f06d 100644 --- a/images/consul/v1/files/systemd/format-data-volume.service +++ b/images/consul/v1/files/systemd/format-data-volume.service @@ -1,10 +1,9 @@ [Unit] Description=Format EBS volume if needed +# ConditionFirstBoot=yes [Service] Type=oneshot RemainAfterExit=yes -ExecStart=/bin/bash -c \ -'(/usr/sbin/blkid /dev/xvdb) || \ -(/usr/sbin/wipefs -fa /dev/xvdb && /usr/sbin/mkfs.ext4 /dev/xvdb)' - +ExecStart=/usr/local/bin/format_EBS.sh +TimeoutStartSec=60 \ No newline 
at end of file diff --git a/images/consul/v1/scripts/base.sh b/images/consul/v1/scripts/base.sh index ddd1230..90bbe2b 100755 --- a/images/consul/v1/scripts/base.sh +++ b/images/consul/v1/scripts/base.sh @@ -3,4 +3,4 @@ set -x set -e -dnf install -y curl wget unzip dnsmasq jq bind-utils +apt-get install -y curl wget unzip dnsmasq jq bind9utils parted diff --git a/images/consul/v1/scripts/configure_network.sh b/images/consul/v1/scripts/configure_network.sh index da92f57..b6d7089 100755 --- a/images/consul/v1/scripts/configure_network.sh +++ b/images/consul/v1/scripts/configure_network.sh @@ -4,7 +4,7 @@ set -x set -e # Add PEERDNS=no to ifcfg-eth0 since we don't want the dhcp client to rewrite /etc/resolv.conf -echo "PEERDNS=no" >> /etc/sysconfig/network-scripts/ifcfg-eth0 +# echo "PEERDNS=no" >> /etc/sysconfig/network-scripts/ifcfg-eth0 cat < /etc/resolv.conf search ec2.internal diff --git a/images/consul/v1/scripts/copy_files.sh b/images/consul/v1/scripts/copy_files.sh index 775e2f1..1cd2358 100755 --- a/images/consul/v1/scripts/copy_files.sh +++ b/images/consul/v1/scripts/copy_files.sh @@ -5,12 +5,12 @@ set -e STAGING_DIR="/tmp/packer/files/" mkdir -p ${STAGING_DIR} -chown -R fedora:fedora ${STAGING_DIR} +chown -R admin:admin ${STAGING_DIR} chown root:root ${STAGING_DIR}/systemd/* -chown root:root ${STAGING_DIR}/bin/* -chmod +x ${STAGING_DIR}/bin/* +chown root:root ${STAGING_DIR}/_scripts/* +chmod +x ${STAGING_DIR}/_scripts/* cp ${STAGING_DIR}/systemd/* /etc/systemd/system/ -cp ${STAGING_DIR}/bin/* /usr/local/bin +cp ${STAGING_DIR}/_scripts/* /usr/local/bin diff --git a/images/consul/v1/scripts/install_awscli.sh b/images/consul/v1/scripts/install_awscli.sh index 949c8a4..4dfff85 100755 --- a/images/consul/v1/scripts/install_awscli.sh +++ b/images/consul/v1/scripts/install_awscli.sh @@ -3,6 +3,7 @@ set -x set -e -dnf install -y python-pip +apt-get install -y python +wget https://bootstrap.pypa.io/get-pip.py +python get-pip.py pip install awscli - diff --git 
a/images/consul/v1/scripts/install_consul.sh b/images/consul/v1/scripts/install_consul.sh index 0ffb9af..deb2c09 100755 --- a/images/consul/v1/scripts/install_consul.sh +++ b/images/consul/v1/scripts/install_consul.sh @@ -5,7 +5,7 @@ set -e echo "Fetching Consul..." cd /tmp -wget https://releases.hashicorp.com/consul/0.6.2/consul_0.6.2_linux_amd64.zip -O consul.zip +wget https://releases.hashicorp.com/consul/1.0.6/consul_1.0.6_linux_amd64.zip -O consul.zip echo "Installing Consul..." unzip consul.zip >/dev/null diff --git a/images/consul/v1/scripts/prepare_staging.sh b/images/consul/v1/scripts/prepare_staging.sh index 9572f6f..ad3e56e 100755 --- a/images/consul/v1/scripts/prepare_staging.sh +++ b/images/consul/v1/scripts/prepare_staging.sh @@ -4,4 +4,4 @@ set -x set -e mkdir -p /tmp/packer/files -chown -R fedora:fedora /tmp/packer/ +chown -R admin:admin /tmp/packer/ diff --git a/images/consul/v1/scripts/update.sh b/images/consul/v1/scripts/update.sh index 39e885a..ad2a3ca 100755 --- a/images/consul/v1/scripts/update.sh +++ b/images/consul/v1/scripts/update.sh @@ -3,4 +3,4 @@ set -x set -e -dnf -y update +apt-get update diff --git a/images/consul/v1/template.json b/images/consul/v1/template.json index ca7061e..fd1190d 100644 --- a/images/consul/v1/template.json +++ b/images/consul/v1/template.json @@ -9,9 +9,9 @@ "access_key": "{{user `access_key`}}", "secret_key": "{{user `secret_key`}}", "region": "{{user `region`}}", - "source_ami": "ami-518bfb3b", + "source_ami": "ami-0dc82b70", "instance_type": "t2.micro", - "ssh_username": "fedora", + "ssh_username": "admin", "ami_name": "consul ami {{timestamp}}", "tags": { "test": "" @@ -22,9 +22,8 @@ "type": "shell", "execute_command": "{{ .Vars }} sudo -E sh '{{ .Path }}'", "scripts": [ - "./scripts/base.sh", - "./scripts/disable_selinux.sh", "./scripts/update.sh", + "./scripts/base.sh", "./scripts/install_awscli.sh", "./scripts/install_consul.sh", "./scripts/configure_network.sh", diff --git 
a/images/consul/v2/files/bin/consul-config.sh b/images/consul/v2/files/_scripts/consul-config.sh similarity index 99% rename from images/consul/v2/files/bin/consul-config.sh rename to images/consul/v2/files/_scripts/consul-config.sh index 2a0e7b0..047bf65 100755 --- a/images/consul/v2/files/bin/consul-config.sh +++ b/images/consul/v2/files/_scripts/consul-config.sh @@ -34,4 +34,3 @@ else } EOF fi - diff --git a/images/consul/v2/files/_scripts/format_EBS.sh b/images/consul/v2/files/_scripts/format_EBS.sh new file mode 100755 index 0000000..32a7883 --- /dev/null +++ b/images/consul/v2/files/_scripts/format_EBS.sh @@ -0,0 +1,38 @@ +#!/bin/bash +set -e + +DEVICE="/dev/xvdb" +logFile="/var/log/$(basename ${0}).log" + +function error() { + DATE=`date '+%Y-%m-%d %H:%M:%S'` + if [[ -n "$logFile" ]]; then echo "$DATE -- $1" >> $logFile; fi + echo $1 + exit 1 +} + +MY_INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) +REGION=$(curl -s http://169.254.169.254/latest/dynamic/instance-identity/document | grep region | sed 's/ "region" : "\(.*\)",\?/\1/') + +# 60 seconds / 12 attempts +cnt=12 +while [[ ! 
-b /dev/xvdb ]] && [[ "$cnt" -gt 0 ]] ; do + cnt=$(($cnt-1)) + sleep 5 +done + +[[ -b "$DEVICE" ]] || error "$DEVICE not available" + +# cnt=12 +# while [[ -z "$STATUS" || "$STATUS" != "attached" ]] && [[ "$cnt" -gt 0 ]]; do +# STATUS=$(aws ec2 describe-instances --region ${REGION} --instance ${MY_INSTANCE_ID}|jq -r --arg DEVICE "$DEVICE" '.Reservations[].Instances[].BlockDeviceMappings[]|select(.DeviceName==$DEVICE)|.Ebs.Status') +# cnt=$(($cnt-1)) +# sleep 5 +# done + +if blkid ${DEVICE}; then + echo "$DEVICE setup is fine" +else + wipefs -fa $DEVICE && mkfs.ext4 $DEVICE + # parted -s /dev/xvdb mklabel msdos && parted -s /dev/xvdb mkpart primary ext4 1MiB 100% && mkfs.ext4 /dev/xvdb1 +fi diff --git a/images/consul/v2/files/systemd/consul.service b/images/consul/v2/files/systemd/consul.service index 7e9fcd3..8394a23 100644 --- a/images/consul/v2/files/systemd/consul.service +++ b/images/consul/v2/files/systemd/consul.service @@ -10,11 +10,10 @@ After=generate-consul-config.service [Service] Environment=GOMAXPROCS=2 Restart=on-failure -ExecStartPre=/usr/bin/mkdir -p /mnt/data/consul +ExecStartPre=/bin/sh -c 'if [ ! -d /mnt/data/consul ]; then /usr/bin/mkdir -p /mnt/data/consul; fi' ExecStart=/usr/local/bin/consul agent -data-dir=/mnt/data/consul -config-dir=/etc/consul.d -ui ExecReload=/bin/kill -HUP $MAINPID KillSignal=SIGINT [Install] WantedBy=multi-user.target - diff --git a/images/consul/v2/files/systemd/format-data-volume.service b/images/consul/v2/files/systemd/format-data-volume.service index 6f619a1..ca7f06d 100644 --- a/images/consul/v2/files/systemd/format-data-volume.service +++ b/images/consul/v2/files/systemd/format-data-volume.service @@ -1,10 +1,9 @@ [Unit] Description=Format EBS volume if needed +# ConditionFirstBoot=yes [Service] Type=oneshot RemainAfterExit=yes -ExecStart=/bin/bash -c \ -'(/usr/sbin/blkid /dev/xvdb) || \ -(/usr/sbin/wipefs -fa /dev/xvdb && /usr/sbin/mkfs.ext4 /dev/xvdb)' - +ExecStart=/usr/local/bin/format_EBS.sh +TimeoutStartSec=60 \ No newline 
at end of file diff --git a/images/consul/v2/scripts/base.sh b/images/consul/v2/scripts/base.sh index ddd1230..90bbe2b 100755 --- a/images/consul/v2/scripts/base.sh +++ b/images/consul/v2/scripts/base.sh @@ -3,4 +3,4 @@ set -x set -e -dnf install -y curl wget unzip dnsmasq jq bind-utils +apt-get install -y curl wget unzip dnsmasq jq bind9utils parted diff --git a/images/consul/v2/scripts/configure_network.sh b/images/consul/v2/scripts/configure_network.sh index da92f57..b6d7089 100755 --- a/images/consul/v2/scripts/configure_network.sh +++ b/images/consul/v2/scripts/configure_network.sh @@ -4,7 +4,7 @@ set -x set -e # Add PEERDNS=no to ifcfg-eth0 since we don't want the dhcp client to rewrite /etc/resolv.conf -echo "PEERDNS=no" >> /etc/sysconfig/network-scripts/ifcfg-eth0 +# echo "PEERDNS=no" >> /etc/sysconfig/network-scripts/ifcfg-eth0 cat < /etc/resolv.conf search ec2.internal diff --git a/images/consul/v2/scripts/copy_files.sh b/images/consul/v2/scripts/copy_files.sh index 775e2f1..1cd2358 100755 --- a/images/consul/v2/scripts/copy_files.sh +++ b/images/consul/v2/scripts/copy_files.sh @@ -5,12 +5,12 @@ set -e STAGING_DIR="/tmp/packer/files/" mkdir -p ${STAGING_DIR} -chown -R fedora:fedora ${STAGING_DIR} +chown -R admin:admin ${STAGING_DIR} chown root:root ${STAGING_DIR}/systemd/* -chown root:root ${STAGING_DIR}/bin/* -chmod +x ${STAGING_DIR}/bin/* +chown root:root ${STAGING_DIR}/_scripts/* +chmod +x ${STAGING_DIR}/_scripts/* cp ${STAGING_DIR}/systemd/* /etc/systemd/system/ -cp ${STAGING_DIR}/bin/* /usr/local/bin +cp ${STAGING_DIR}/_scripts/* /usr/local/bin diff --git a/images/consul/v2/scripts/install_awscli.sh b/images/consul/v2/scripts/install_awscli.sh index 949c8a4..4dfff85 100755 --- a/images/consul/v2/scripts/install_awscli.sh +++ b/images/consul/v2/scripts/install_awscli.sh @@ -3,6 +3,7 @@ set -x set -e -dnf install -y python-pip +apt-get install -y python +wget https://bootstrap.pypa.io/get-pip.py +python get-pip.py pip install awscli - diff --git 
a/images/consul/v2/scripts/install_consul.sh b/images/consul/v2/scripts/install_consul.sh index e8d58c4..a4b30a6 100755 --- a/images/consul/v2/scripts/install_consul.sh +++ b/images/consul/v2/scripts/install_consul.sh @@ -5,7 +5,7 @@ set -e echo "Fetching Consul..." cd /tmp -wget https://releases.hashicorp.com/consul/0.6.3/consul_0.6.3_linux_amd64.zip -O consul.zip +wget https://releases.hashicorp.com/consul/1.0.5/consul_1.0.5_linux_amd64.zip -O consul.zip echo "Installing Consul..." unzip consul.zip >/dev/null diff --git a/images/consul/v2/scripts/prepare_staging.sh b/images/consul/v2/scripts/prepare_staging.sh index 9572f6f..ad3e56e 100755 --- a/images/consul/v2/scripts/prepare_staging.sh +++ b/images/consul/v2/scripts/prepare_staging.sh @@ -4,4 +4,4 @@ set -x set -e mkdir -p /tmp/packer/files -chown -R fedora:fedora /tmp/packer/ +chown -R admin:admin /tmp/packer/ diff --git a/images/consul/v2/scripts/update.sh b/images/consul/v2/scripts/update.sh index 39e885a..ad2a3ca 100755 --- a/images/consul/v2/scripts/update.sh +++ b/images/consul/v2/scripts/update.sh @@ -3,4 +3,4 @@ set -x set -e -dnf -y update +apt-get update diff --git a/images/consul/v2/template.json b/images/consul/v2/template.json index ca7061e..fd1190d 100644 --- a/images/consul/v2/template.json +++ b/images/consul/v2/template.json @@ -9,9 +9,9 @@ "access_key": "{{user `access_key`}}", "secret_key": "{{user `secret_key`}}", "region": "{{user `region`}}", - "source_ami": "ami-518bfb3b", + "source_ami": "ami-0dc82b70", "instance_type": "t2.micro", - "ssh_username": "fedora", + "ssh_username": "admin", "ami_name": "consul ami {{timestamp}}", "tags": { "test": "" @@ -22,9 +22,8 @@ "type": "shell", "execute_command": "{{ .Vars }} sudo -E sh '{{ .Path }}'", "scripts": [ - "./scripts/base.sh", - "./scripts/disable_selinux.sh", "./scripts/update.sh", + "./scripts/base.sh", "./scripts/install_awscli.sh", "./scripts/install_consul.sh", "./scripts/configure_network.sh", diff --git a/infra/v1/Dockerfile 
b/infra/v1/Dockerfile index 0cbe51d..a8d64db 100644 --- a/infra/v1/Dockerfile +++ b/infra/v1/Dockerfile @@ -1,15 +1,19 @@ -FROM fedora:23 +FROM debian:stretch -RUN dnf -y install wget curl git unzip jq python python-pip python-boto3 awscli ansible docker +RUN apt-get update && apt-get install -y wget curl git unzip jq python python-pip python-boto3 awscli ansible docker uuid-runtime \ + vim iproute2 net-tools nano -RUN curl https://releases.hashicorp.com/terraform/0.6.12/terraform_0.6.12_linux_amd64.zip -o /tmp/terraform_0.6.12_linux_amd64.zip +RUN curl https://releases.hashicorp.com/terraform/0.11.3/terraform_0.11.3_linux_amd64.zip -o /tmp/terraform_0.11.3_linux_amd64.zip WORKDIR /usr/local/bin/ -RUN unzip /tmp/terraform_0.6.12_linux_amd64.zip +RUN unzip /tmp/terraform_0.11.3_linux_amd64.zip RUN mkdir -p /root/.ssh RUN chmod 700 /root/.ssh -RUN echo -e "Host *\n\tStrictHostKeyChecking no\n" >> /root/.ssh/config +RUN /bin/echo -e "Host *\n\tStrictHostKeyChecking no\n" >> /root/.ssh/config + +RUN curl -fsSL get.docker.com -o get-docker.sh && sh get-docker.sh +# RUN usermod -aG docker admin WORKDIR /deploy ADD main.tf config ./ diff --git a/infra/v1/ansible/test_consul_servers_active.yml b/infra/v1/ansible/test_consul_servers_active.yml index 79f7299..956dfd9 100644 --- a/infra/v1/ansible/test_consul_servers_active.yml +++ b/infra/v1/ansible/test_consul_servers_active.yml @@ -1,6 +1,6 @@ - +--- - hosts: all - remote_user: fedora + remote_user: admin become_user: root gather_facts: false tasks: diff --git a/infra/v1/ansible/wait_instance_stopped.yml b/infra/v1/ansible/wait_instance_stopped.yml index 7f873c1..1522863 100644 --- a/infra/v1/ansible/wait_instance_stopped.yml +++ b/infra/v1/ansible/wait_instance_stopped.yml @@ -1,6 +1,6 @@ - +--- - hosts: all - remote_user: fedora + remote_user: admin become_user: root gather_facts: false tasks: diff --git a/infra/v1/ansible/wait_instance_up.yml b/infra/v1/ansible/wait_instance_up.yml index d2ff73b..e766c5d 100644 --- 
a/infra/v1/ansible/wait_instance_up.yml +++ b/infra/v1/ansible/wait_instance_up.yml @@ -1,6 +1,6 @@ - +--- - hosts: all - remote_user: fedora + remote_user: admin become_user: root gather_facts: false tasks: diff --git a/infra/v1/config b/infra/v1/config index 5e6dc5d..947a80f 100644 --- a/infra/v1/config +++ b/infra/v1/config @@ -1 +1,2 @@ -CONSUL_AMI_ID= # Put here the ami id reported at the end of the build of `images/consul/v1` +# Put here the ami id reported at the end of the build of `images/consul/v1` +CONSUL_AMI_ID=ami-6bf50016 diff --git a/infra/v1/main.tf b/infra/v1/main.tf index 36e8315..69278c1 100644 --- a/infra/v1/main.tf +++ b/infra/v1/main.tf @@ -1,3 +1,17 @@ +terraform { + required_version = ">= 0.10.8" + + variable "s3_bucket" {} + variable "tfstate_key" {} + + # backend fix + backend "s3" { + # bucket = "${var.s3_bucket}" + # key = "${var.tfstate_key}" + # region = "${var.region}" +} +} + variable "access_key" {} variable "secret_key" {} @@ -14,6 +28,7 @@ variable "azs" { } provider "aws" { + version = "~> 1.9" access_key = "${var.access_key}" secret_key = "${var.secret_key}" region = "${var.region}" @@ -43,14 +58,14 @@ resource "aws_internet_gateway" "igw" { # Create a subnet for every availability zone resource "aws_subnet" "front" { - count = "${length(split(\",\", var.azs))}" + count = "${length(split(",", var.azs))}" vpc_id = "${aws_vpc.vpc.id}" cidr_block = "10.0.${count.index * 16}.0/20" map_public_ip_on_launch = true - availability_zone = "${element(split(\",\", var.azs), count.index)}" + availability_zone = "${element(split(",", var.azs), count.index)}" tags { - Name = "subnet ${count.index} ${element(split(\",\", var.azs), count.index)}" + Name = "subnet ${count.index} ${element(split(",", var.azs), count.index)}" } } @@ -67,7 +82,7 @@ resource "aws_route_table" "public" { } resource "aws_route_table_association" "front" { - count = "${length(split(\",\", var.azs))}" + count = "${length(split(",", var.azs))}" subnet_id = 
"${element(aws_subnet.front.*.id, count.index)}" route_table_id = "${aws_route_table.public.id}" } @@ -144,7 +159,7 @@ resource "aws_ebs_volume" "consul_server03_ebs" { resource "aws_instance" "consul_server01" { ami = "${var.consul_ami}" instance_type = "t2.micro" - iam_instance_profile = "default_instance_profile" + iam_instance_profile = "ec2-role" key_name = "${var.ssh_keypair}" subnet_id = "${aws_subnet.front.0.id}" vpc_security_group_ids = ["${aws_security_group.allow_all.id}"] @@ -157,7 +172,7 @@ resource "aws_instance" "consul_server01" { resource "aws_instance" "consul_server02" { ami = "${var.consul_ami}" instance_type = "t2.micro" - iam_instance_profile = "default_instance_profile" + iam_instance_profile = "ec2-role" key_name = "${var.ssh_keypair}" subnet_id = "${aws_subnet.front.1.id}" vpc_security_group_ids = ["${aws_security_group.allow_all.id}"] @@ -170,7 +185,7 @@ resource "aws_instance" "consul_server02" { resource "aws_instance" "consul_server03" { ami = "${var.consul_ami}" instance_type = "t2.micro" - iam_instance_profile = "default_instance_profile" + iam_instance_profile = "ec2-role" key_name = "${var.ssh_keypair}" subnet_id = "${aws_subnet.front.2.id}" vpc_security_group_ids = ["${aws_security_group.allow_all.id}"] @@ -187,13 +202,17 @@ output region { } output vpc_id { - value = "${aws_vpc.vpc.id}" + value = "${aws_vpc.vpc.id}" } output security_group_allow_all_id { - value = "${aws_security_group.allow_all.id}" + value = "${aws_security_group.allow_all.id}" } output subnets { - value = "${join(",", aws_subnet.front.*.id)}" + value = "${join(",", aws_subnet.front.*.id)}" } + +# output "instance_ids" { +# value = ["${aws_instance.consul_server03.primary.id}"] +# } diff --git a/infra/v1/run-docker.sh b/infra/v1/run-docker.sh index 2002142..cb215b1 100755 --- a/infra/v1/run-docker.sh +++ b/infra/v1/run-docker.sh @@ -1,3 +1,6 @@ #!/bin/bash -docker run -it -v /var/run/docker.sock:/var/run/docker.sock -v "$SSH_AUTH_SOCK:/tmp/ssh_auth_sock" -e 
"SSH_AUTH_SOCK=/tmp/ssh_auth_sock" -e "ORIG_SSH_AUTH_SOCK=$SSH_AUTH_SOCK" -e ENV=${ENV} -e S3_BUCKET=${S3_BUCKET} -e SSH_KEYPAIR=${SSH_KEYPAIR} -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY -e AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION infra:${VERSION} ${1} +eval "$(ssh-agent -s)" +ssh-add $HOME/.ssh/${SSH_KEYPAIR}.pem + +docker run -it --rm -v /var/run/docker.sock:/var/run/docker.sock -v $PWD/terraform:/deploy/.terraform -v "$SSH_AUTH_SOCK:/tmp/ssh_auth_sock" -e "SSH_AUTH_SOCK=/tmp/ssh_auth_sock" -e "ORIG_SSH_AUTH_SOCK=$SSH_AUTH_SOCK" -e ENV=${ENV} -e S3_BUCKET=${S3_BUCKET} -e SSH_KEYPAIR=${SSH_KEYPAIR} -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY -e AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION infra:${VERSION} ${1} diff --git a/infra/v1/scripts/create.sh b/infra/v1/scripts/create.sh index bda81dd..886dfd1 100755 --- a/infra/v1/scripts/create.sh +++ b/infra/v1/scripts/create.sh @@ -1,6 +1,5 @@ #!/bin/bash -set -x set -e __dir="$(readlink -f $(dirname ${0}))" @@ -22,30 +21,37 @@ export TF_VAR_access_key=${AWS_ACCESS_KEY_ID} export TF_VAR_secret_key=${AWS_SECRET_ACCESS_KEY} export TF_VAR_region=${AWS_DEFAULT_REGION} export TF_VAR_ssh_keypair=${SSH_KEYPAIR} +export TF_VAR_s3_bucket=${S3_BUCKET} TFSTATE_KEY="terraform/$ENV/base" +export TF_VAR_tfstate_key=${TFSTATE_KEY} + TFSTATE_FILE="${__root}/.terraform/terraform.tfstate" [ -z ${CONSUL_AMI_ID} ] && error "undefined CONSUL_AMI_ID" export TF_VAR_consul_ami=${CONSUL_AMI_ID} - pushd ${__root} # Remove local cached terraform.tfstate file. This is to avoid having a cached state file referencing another environment due to manual tests or wrong operations. 
rm -f ${TFSTATE_FILE} -terraform remote config -backend=s3 --backend-config="bucket=${S3_BUCKET}" --backend-config="key=$TFSTATE_KEY" --backend-config="region=${AWS_DEFAULT_REGION}" +# terraform remote config -backend=s3 --backend-config="bucket=${S3_BUCKET}" --backend-config="key=$TFSTATE_KEY" --backend-config="region=${AWS_DEFAULT_REGION}" +TF_LOG=trace TF_LOG_PATH=init.log terraform init -backend=true --backend-config="bucket=${S3_BUCKET}" --backend-config="key=${TFSTATE_KEY}" --backend-config="region=${AWS_DEFAULT_REGION}" + +TF_LOG=trace TF_LOG_PATH=plan.log terraform plan -input=false -var "env=$ENV" || error "terraform plan failed" -terraform plan -input=false -var "env=$ENV" || error "terraform plan failed" +TF_LOG=trace TF_LOG_PATH=apply.log terraform apply -input=false -auto-approve -var "env=$ENV" || error "terraform apply failed" -terraform apply -input=false -var "env=$ENV" || error "terraform apply failed" +# get states +# terraform output -json instance_ids +export TFSTATE=$(terraform state pull) -ALL_INSTANCE_IDS=$(tf_get_all_instance_ids ${TFSTATE_FILE}) +ALL_INSTANCE_IDS=$(tf_get_all_instance_ids ${TFSTATE}) aws ec2 wait instance-running --instance-ids ${ALL_INSTANCE_IDS} || error "some instances not active" # Wait all instances are reachable via ssh ansible-playbook -i ${__root}/scripts/terraform_to_ansible_inventory.sh ${__ansible}/wait_instance_up.yml # Wait for all the consul server being active. Check this using the first consul server. -consul01ip=$(tf_get_instance_public_ip ${TFSTATE_FILE} "consul_server01") +consul01ip=$(tf_get_instance_public_ip "consul_server01") ansible-playbook -i ${consul01ip}, ${__ansible}/test_consul_servers_active.yml diff --git a/infra/v1/scripts/destroy.sh b/infra/v1/scripts/destroy.sh index f015d9a..361c4fc 100755 --- a/infra/v1/scripts/destroy.sh +++ b/infra/v1/scripts/destroy.sh @@ -30,10 +30,15 @@ export TF_VAR_consul_ami="IDONTCARE" # Remove local cached terraform.tfstate file. 
This is to avoid having a cached state file referencing another environment due to manual tests or wrong operations. rm -f ${TFSTATE_FILE} -terraform remote config -backend=s3 --backend-config="bucket=${S3_BUCKET}" --backend-config="key=$TFSTATE_KEY" --backend-config="region=${AWS_DEFAULT_REGION}" +# terraform remote config -backend=s3 --backend-config="bucket=${S3_BUCKET}" --backend-config="key=$TFSTATE_KEY" --backend-config="region=${AWS_DEFAULT_REGION}" +terraform init -backend=true --backend-config="bucket=${S3_BUCKET}" --backend-config="key=${TFSTATE_KEY}" --backend-config="region=${AWS_DEFAULT_REGION}" + +# get states +# terraform output -json instance_ids +export TFSTATE=$(terraform state pull) # shutdown instance before doing terraform apply or it will fail to remove the aws_volume_attachment since it's mounted. See also https://github.com/hashicorp/terraform/issues/2957 -INSTANCE_IDS=$(tf_get_all_instance_ids ${TFSTATE_FILE}) +INSTANCE_IDS=$(tf_get_all_instance_ids ${TFSTATE}) if [ ${INSTANCE_IDS} != "[]" ]; then aws ec2 stop-instances --instance-ids ${INSTANCE_IDS} aws ec2 wait instance-stopped --instance-ids ${INSTANCE_IDS} || error "some instance are not stopped" diff --git a/infra/v1/scripts/run.sh b/infra/v1/scripts/run.sh index 74cf248..e8f4097 100755 --- a/infra/v1/scripts/run.sh +++ b/infra/v1/scripts/run.sh @@ -25,7 +25,10 @@ case ${1} in "test-upgrade") ${__root}/scripts/test_upgrade.sh ;; + "debug") + /bin/bash + ;; *) - error "Usage: ${0} {create|upgrade|destroy|test-create|test-upgrade}" + error "Usage: ${0} {create|upgrade|destroy|test-create|test-upgrade|debug}" esac diff --git a/infra/v1/scripts/terraform_to_ansible_inventory.sh b/infra/v1/scripts/terraform_to_ansible_inventory.sh index 37b33ce..2189298 100755 --- a/infra/v1/scripts/terraform_to_ansible_inventory.sh +++ b/infra/v1/scripts/terraform_to_ansible_inventory.sh @@ -9,4 +9,4 @@ __root="$(readlink -f ${__dir}/../)" TFSTATE_FILE="${__root}/.terraform/terraform.tfstate" -cat 
${TFSTATE_FILE} | jq -c -e -r -M '{ all: .modules[0].resources | to_entries | map(select(.key | test("aws_instance\\..*"))) | map(.value.primary.attributes.public_ip) }' +echo ${TFSTATE} | jq -c -e -r -M '{ all: .modules[0].resources | to_entries | map(select(.key | test("aws_instance\\..*"))) | map(.value.primary.attributes.public_ip) }' diff --git a/infra/v1/scripts/test_create.sh b/infra/v1/scripts/test_create.sh index 6e4dbcb..4a2d477 100755 --- a/infra/v1/scripts/test_create.sh +++ b/infra/v1/scripts/test_create.sh @@ -10,6 +10,7 @@ __ansible="${__root}/ansible" source ${__dir}/utils.sh function cleanup() { + # echo "w00ps / exiting" ${__dir}/destroy.sh delete_s3_object ${S3_BUCKET} ${TFSTATE_KEY} } diff --git a/infra/v1/scripts/utils.sh b/infra/v1/scripts/utils.sh index b9997a5..bd53c88 100644 --- a/infra/v1/scripts/utils.sh +++ b/infra/v1/scripts/utils.sh @@ -25,13 +25,13 @@ function tf_get_instance_id() { } function tf_get_instance_public_ip() { - local tfstatefile=${1} - local instance=${2} + # local tfstatefile=${1} + local instance=${1} local ip - ip=$(cat ${tfstatefile} | jq -e -r -M '.modules[0].resources."aws_instance.'"${instance}"'".primary.attributes.public_ip') + ip=$(echo ${TFSTATE} | jq -e -r -M '.modules[0].resources."aws_instance.'"${instance}"'".primary.attributes.public_ip') if [ $? -ne 0 ]; then # if someone has tainted the resource try with tainted instead of primary - ip=$(cat ${tfstatefile} | jq -e -r -M '.modules[0].resources."aws_instance.'"${instance}"'".tainted[0].attributes.public_ip') + ip=$(echo ${TFSTATE} | jq -e -r -M '.modules[0].resources."aws_instance.'"${instance}"'".tainted[0].attributes.public_ip') if [ $? 
-ne 0 ]; then echo "" return @@ -43,7 +43,7 @@ function tf_get_instance_public_ip() { function tf_get_all_instance_ids() { local tfstatefile=${1} local ids - ids=$(cat ${tfstatefile} | jq -c -e -r -M '.modules[0].resources | to_entries | map(select(.key | test("aws_instance\\..*"))) | map(.value.primary.id)') + ids=$(echo ${TFSTATE} | jq -c -e -r -M '.modules[0].resources | to_entries | map(select(.key | test("aws_instance\\..*"))) | map(.value.primary.id)') if [ $? -ne 0 ]; then echo "" return @@ -54,7 +54,7 @@ function tf_get_all_instance_ids() { function tf_get_all_instance_public_ips() { local tfstatefile=${1} local ids - ids=$(cat ${tfstatefile} | jq -c -e -r -M '.modules[0].resources | to_entries | map(select(.key | test("aws_instance\\..*"))) | map(.value.primary.attributes.public_ip)') + ids=$(echo ${tfstatefile} | jq -c -e -r -M '.modules[0].resources | to_entries | map(select(.key | test("aws_instance\\..*"))) | map(.value.primary.attributes.public_ip)') if [ $? -ne 0 ]; then echo "" return diff --git a/infra/v2/Dockerfile b/infra/v2/Dockerfile index 0cbe51d..a8d64db 100644 --- a/infra/v2/Dockerfile +++ b/infra/v2/Dockerfile @@ -1,15 +1,19 @@ -FROM fedora:23 +FROM debian:stretch -RUN dnf -y install wget curl git unzip jq python python-pip python-boto3 awscli ansible docker +RUN apt-get update && apt-get install -y wget curl git unzip jq python python-pip python-boto3 awscli ansible docker uuid-runtime \ + vim iproute2 net-tools nano -RUN curl https://releases.hashicorp.com/terraform/0.6.12/terraform_0.6.12_linux_amd64.zip -o /tmp/terraform_0.6.12_linux_amd64.zip +RUN curl https://releases.hashicorp.com/terraform/0.11.3/terraform_0.11.3_linux_amd64.zip -o /tmp/terraform_0.11.3_linux_amd64.zip WORKDIR /usr/local/bin/ -RUN unzip /tmp/terraform_0.6.12_linux_amd64.zip +RUN unzip /tmp/terraform_0.11.3_linux_amd64.zip RUN mkdir -p /root/.ssh RUN chmod 700 /root/.ssh -RUN echo -e "Host *\n\tStrictHostKeyChecking no\n" >> /root/.ssh/config +RUN /bin/echo -e "Host 
*\n\tStrictHostKeyChecking no\n" >> /root/.ssh/config + +RUN curl -fsSL get.docker.com -o get-docker.sh && sh get-docker.sh +# RUN usermod -aG docker admin WORKDIR /deploy ADD main.tf config ./ diff --git a/infra/v2/ansible/test_consul_servers_active.yml b/infra/v2/ansible/test_consul_servers_active.yml index 79f7299..c57390f 100644 --- a/infra/v2/ansible/test_consul_servers_active.yml +++ b/infra/v2/ansible/test_consul_servers_active.yml @@ -1,6 +1,6 @@ - hosts: all - remote_user: fedora + remote_user: admin become_user: root gather_facts: false tasks: diff --git a/infra/v2/ansible/wait_instance_stopped.yml b/infra/v2/ansible/wait_instance_stopped.yml index 7f873c1..dbffc05 100644 --- a/infra/v2/ansible/wait_instance_stopped.yml +++ b/infra/v2/ansible/wait_instance_stopped.yml @@ -1,6 +1,6 @@ - hosts: all - remote_user: fedora + remote_user: admin become_user: root gather_facts: false tasks: diff --git a/infra/v2/ansible/wait_instance_up.yml b/infra/v2/ansible/wait_instance_up.yml index d2ff73b..5af6c1a 100644 --- a/infra/v2/ansible/wait_instance_up.yml +++ b/infra/v2/ansible/wait_instance_up.yml @@ -1,6 +1,6 @@ - hosts: all - remote_user: fedora + remote_user: admin become_user: root gather_facts: false tasks: diff --git a/infra/v2/config b/infra/v2/config index d61aa05..1127ddd 100644 --- a/infra/v2/config +++ b/infra/v2/config @@ -1,3 +1,4 @@ -CONSUL_AMI_ID= # Put here the ami id reported at the end of the build of `images/consul/v2` +# Put here the ami id reported at the end of the build of `images/consul/v2` +CONSUL_AMI_ID=ami-779b6e0a PREV_VERSION="v1" diff --git a/infra/v2/main.tf b/infra/v2/main.tf index 36e8315..f813fd7 100644 --- a/infra/v2/main.tf +++ b/infra/v2/main.tf @@ -1,3 +1,17 @@ +terraform { + required_version = ">= 0.10.8" + + variable "s3_bucket" {} + variable "tfstate_key" {} + + # backend fix + backend "s3" { + # bucket = "${var.s3_bucket}" + # key = "${var.tfstate_key}" + # region = "${var.region}" +} +} + variable "access_key" {} variable 
"secret_key" {} @@ -14,11 +28,14 @@ variable "azs" { } provider "aws" { + version = "~> 1.9" access_key = "${var.access_key}" secret_key = "${var.secret_key}" region = "${var.region}" } + + # The next resources define a new vpc with a public subnet for every # availabilty zone and a default security group completely open. This isn't # meant to be used in a production environment, it's just to make the vpc @@ -43,14 +60,14 @@ resource "aws_internet_gateway" "igw" { # Create a subnet for every availability zone resource "aws_subnet" "front" { - count = "${length(split(\",\", var.azs))}" + count = "${length(split(",", var.azs))}" vpc_id = "${aws_vpc.vpc.id}" cidr_block = "10.0.${count.index * 16}.0/20" map_public_ip_on_launch = true - availability_zone = "${element(split(\",\", var.azs), count.index)}" + availability_zone = "${element(split(",", var.azs), count.index)}" tags { - Name = "subnet ${count.index} ${element(split(\",\", var.azs), count.index)}" + Name = "subnet ${count.index} ${element(split(",", var.azs), count.index)}" } } @@ -67,7 +84,7 @@ resource "aws_route_table" "public" { } resource "aws_route_table_association" "front" { - count = "${length(split(\",\", var.azs))}" + count = "${length(split(",", var.azs))}" subnet_id = "${element(aws_subnet.front.*.id, count.index)}" route_table_id = "${aws_route_table.public.id}" } @@ -144,7 +161,7 @@ resource "aws_ebs_volume" "consul_server03_ebs" { resource "aws_instance" "consul_server01" { ami = "${var.consul_ami}" instance_type = "t2.micro" - iam_instance_profile = "default_instance_profile" + iam_instance_profile = "ec2-role" key_name = "${var.ssh_keypair}" subnet_id = "${aws_subnet.front.0.id}" vpc_security_group_ids = ["${aws_security_group.allow_all.id}"] @@ -157,7 +174,7 @@ resource "aws_instance" "consul_server01" { resource "aws_instance" "consul_server02" { ami = "${var.consul_ami}" instance_type = "t2.micro" - iam_instance_profile = "default_instance_profile" + iam_instance_profile = "ec2-role" 
key_name = "${var.ssh_keypair}" subnet_id = "${aws_subnet.front.1.id}" vpc_security_group_ids = ["${aws_security_group.allow_all.id}"] @@ -170,7 +187,7 @@ resource "aws_instance" "consul_server02" { resource "aws_instance" "consul_server03" { ami = "${var.consul_ami}" instance_type = "t2.micro" - iam_instance_profile = "default_instance_profile" + iam_instance_profile = "ec2-role" key_name = "${var.ssh_keypair}" subnet_id = "${aws_subnet.front.2.id}" vpc_security_group_ids = ["${aws_security_group.allow_all.id}"] @@ -187,13 +204,17 @@ output region { } output vpc_id { - value = "${aws_vpc.vpc.id}" + value = "${aws_vpc.vpc.id}" } output security_group_allow_all_id { - value = "${aws_security_group.allow_all.id}" + value = "${aws_security_group.allow_all.id}" } output subnets { - value = "${join(",", aws_subnet.front.*.id)}" + value = "${join(",", aws_subnet.front.*.id)}" } + +# output "instance_ids" { +# value = ["${aws_instance.consul_server03.primary.id}"] +# } diff --git a/infra/v2/run-docker.sh b/infra/v2/run-docker.sh index 2002142..cb215b1 100755 --- a/infra/v2/run-docker.sh +++ b/infra/v2/run-docker.sh @@ -1,3 +1,6 @@ #!/bin/bash -docker run -it -v /var/run/docker.sock:/var/run/docker.sock -v "$SSH_AUTH_SOCK:/tmp/ssh_auth_sock" -e "SSH_AUTH_SOCK=/tmp/ssh_auth_sock" -e "ORIG_SSH_AUTH_SOCK=$SSH_AUTH_SOCK" -e ENV=${ENV} -e S3_BUCKET=${S3_BUCKET} -e SSH_KEYPAIR=${SSH_KEYPAIR} -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY -e AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION infra:${VERSION} ${1} +eval "$(ssh-agent -s)" +ssh-add $HOME/.ssh/${SSH_KEYPAIR}.pem + +docker run -it --rm -v /var/run/docker.sock:/var/run/docker.sock -v $PWD/terraform:/deploy/.terraform -v "$SSH_AUTH_SOCK:/tmp/ssh_auth_sock" -e "SSH_AUTH_SOCK=/tmp/ssh_auth_sock" -e "ORIG_SSH_AUTH_SOCK=$SSH_AUTH_SOCK" -e ENV=${ENV} -e S3_BUCKET=${S3_BUCKET} -e SSH_KEYPAIR=${SSH_KEYPAIR} -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY -e 
AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION infra:${VERSION} ${1} diff --git a/infra/v2/scripts/create.sh b/infra/v2/scripts/create.sh index bda81dd..656762c 100755 --- a/infra/v2/scripts/create.sh +++ b/infra/v2/scripts/create.sh @@ -22,30 +22,37 @@ export TF_VAR_access_key=${AWS_ACCESS_KEY_ID} export TF_VAR_secret_key=${AWS_SECRET_ACCESS_KEY} export TF_VAR_region=${AWS_DEFAULT_REGION} export TF_VAR_ssh_keypair=${SSH_KEYPAIR} +export TF_VAR_s3_bucket=${S3_BUCKET} TFSTATE_KEY="terraform/$ENV/base" +export TF_VAR_tfstate_key=${TFSTATE_KEY} + TFSTATE_FILE="${__root}/.terraform/terraform.tfstate" [ -z ${CONSUL_AMI_ID} ] && error "undefined CONSUL_AMI_ID" export TF_VAR_consul_ami=${CONSUL_AMI_ID} - pushd ${__root} # Remove local cached terraform.tfstate file. This is to avoid having a cached state file referencing another environment due to manual tests or wrong operations. rm -f ${TFSTATE_FILE} -terraform remote config -backend=s3 --backend-config="bucket=${S3_BUCKET}" --backend-config="key=$TFSTATE_KEY" --backend-config="region=${AWS_DEFAULT_REGION}" +# terraform remote config -backend=s3 --backend-config="bucket=${S3_BUCKET}" --backend-config="key=$TFSTATE_KEY" --backend-config="region=${AWS_DEFAULT_REGION}" +TF_LOG=trace TF_LOG_PATH=init.log terraform init -backend=true --backend-config="bucket=${S3_BUCKET}" --backend-config="key=${TFSTATE_KEY}" --backend-config="region=${AWS_DEFAULT_REGION}" + +TF_LOG=trace TF_LOG_PATH=plan.log terraform plan -input=false -var "env=$ENV" || error "terraform plan failed" -terraform plan -input=false -var "env=$ENV" || error "terraform plan failed" +TF_LOG=trace TF_LOG_PATH=apply.log terraform apply -input=false -auto-approve -var "env=$ENV" || error "terraform apply failed" -terraform apply -input=false -var "env=$ENV" || error "terraform apply failed" +# get states +# terraform output -json instance_ids +export TFSTATE=$(terraform state pull) -ALL_INSTANCE_IDS=$(tf_get_all_instance_ids ${TFSTATE_FILE}) 
+ALL_INSTANCE_IDS=$(tf_get_all_instance_ids ${TFSTATE}) aws ec2 wait instance-running --instance-ids ${ALL_INSTANCE_IDS} || error "some instances not active" # Wait all instances are reachable via ssh ansible-playbook -i ${__root}/scripts/terraform_to_ansible_inventory.sh ${__ansible}/wait_instance_up.yml # Wait for all the consul server being active. Check this using the first consul server. -consul01ip=$(tf_get_instance_public_ip ${TFSTATE_FILE} "consul_server01") +consul01ip=$(tf_get_instance_public_ip "consul_server01") ansible-playbook -i ${consul01ip}, ${__ansible}/test_consul_servers_active.yml diff --git a/infra/v2/scripts/destroy.sh b/infra/v2/scripts/destroy.sh index f015d9a..9f77545 100755 --- a/infra/v2/scripts/destroy.sh +++ b/infra/v2/scripts/destroy.sh @@ -1,6 +1,5 @@ #!/bin/bash -set -x set -e __dir="$(readlink -f $(dirname ${0}))" @@ -30,10 +29,15 @@ export TF_VAR_consul_ami="IDONTCARE" # Remove local cached terraform.tfstate file. This is to avoid having a cached state file referencing another environment due to manual tests or wrong operations. rm -f ${TFSTATE_FILE} -terraform remote config -backend=s3 --backend-config="bucket=${S3_BUCKET}" --backend-config="key=$TFSTATE_KEY" --backend-config="region=${AWS_DEFAULT_REGION}" +# terraform remote config -backend=s3 --backend-config="bucket=${S3_BUCKET}" --backend-config="key=$TFSTATE_KEY" --backend-config="region=${AWS_DEFAULT_REGION}" +terraform init -backend=true --backend-config="bucket=${S3_BUCKET}" --backend-config="key=${TFSTATE_KEY}" --backend-config="region=${AWS_DEFAULT_REGION}" + +# get states +# terraform output -json instance_ids +export TFSTATE=$(terraform state pull) # shutdown instance before doing terraform apply or it will fail to remove the aws_volume_attachment since it's mounted. 
See also https://github.com/hashicorp/terraform/issues/2957 -INSTANCE_IDS=$(tf_get_all_instance_ids ${TFSTATE_FILE}) +INSTANCE_IDS=$(tf_get_all_instance_ids ${TFSTATE}) if [ ${INSTANCE_IDS} != "[]" ]; then aws ec2 stop-instances --instance-ids ${INSTANCE_IDS} aws ec2 wait instance-stopped --instance-ids ${INSTANCE_IDS} || error "some instance are not stopped" diff --git a/infra/v2/scripts/run.sh b/infra/v2/scripts/run.sh index 74cf248..e8f4097 100755 --- a/infra/v2/scripts/run.sh +++ b/infra/v2/scripts/run.sh @@ -25,7 +25,10 @@ case ${1} in "test-upgrade") ${__root}/scripts/test_upgrade.sh ;; + "debug") + /bin/bash + ;; *) - error "Usage: ${0} {create|upgrade|destroy|test-create|test-upgrade}" + error "Usage: ${0} {create|upgrade|destroy|test-create|test-upgrade|debug}" esac diff --git a/infra/v2/scripts/terraform_to_ansible_inventory.sh b/infra/v2/scripts/terraform_to_ansible_inventory.sh index 37b33ce..2189298 100755 --- a/infra/v2/scripts/terraform_to_ansible_inventory.sh +++ b/infra/v2/scripts/terraform_to_ansible_inventory.sh @@ -9,4 +9,4 @@ __root="$(readlink -f ${__dir}/../)" TFSTATE_FILE="${__root}/.terraform/terraform.tfstate" -cat ${TFSTATE_FILE} | jq -c -e -r -M '{ all: .modules[0].resources | to_entries | map(select(.key | test("aws_instance\\..*"))) | map(.value.primary.attributes.public_ip) }' +echo ${TFSTATE} | jq -c -e -r -M '{ all: .modules[0].resources | to_entries | map(select(.key | test("aws_instance\\..*"))) | map(.value.primary.attributes.public_ip) }' diff --git a/infra/v2/scripts/test_create.sh b/infra/v2/scripts/test_create.sh index 6e4dbcb..7c1ee1d 100755 --- a/infra/v2/scripts/test_create.sh +++ b/infra/v2/scripts/test_create.sh @@ -1,6 +1,5 @@ #!/bin/bash -set -x set -e __dir="$(readlink -f $(dirname ${0}))" @@ -10,6 +9,7 @@ __ansible="${__root}/ansible" source ${__dir}/utils.sh function cleanup() { + echo "w00ps / exiting" ${__dir}/destroy.sh delete_s3_object ${S3_BUCKET} ${TFSTATE_KEY} } diff --git a/infra/v2/scripts/test_upgrade.sh 
b/infra/v2/scripts/test_upgrade.sh index 3513a8b..91cbbc6 100755 --- a/infra/v2/scripts/test_upgrade.sh +++ b/infra/v2/scripts/test_upgrade.sh @@ -1,6 +1,5 @@ #!/bin/bash -set -x set -e __dir="$(readlink -f $(dirname ${0}))" @@ -12,6 +11,7 @@ source ${__root}/config source ${__dir}/utils.sh function cleanup() { + echo "w00ps / exiting" ${__dir}/destroy.sh delete_s3_object ${S3_BUCKET} ${TFSTATE_KEY} } diff --git a/infra/v2/scripts/upgrade.sh b/infra/v2/scripts/upgrade.sh index e86313b..b738fbe 100755 --- a/infra/v2/scripts/upgrade.sh +++ b/infra/v2/scripts/upgrade.sh @@ -1,6 +1,5 @@ #!/bin/bash -set -x set -e __dir="$(readlink -f $(dirname ${0}))" @@ -34,23 +33,27 @@ pushd ${__root} # Remove local cached terraform.tfstate file. This is to avoid having a cached state file referencing another environment due to manual tests or wrong operations. rm -f ${TFSTATE_FILE} -terraform remote config -backend=s3 --backend-config="bucket=${S3_BUCKET}" --backend-config="key=$TFSTATE_KEY" --backend-config="region=${AWS_DEFAULT_REGION}" +TF_LOG=trace TF_LOG_PATH=init.log terraform init -backend=true --backend-config="bucket=${S3_BUCKET}" --backend-config="key=${TFSTATE_KEY}" --backend-config="region=${AWS_DEFAULT_REGION}" + +# get states +# terraform output -json instance_ids +export TFSTATE=$(terraform state pull) # Consul server upgrade # Test all consul servers are active. Check this using the first consul server. 
-consul01ip=$(tf_get_instance_public_ip ${TFSTATE_FILE} "consul_server01") +consul01ip=$(tf_get_instance_public_ip "consul_server01") ansible-playbook -i ${consul01ip}, ${__ansible}/test_consul_servers_active.yml ## Rolling upgrade of consul server for id in 01 02 03; do - INSTANCE_ID=$(tf_get_instance_id ${TFSTATE_FILE} "consul_server${id}") + INSTANCE_ID=$(tf_get_instance_id "consul_server${id}") if [ -z ${INSTANCE_ID} ]; then error "empty instance id" fi # check for changes set +e - terraform plan -detailed-exitcode -input=false -var "env=$ENV" -target aws_instance.consul_server${id} -target aws_volume_attachment.consul_server${id}_ebs_attachment + TF_LOG=trace TF_LOG_PATH=plan.log terraform plan -detailed-exitcode -input=false -var "env=$ENV" -target aws_instance.consul_server${id} -target aws_volume_attachment.consul_server${id}_ebs_attachment if [ $? -eq 0 ]; then echo "no changes for instance ${instance}" continue @@ -62,16 +65,19 @@ for id in 01 02 03; do aws ec2 wait instance-stopped --instance-ids ${INSTANCE_ID} || error "instance ${INSTANCE_ID} is not stopped" # recreate instance - terraform apply -input=false -var "env=$ENV" -target aws_instance.consul_server${id} -target aws_volume_attachment.consul_server${id}_ebs_attachment + TF_LOG=trace TF_LOG_PATH=apply.log terraform apply -input=false -auto-approve -var "env=$ENV" -target aws_instance.consul_server${id} -target aws_volume_attachment.consul_server${id}_ebs_attachment + + # refresh TFSTATE + export TFSTATE=$(terraform state pull) # Get the new instance id - INSTANCE_ID=$(tf_get_instance_id ${TFSTATE_FILE} consul_server${id}) + INSTANCE_ID=$(tf_get_instance_id consul_server${id}) if [ -z ${INSTANCE_ID} ]; then error "empty instance id" fi aws ec2 wait instance-running --instance-ids ${INSTANCE_ID} || error "instance ${INSTANCE_ID} not running" - INSTANCE_PUBLIC_IP=$(tf_get_instance_public_ip ${TFSTATE_FILE} consul_server${id}) + INSTANCE_PUBLIC_IP=$(tf_get_instance_public_ip consul_server${id}) # 
Wait for the consul server instance being reachable via ssh ansible-playbook -i ${INSTANCE_PUBLIC_IP}, ${__ansible}/wait_instance_up.yml @@ -85,7 +91,7 @@ done # If there're some changes left behind, then we forgot to do something. echo "Checking that no changes were left behind" set +e -terraform plan -detailed-exitcode -input=false -var "env=$ENV" +TF_LOG=trace TF_LOG_PATH=plan.log terraform plan -detailed-exitcode -input=false -var "env=$ENV" ret=$? if [ ${ret} -eq 1 ]; then error "terraform plan error!" diff --git a/infra/v2/scripts/utils.sh b/infra/v2/scripts/utils.sh index b9997a5..cbe3bee 100644 --- a/infra/v2/scripts/utils.sh +++ b/infra/v2/scripts/utils.sh @@ -1,21 +1,29 @@ #!/bin/bash -set -x set -e +shopt -s expand_aliases +_xtrace() { + case $1 in + on) set -x ;; + off) set +x ;; + esac +} +alias xtrace='{ _xtrace $(cat); } 2>/dev/null <<<' + function error() { echo $1 exit 1 } function tf_get_instance_id() { - local tfstatefile=${1} - local instance=${2} + # local tfstatefile=${1} + local instance=${1} local id - id=$(cat ${tfstatefile} | jq -e -r -M '.modules[0].resources."aws_instance.'"${instance}"'".primary.id') + id=$(echo ${TFSTATE} | jq -e -r -M '.modules[0].resources."aws_instance.'"${instance}"'".primary.id') if [ $? -ne 0 ]; then # if someone has tainted the resource try with tainted instead of primary - id=$(cat ${tfstatefile} | jq -e -r -M '.modules[0].resources."aws_instance.'"${instance}"'".tainted[0].id') + id=$(echo ${TFSTATE} | jq -e -r -M '.modules[0].resources."aws_instance.'"${instance}"'".tainted[0].id') if [ $? 
-ne 0 ]; then echo "" return @@ -25,13 +33,13 @@ function tf_get_instance_id() { } function tf_get_instance_public_ip() { - local tfstatefile=${1} - local instance=${2} + # local tfstatefile=${1} + local instance=${1} local ip - ip=$(cat ${tfstatefile} | jq -e -r -M '.modules[0].resources."aws_instance.'"${instance}"'".primary.attributes.public_ip') + ip=$(echo ${TFSTATE} | jq -e -r -M '.modules[0].resources."aws_instance.'"${instance}"'".primary.attributes.public_ip') if [ $? -ne 0 ]; then # if someone has tainted the resource try with tainted instead of primary - ip=$(cat ${tfstatefile} | jq -e -r -M '.modules[0].resources."aws_instance.'"${instance}"'".tainted[0].attributes.public_ip') + ip=$(echo ${TFSTATE} | jq -e -r -M '.modules[0].resources."aws_instance.'"${instance}"'".tainted[0].attributes.public_ip') if [ $? -ne 0 ]; then echo "" return @@ -43,7 +51,7 @@ function tf_get_instance_public_ip() { function tf_get_all_instance_ids() { local tfstatefile=${1} local ids - ids=$(cat ${tfstatefile} | jq -c -e -r -M '.modules[0].resources | to_entries | map(select(.key | test("aws_instance\\..*"))) | map(.value.primary.id)') + ids=$(echo ${TFSTATE} | jq -c -e -r -M '.modules[0].resources | to_entries | map(select(.key | test("aws_instance\\..*"))) | map(.value.primary.id)') if [ $? -ne 0 ]; then echo "" return @@ -54,7 +62,7 @@ function tf_get_all_instance_ids() { function tf_get_all_instance_public_ips() { local tfstatefile=${1} local ids - ids=$(cat ${tfstatefile} | jq -c -e -r -M '.modules[0].resources | to_entries | map(select(.key | test("aws_instance\\..*"))) | map(.value.primary.attributes.public_ip)') + ids=$(echo ${TFSTATE} | jq -c -e -r -M '.modules[0].resources | to_entries | map(select(.key | test("aws_instance\\..*"))) | map(.value.primary.attributes.public_ip)') if [ $? 
-ne 0 ]; then echo "" return diff --git a/setup/ENV_VARS b/setup/ENV_VARS new file mode 100644 index 0000000..667e961 --- /dev/null +++ b/setup/ENV_VARS @@ -0,0 +1,5 @@ +export AWS_ACCESS_KEY_ID= # The aws access key +export AWS_SECRET_ACCESS_KEY= # The aws secret key +export AWS_DEFAULT_REGION=us-east-1 +export S3_BUCKET= # The s3 bucket for saving terraform state files +export SSH_KEYPAIR= diff --git a/setup/iam_roles.tf b/setup/iam_roles.tf index 45a4ad2..e4334ae 100644 --- a/setup/iam_roles.tf +++ b/setup/iam_roles.tf @@ -13,28 +13,35 @@ provider "aws" { # Define an iam instance profile needed for executing the aws cli inside the # instances without expliciting providing and access and a secret key -resource "aws_iam_role" "instance_aws_access_role" { - name = "instance_aws_access" - path = "/" +resource "aws_iam_role" "ec2-role" { + name = "ec2-role" + # path = "/" assume_role_policy = < ${ENV_FILE} && EDIT THE RIGHT VALUES" + exit 1 +fi +# source globals & export to ENV +set -a && source ${ENV_FILE} && set +a + +export TF_VAR_access_key=${AWS_ACCESS_KEY_ID} +export TF_VAR_secret_key=${AWS_SECRET_ACCESS_KEY} +export TF_VAR_region=${AWS_DEFAULT_REGION} + +terraform apply