#!/usr/bin/env bash
#
# Customer-side image deploy companion to apply.sh. Reads the locked image URI
# from terraform outputs, pulls the matching tag from AWS ECR Public, retags
# for the customer's private ECR, pushes, and triggers an ECS rolling deploy.
#
# Usage:
#   cd infrastructure/terraform/environments/<your-env>
#   ../../deploy-image.sh
#
# Or from anywhere with a path:
#   /path/to/infrastructure/terraform/deploy-image.sh /path/to/your/env-dir
#
# Environment overrides:
#   NQUIRY_PUBLIC_IMAGE_URI  — full pull source, e.g. public.ecr.aws/l2g7u7c8/invapp-dev-app
#                              (default: public.ecr.aws/l2g7u7c8/invapp-dev-app — JE Vectors registry).
#                              Marketplace customers: substitute the Marketplace ECR URI
#                              provided in your product entitlement.
#   NQUIRY_VERSION_OVERRIDE  — deploy this tag instead of terraform's module_version output.
#                              Use case: image-only rollback (terraform pin still points at the
#                              broken release, but you want to deploy a prior version's image
#                              without changing the pin). See docs/licensee/ops/rollback-runbook.md
#                              §Image-only rollback.
#   AWS_REGION               — region for the customer ECR (default: read from terraform output).
#
# Idempotent. Safe to re-run for upgrades or to retry a failed deploy.

# Strict mode: stop on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# ANSI escape sequences used to decorate console messages.
reset=$'\e[0m'
bold=$'\e[1m'
dim=$'\e[2m'
red=$'\e[31m'

# Terraform environment directory: the first argument when given (and
# non-empty), otherwise the directory the script was started from.
if [[ -n "${1:-}" ]]; then
  target_dir="$1"
else
  target_dir="$PWD"
fi

# Pull source for the image; NQUIRY_PUBLIC_IMAGE_URI overrides the default.
if [[ -n "${NQUIRY_PUBLIC_IMAGE_URI:-}" ]]; then
  public_image_uri="$NQUIRY_PUBLIC_IMAGE_URI"
else
  public_image_uri="public.ecr.aws/l2g7u7c8/invapp-dev-app"
fi

# SIGINT (Ctrl-C) handler: report the interruption and stop the script.
# Outputs: a blank line and "Aborted." on stderr, like every other failure
#          message in this script, so captured stdout stays clean.
# Exits:   130 (128 + SIGINT), the conventional status for an interrupted run.
abort() {
  {
    echo
    echo "${red}Aborted.${reset}"
  } >&2
  exit 130
}
trap abort INT

# Verify that an external tool is available before any work is done.
# Arguments:
#   $1 - command name to look up on PATH
#   $2 - (optional) install hint appended to the error message
# Outputs: an error message on stderr when the command is missing
# Exits:   1 when the command is not found; otherwise returns normally
require_cmd() {
  local cmd="$1"
  # Default the hint so a one-argument call still reports the missing tool
  # instead of dying on an unbound $2 under `set -u`.
  local hint="${2:-}"

  if ! command -v "$cmd" >/dev/null 2>&1; then
    echo "${red}${cmd} not found on PATH.${reset}${hint:+ $hint}" >&2
    exit 1
  fi
}

# Fail fast when a required CLI is missing. The two arrays are parallel:
# each tool is paired with the install hint at the same index.
required_tools=(terraform docker aws jq)
install_hints=(
  "Install Terraform >= 1.0 and retry."
  "Install Docker (or any OCI-compatible CLI alias) and retry."
  "Install the AWS CLI v2 and retry."
  "Install jq and retry."
)
for idx in "${!required_tools[@]}"; do
  require_cmd "${required_tools[idx]}" "${install_hints[idx]}"
done

# The target must be an existing directory that contains a main.tf.
[[ -d "$target_dir" ]] || {
  echo "${red}Not a directory:${reset} $target_dir" >&2
  exit 1
}

[[ -f "$target_dir/main.tf" ]] || {
  echo "${red}No main.tf in $target_dir.${reset} Run this from a Terraform environment directory (or pass one as an argument)." >&2
  exit 1
}

# All terraform commands below run relative to the environment directory.
cd "$target_dir"

# Pull required values from terraform outputs. apply.sh must have run first.
# Sets: tf_version, version, locked_uri_base, locked_uri, cluster, service,
# region. Exits 1 with a message when a required value is missing.
echo "${bold}==> Reading terraform outputs${reset} ${dim}($PWD)${reset}"

# Read module_version once; a failure here means there is no applied state.
if ! tf_version=$(terraform output -raw module_version 2>/dev/null); then
  echo "${red}terraform output 'module_version' not available.${reset} Run apply.sh first." >&2
  exit 1
fi

# NQUIRY_VERSION_OVERRIDE (image-only rollback) wins over the terraform pin.
# Validate the tag before it is used to build any image reference.
version="${NQUIRY_VERSION_OVERRIDE:-$tf_version}"
if [[ -z "$version" || "$version" == "null" ]]; then
  echo "${red}module_version is empty.${reset} The terraform state may be stale; re-run apply.sh." >&2
  exit 1
fi

# `// empty` makes jq print nothing (instead of the string "null") when the
# field is absent, so a missing URI is caught here rather than turning into
# a bogus "null:<version>" push destination.
locked_image_uri=$(terraform output -json onboarding_summary \
  | jq -r '.version.locked_image_uri // empty')
if [[ -z "$locked_image_uri" ]]; then
  echo "${red}onboarding_summary.version.locked_image_uri is empty.${reset} The terraform state may be stale; re-run apply.sh." >&2
  exit 1
fi

# Drop the tag so the requested version can be appended. Only a colon in the
# last path segment is a tag separator; a colon earlier in the reference
# (e.g. a registry port) must be left alone.
locked_uri_base="$locked_image_uri"
if [[ "${locked_image_uri##*/}" == *:* ]]; then
  locked_uri_base="${locked_image_uri%:*}"
fi
locked_uri="${locked_uri_base}:${version}"

cluster=$(terraform output -raw ecs_cluster_name)
service=$(terraform output -raw ecs_service_name)
# Region precedence: AWS_REGION, then the aws_region output, then us-east-1.
region="${AWS_REGION:-$(terraform output -raw aws_region 2>/dev/null || echo us-east-1)}"

if [[ "$version" != "$tf_version" ]]; then
  echo "${bold}NOTE:${reset} Deploying ${dim}$version${reset} (override) instead of terraform pin ${dim}$tf_version${reset}."
  echo "This is image-only rollback mode — the terraform pin still points at $tf_version."
fi

# Summary of what is about to happen.
echo "  Version:            $version"
echo "  Pull source:        $public_image_uri:$version"
echo "  Push destination:   $locked_uri"
echo "  ECS cluster:        $cluster"
echo "  ECS service:        $service"
echo "  Region:             $region"
echo

# Fully-qualified pull reference, and the registry host portion of the push
# destination (everything before the first "/").
source_ref="$public_image_uri:$version"
registry_host="${locked_uri%%/*}"

# Step 1: fetch the image from the public source. No login is performed
# before this pull.
echo "${bold}==> Pulling image${reset}"
docker pull "$source_ref"

# Step 2: give the pulled image its private-ECR name.
echo "${bold}==> Retagging${reset}"
docker tag "$source_ref" "$locked_uri"

# Step 3: log in to the private registry. The password is piped straight into
# docker via stdin so it never appears on a command line.
echo "${bold}==> Authenticating to private ECR${reset}"
aws ecr get-login-password --region "$region" \
  | docker login --username AWS --password-stdin "$registry_host"

# Step 4: upload. Manifests are content-addressable, so pushing a tag whose
# digest the registry already has is a no-op on the registry side.
echo "${bold}==> Pushing to private ECR${reset}"
docker push "$locked_uri"

# Force a new ECS deployment. The task definition has lifecycle.ignore_changes
# on container_definitions, so terraform won't update the image — the deployment
# controller pulls the current image at the tag we just pushed.
echo "${bold}==> Triggering ECS rolling deployment${reset}"
aws ecs update-service \
  --cluster "$cluster" \
  --service "$service" \
  --region "$region" \
  --force-new-deployment \
  --query 'service.deployments[0].{status:status, desired:desiredCount, taskDef:taskDefinition}' \
  --output table

# The services-stable waiter polls every 15 seconds and gives up after 40
# attempts, so the effective timeout is 10 minutes.
echo
echo "${bold}==> Waiting for service to stabilize${reset} ${dim}(typically 3-5 min, timeout 10 min)${reset}"
if aws ecs wait services-stable \
     --cluster "$cluster" \
     --services "$service" \
     --region "$region"; then
  echo
  echo "${bold}Deploy complete.${reset} Image $version is running."

  # The health-check hint is best-effort: print it only when terraform exposes
  # an app_url, rather than emitting a broken "https:///api/health".
  app_url=$(terraform output -raw app_url 2>/dev/null || true)
  if [[ -n "$app_url" ]]; then
    # Normalize to a bare host: drop any scheme and a trailing slash.
    app_host="${app_url#http://}"
    app_host="${app_host#https://}"
    app_host="${app_host%/}"
    echo
    echo "Health check: ${dim}curl https://${app_host}/api/health${reset}"
  fi
else
  echo
  echo "${red}Service did not stabilize within the timeout.${reset}" >&2
  echo "Investigate via:" >&2
  echo "  aws ecs describe-services --cluster $cluster --services $service --region $region --query 'services[0].events[0:5]'" >&2
  exit 1
fi
