#!/usr/bin/env bash
set -euo pipefail

# Repository root: the parent of the directory that contains this script.
# CDPATH is cleared for this cd so a relative script directory cannot be
# resolved through the caller's CDPATH (which would also make cd print the
# directory it picked and corrupt the captured value).
ROOT="$(CDPATH='' cd -- "$(dirname -- "$0")/.." && pwd)"
# Optional KEY=VALUE settings file consulted by read_env_value.
ENV_FILE="$ROOT/.env"
# Runtime directory holding the pid file, logs, stdin FIFO and update scratch.
RUN_DIR="$ROOT/run"
PID_FILE="$RUN_DIR/codex-app-server.pid"
LOG_FILE="$RUN_DIR/codex-app-server.log"
STDIN_FIFO="$RUN_DIR/codex-app-server.stdin"
UPGRADE_LOG_FILE="$RUN_DIR/codex-app-server-upgrade.log"
# GitHub "owner/repo" queried for releases; overridable from the environment.
CODEX_RELEASE_REPO="${CODEX_RELEASE_REPO:-openai/codex}"
# Command prefix (empty or "sudo") used by run_install; set by
# choose_install_prefix.
INSTALL_PREFIX=()

# Print the value of the first KEY=VALUE entry for the given key in $ENV_FILE.
#
# Arguments: $1 - key to look up
# Outputs:   the value on stdout (an empty line if the key has an empty value);
#            nothing if the file or the key is missing
# Returns:   0 unless awk itself fails
#
# Tolerates the usual .env spellings: an optional leading "export", blanks
# around the key and value, a trailing carriage return, and one pair of
# matching single or double quotes around the value.
read_env_value() {
  local key="$1"
  if [[ -f "$ENV_FILE" ]]; then
    awk -v key="$key" -v sq="'" '
      {
        line = $0
        sub(/\r$/, "", line)
        sub(/^[ \t]*(export[ \t]+)?/, "", line)
        eq = index(line, "=")
        if (eq == 0) next
        name = substr(line, 1, eq - 1)
        sub(/[ \t]+$/, "", name)
        if (name != key) next
        value = substr(line, eq + 1)
        sub(/^[ \t]+/, "", value)
        sub(/[ \t]+$/, "", value)
        first = substr(value, 1, 1)
        last = substr(value, length(value), 1)
        # Strip exactly one pair of matching surrounding quotes.
        if (length(value) >= 2 && first == last && (first == "\"" || first == sq)) {
          value = substr(value, 2, length(value) - 2)
        }
        print value
        exit
      }
    ' "$ENV_FILE"
  fi
}

# Socket path precedence: the HOST_CODEX_SOCKET environment variable, then the
# HOST_CODEX_SOCKET entry in $ENV_FILE, then a default inside the run directory.
HOST_CODEX_SOCKET="${HOST_CODEX_SOCKET:-$(read_env_value HOST_CODEX_SOCKET)}"
HOST_CODEX_SOCKET="${HOST_CODEX_SOCKET:-$RUN_DIR/codex.sock}"

# Make sure the runtime directory exists and is accessible to the owner only.
mkdir -p "$RUN_DIR"
chmod 700 "$RUN_DIR"

# Print the command-line help text to stdout. The script name shown is $0.
usage() {
  printf '%s\n' \
    "Usage: $0 <start|stop|status|check-updates> [options]" \
    '' \
    'Commands:' \
    '  start                Start codex app-server if it is not already running.' \
    '  stop                 Stop codex app-server and remove stale runtime files.' \
    '  status               Print whether codex app-server is running.' \
    '  check-updates [-y]   Check GitHub releases and optionally install the latest Codex binary.' \
    '' \
    'Environment:' \
    '  CODEX_BIN            Codex executable to replace. Defaults to the codex found on PATH.' \
    '  CODEX_RELEASE_REPO   GitHub repo for releases. Defaults to openai/codex.'
}

# Print the digits stored in $PID_FILE (everything that is not 0-9 is dropped).
#
# Outputs: the pid digits without a trailing newline; nothing when the pid
#          file is missing or unreadable
# Returns: always 0
pid_from_file() {
  # Bail out quietly when there is no pid file. Redirections are applied left
  # to right, so a failing "< file" would be reported by the shell before a
  # later "2>/dev/null" could silence it.
  [[ -r "$PID_FILE" ]] || return 0
  tr -cd '0-9' 2>/dev/null < "$PID_FILE" || true
}

# Print the pid recorded in the pid file, but only if a process with that pid
# can currently be signalled (kill -0). Prints nothing otherwise.
server_pid() {
  local candidate
  candidate="$(pid_from_file)"
  [[ -n "$candidate" ]] || return 0
  kill -0 "$candidate" 2>/dev/null || return 0
  printf '%s\n' "$candidate"
}

# Print the process group id that ps reports for pid $1, with all whitespace
# removed. Prints nothing (and still returns 0) when ps fails.
process_group_id() {
  local raw
  raw="$(ps -o pgid= -p "$1" 2>/dev/null)" || true
  printf '%s' "${raw//[[:space:]]/}"
}

# Print the ps "stat" field for pid $1 with all whitespace removed. Prints
# nothing (and still returns 0) when ps fails.
process_state() {
  local raw
  raw="$(ps -o stat= -p "$1" 2>/dev/null)" || true
  printf '%s' "${raw//[[:space:]]/}"
}

# List the pids that belong to process group $1, one per line.
# Uses pgrep when it is available and otherwise filters the output of ps.
process_group_members() {
  local group="$1"
  if ! command -v pgrep >/dev/null 2>&1; then
    # Fallback: match the pgid column of a full process listing.
    ps -eo pid=,pgid= | awk -v pgid="$group" '$2 == pgid { print $1 }'
    return
  fi
  # pgrep exits non-zero when nothing matches; that is not an error here.
  pgrep -g "$group" 2>/dev/null || true
  return 0
}

# Succeed when process group $1 still has at least one member according to
# process_group_members.
process_group_alive() {
  local pgid="$1" listing
  listing="$(process_group_members "$pgid")"
  if [[ -z "$listing" ]]; then
    return 1
  fi
  return 0
}

# Succeed when server_pid reports a live pid.
is_running() {
  if [[ -z "$(server_pid)" ]]; then
    return 1
  fi
  return 0
}

# Delete $HOST_CODEX_SOCKET when it is a Unix socket.
# Returns 0 when the path does not exist or was removed; returns 1 (leaving
# the path untouched) when something other than a socket is there.
remove_socket_if_safe() {
  [[ -e "$HOST_CODEX_SOCKET" ]] || return 0
  if [[ ! -S "$HOST_CODEX_SOCKET" ]]; then
    echo "socket path exists and is not a Unix socket: $HOST_CODEX_SOCKET" >&2
    return 1
  fi
  rm -f "$HOST_CODEX_SOCKET"
}

# Copy the first 120 lines of $LOG_FILE to stderr. Never fails, even when the
# log cannot be read.
show_log_excerpt() {
  if ! sed -n '1,120p' "$LOG_FILE" >&2; then
    return 0
  fi
}

# Print the Codex executable to use: $CODEX_BIN when it is set and non-empty,
# otherwise whatever "command -v codex" resolves. The return status is that of
# the lookup, so it is non-zero when no codex can be found.
codex_bin() {
  if [[ -z "${CODEX_BIN:-}" ]]; then
    command -v codex
    return
  fi
  printf '%s\n' "$CODEX_BIN"
}

# Start codex app-server detached from the terminal and wait until it is ready.
#
# Globals:  PID_FILE, HOST_CODEX_SOCKET, STDIN_FIFO, LOG_FILE (read; the files
#           they name are created or truncated)
# Outputs:  a status line on stdout; diagnostics and a log excerpt on stderr
# Returns:  0 when the server is (already) running with its socket present,
#           1 otherwise
#
# Critical steps are checked explicitly rather than left to `set -e`, because
# this function is also called from `if`/`||` contexts where errexit is off.
start_server() {
  local old_pid pid start_codex_bin i
  old_pid="$(pid_from_file)"
  if [[ -n "$old_pid" ]] && kill -0 "$old_pid" 2>/dev/null; then
    if [[ -S "$HOST_CODEX_SOCKET" ]]; then
      echo "codex app-server already running: pid=$old_pid socket=$HOST_CODEX_SOCKET"
      return 0
    fi
    echo "pid $old_pid is running but socket is missing; refusing to start a second app-server" >&2
    return 1
  fi
  rm -f "$PID_FILE"
  remove_socket_if_safe || return 1

  # "|| true" keeps a failed lookup from tripping errexit on the assignment,
  # so the friendly message below is actually reachable.
  start_codex_bin="$(codex_bin || true)"
  if [[ -z "$start_codex_bin" ]]; then
    echo "codex executable not found; set CODEX_BIN" >&2
    return 1
  fi
  if ! command -v setsid >/dev/null 2>&1; then
    echo "missing required command: setsid" >&2
    return 1
  fi

  rm -f "$STDIN_FIFO"
  # Create the FIFO with its final permissions instead of chmod-ing afterwards.
  mkfifo -m 600 "$STDIN_FIFO" || return 1

  : > "$LOG_FILE"
  # Codex app-server currently exits if detached with stdin closed. A detached
  # wrapper keeps a private FIFO writer open and then runs Codex on the host.
  # Wrapper arguments: $1 FIFO, $2 listen URL, $3 pid file, $4 codex binary.
  setsid -f bash -c '
    echo "$$" > "$3"
    tail -f /dev/null > "$1" &
    writer=$!
    trap "kill $writer 2>/dev/null || true" EXIT
    "$4" app-server --listen "$2" < "$1"
  ' codex-app-server "$STDIN_FIFO" "unix://$HOST_CODEX_SOCKET" "$PID_FILE" "$start_codex_bin" >> "$LOG_FILE" 2>&1 || {
    echo "failed to launch codex app-server wrapper; log follows:" >&2
    show_log_excerpt
    return 1
  }

  # Wait up to ~5 seconds for the wrapper to record its pid. Test for a
  # non-empty file: the wrapper creates the file before it writes the pid.
  for (( i = 0; i < 50; i++ )); do
    [[ -s "$PID_FILE" ]] && break
    sleep 0.1
  done

  pid="$(pid_from_file)"
  if [[ -z "$pid" ]]; then
    echo "codex app-server did not write a pid file; log follows:" >&2
    show_log_excerpt
    return 1
  fi

  # Wait for the socket, then require the process to survive a short grace
  # period before reporting success.
  for (( i = 0; i < 100; i++ )); do
    if [[ -S "$HOST_CODEX_SOCKET" ]]; then
      sleep 0.5
      if kill -0 "$pid" 2>/dev/null; then
        echo "codex app-server started: pid=$pid socket=$HOST_CODEX_SOCKET log=$LOG_FILE"
        return 0
      fi
    fi
    if ! kill -0 "$pid" 2>/dev/null; then
      echo "codex app-server exited before staying ready; log follows:" >&2
      show_log_excerpt
      rm -f "$PID_FILE"
      return 1
    fi
    sleep 0.1
  done

  echo "codex app-server did not create socket within 10 seconds; log follows:" >&2
  show_log_excerpt
  return 1
}

# Clean up after a confirmed stop: remove the pid file and stdin FIFO, remove
# the socket if it is safe to do so, then report that the server stopped.
finish_stopped() {
  local stale
  for stale in "$PID_FILE" "$STDIN_FIFO"; do
    rm -f "$stale"
  done
  remove_socket_if_safe
  echo "codex app-server stopped"
}

# Succeed once the signalled target is gone.
# $1 pid, $2 pgid (may be empty), $3 signal target. When the whole group was
# signalled the group must be empty; otherwise the pid must have vanished or
# become a zombie.
_server_target_gone() {
  local pid="$1" pgid="$2" signal_target="$3" state
  if [[ -n "$pgid" && "$signal_target" == "-$pgid" ]]; then
    if process_group_alive "$pgid"; then
      return 1
    fi
    return 0
  fi
  state="$(process_state "$pid")"
  [[ -z "$state" || "$state" == Z* ]]
}

# Poll _server_target_gone up to $1 times, 0.1 s apart.
# $2 pid, $3 pgid, $4 signal target. Returns 0 as soon as the target is gone,
# 1 when all attempts are used up.
_wait_for_server_exit() {
  local attempts="$1" pid="$2" pgid="$3" signal_target="$4" i
  for (( i = 0; i < attempts; i++ )); do
    if _server_target_gone "$pid" "$pgid" "$signal_target"; then
      return 0
    fi
    sleep 0.1
  done
  return 1
}

# Stop codex app-server: SIGTERM first, SIGKILL after about 5 seconds.
#
# The whole process group is signalled when the server has a group of its own;
# if it shares this script's group only the pid is signalled, so the script
# does not kill itself.
#
# Outputs: progress on stdout, warnings on stderr
# Returns: 0 when the server is stopped (or was not running), 1 when it is
#          still alive after SIGKILL
stop_server() {
  local pid pgid self_pgid signal_target
  pid="$(server_pid)"
  if [[ -z "$pid" ]]; then
    # Nothing to stop; still clear out stale runtime files.
    rm -f "$PID_FILE" "$STDIN_FIFO"
    remove_socket_if_safe
    echo "codex app-server is not running"
    return 0
  fi

  pgid="$(process_group_id "$pid")"
  self_pgid="$(process_group_id "$$")"
  if [[ -n "$pgid" && "$pgid" != "$self_pgid" ]]; then
    signal_target="-$pgid"
    echo "stopping codex app-server process group: pgid=$pgid pid=$pid"
  else
    signal_target="$pid"
    echo "stopping codex app-server: pid=$pid"
    if [[ -n "$pgid" && "$pgid" == "$self_pgid" ]]; then
      echo "server shares this script process group; using pid-only stop" >&2
    fi
  fi

  kill -TERM -- "$signal_target" 2>/dev/null || true
  if _wait_for_server_exit 50 "$pid" "$pgid" "$signal_target"; then
    finish_stopped
    return 0
  fi

  echo "codex app-server did not stop after 5 seconds; killing $signal_target" >&2
  kill -KILL -- "$signal_target" 2>/dev/null || true
  if _wait_for_server_exit 20 "$pid" "$pgid" "$signal_target"; then
    finish_stopped
    return 0
  fi
  echo "failed to stop codex app-server pid=$pid" >&2
  return 1
}

# Report whether codex app-server is running.
# Returns 0 when a live pid and the socket are both present, 2 when the pid is
# alive but the socket is missing, and 1 when no live pid is found (a leftover
# pid file is reported on stderr).
status_server() {
  local live_pid group
  live_pid="$(server_pid)"
  if [[ -z "$live_pid" ]]; then
    if [[ -f "$PID_FILE" ]]; then
      echo "codex app-server not running; stale pid file: $PID_FILE" >&2
    else
      echo "codex app-server not running"
    fi
    return 1
  fi

  group="$(process_group_id "$live_pid")"
  if [[ ! -S "$HOST_CODEX_SOCKET" ]]; then
    echo "codex app-server pid=$live_pid is running but socket is missing: $HOST_CODEX_SOCKET" >&2
    return 2
  fi
  echo "codex app-server running: pid=$live_pid pgid=$group socket=$HOST_CODEX_SOCKET log=$LOG_FILE"
  return 0
}

# Succeed when command $1 can be resolved; otherwise complain on stderr and
# return 1.
require_cmd() {
  command -v "$1" >/dev/null 2>&1 && return 0
  echo "missing required command: $1" >&2
  return 1
}

# Print the first MAJOR.MINOR.PATCH number found in the output of
# "<bin> --version".
#
# Arguments: $1 - path of the Codex executable (may contain spaces)
# Outputs:   the version on stdout; nothing when the binary cannot be run or
#            prints no version-looking string
# Returns:   always 0
codex_version_from() {
  local bin="$1" line
  line="$("$bin" --version 2>/dev/null || true)"
  # A leftmost regex match keeps every digit of the major component, so
  # "10.2.3" is not cut down to "0.2.3".
  if [[ "$line" =~ [0-9]+\.[0-9]+\.[0-9]+ ]]; then
    printf '%s\n' "${BASH_REMATCH[0]}"
  fi
}

# Print the release target triple that matches this machine, derived from
# "uname -s" and "uname -m". Returns 1 with a message on stderr for any other
# OS/architecture combination.
release_target() {
  local kernel machine cpu=""
  kernel="$(uname -s)"
  machine="$(uname -m)"

  # Normalise the architecture spelling first, then pick the OS suffix.
  case "$machine" in
    x86_64|amd64) cpu="x86_64" ;;
    aarch64|arm64) cpu="aarch64" ;;
  esac

  if [[ -n "$cpu" && "$kernel" == "Linux" ]]; then
    echo "$cpu-unknown-linux-musl"
  elif [[ -n "$cpu" && "$kernel" == "Darwin" ]]; then
    echo "$cpu-apple-darwin"
  else
    echo "unsupported platform for Codex binary release: $kernel $machine" >&2
    return 1
  fi
}

# Succeed when version $1 is strictly greater than version $2.
#
# Only the first three dot-separated components are compared, numerically.
# Missing components count as 0 (so 1.2 equals 1.2.0), and anything from the
# first non-digit onwards in a component is ignored (so 1.2.3-beta compares as
# 1.2.3) instead of aborting the comparison.
#
# Arguments: $1 - candidate version, $2 - reference version
# Returns:   0 when $1 > $2, 1 otherwise
version_gt() {
  local -a lhs=() rhs=()
  local i a b
  IFS=. read -r -a lhs <<< "$1"
  IFS=. read -r -a rhs <<< "$2"
  for i in 0 1 2; do
    a="${lhs[i]:-0}"
    b="${rhs[i]:-0}"
    # Keep only the leading run of digits of each component.
    a="${a%%[!0-9]*}"
    b="${b%%[!0-9]*}"
    # Force base 10 so components such as "08" are not read as octal.
    a=$(( 10#${a:-0} ))
    b=$(( 10#${b:-0} ))
    if (( a > b )); then
      return 0
    fi
    if (( a < b )); then
      return 1
    fi
  done
  return 1
}

# Extract the release details for one platform from a GitHub release JSON file.
#
# Arguments: $1 - release target triple; the asset looked for is
#                 "codex-<target>.tar.gz"
#            $2 - path of the JSON document for the release
# Outputs:   four lines on stdout: version (tag without a "rust-v" or "v"
#            prefix), tag, download URL, digest. A field that is absent or
#            JSON null is printed as an empty line, never as "None".
# Returns:   0 when the asset is found; 1 (with a message on stderr) otherwise
latest_release_info() {
  local target="$1" json_file="$2"
  python3 - "$target" "$json_file" <<'PY'
import json
import sys

target, path = sys.argv[1], sys.argv[2]
asset_name = f"codex-{target}.tar.gz"
with open(path, "r", encoding="utf-8") as f:
    release = json.load(f)
# "x or ''" also covers keys that are present but null in the JSON.
tag = release.get("tag_name") or ""
version = tag
if version.startswith("rust-v"):
    version = version[6:]
elif version.startswith("v"):
    version = version[1:]
for asset in release.get("assets") or []:
    if asset.get("name") == asset_name:
        print(version)
        print(tag)
        print(asset.get("browser_download_url") or "")
        print(asset.get("digest") or "")
        raise SystemExit(0)
print(f"release {tag or '<unknown>'} has no asset named {asset_name}", file=sys.stderr)
raise SystemExit(1)
PY
}

# Check a downloaded file against a "sha256:<hex>" digest.
#
# Arguments: $1 - file to check
#            $2 - digest string; empty means the release published none
# Outputs:   diagnostics on stderr
# Returns:   0 when the digest matches or is empty (verification skipped);
#            1 on a mismatch, an unsupported digest format, or when neither
#            sha256sum nor shasum is available
verify_digest() {
  local file="$1" digest="$2" expected actual
  if [[ -z "$digest" ]]; then
    echo "release asset has no digest; skipping checksum verification" >&2
    return 0
  fi
  if [[ "$digest" != sha256:* ]]; then
    echo "unsupported release digest format: $digest" >&2
    return 1
  fi
  expected="${digest#sha256:}"
  # Hex digests are compared case-insensitively.
  expected="${expected,,}"

  if command -v sha256sum >/dev/null 2>&1; then
    actual="$(sha256sum "$file" | awk '{print $1}')" || actual=""
  elif command -v shasum >/dev/null 2>&1; then
    actual="$(shasum -a 256 "$file" | awk '{print $1}')" || actual=""
  else
    echo "missing sha256sum or shasum for checksum verification" >&2
    return 1
  fi
  # The tools prefix the line with a backslash when the file name had to be
  # escaped; that marker is not part of the hash.
  actual="${actual#\\}"
  actual="${actual,,}"

  if [[ -z "$actual" || "$actual" != "$expected" ]]; then
    echo "checksum mismatch for $file: expected $expected, got ${actual:-<none>}" >&2
    return 1
  fi
}

# Unpack a release tarball and locate the Codex executable inside it.
#
# Arguments: $1 - .tar.gz archive, $2 - scratch directory (the archive is
#            unpacked into "$2/extract")
# Outputs:   path of the first regular file named "codex", made executable
# Returns:   1 with a message on stderr when no such file is found
extract_codex_binary() {
  local archive="$1" dest="$2"
  local unpack_dir="$dest/extract" binary_path
  mkdir -p "$unpack_dir"
  tar -xzf "$archive" -C "$unpack_dir"
  binary_path="$(find "$unpack_dir" -type f -name codex -print | head -n 1)"
  if [[ -n "$binary_path" ]]; then
    chmod +x "$binary_path"
    printf '%s\n' "$binary_path"
    return 0
  fi
  echo "downloaded archive does not contain a codex binary" >&2
  return 1
}

# Run the given command, prefixed with the words in INSTALL_PREFIX when that
# array is non-empty. The empty case is handled separately so an empty array
# is never expanded.
run_install() {
  if (( ${#INSTALL_PREFIX[@]} == 0 )); then
    "$@"
    return
  fi
  "${INSTALL_PREFIX[@]}" "$@"
}

# Decide how files next to $1 can be replaced and record it in INSTALL_PREFIX.
#
# INSTALL_PREFIX is left empty when the directory (and the file, if it
# exists) is writable, and set to (sudo) when passwordless sudo works.
# Returns 1 with guidance on stderr when neither is possible.
choose_install_prefix() {
  local bin="$1" parent
  parent="$(dirname "$bin")"
  INSTALL_PREFIX=()

  if [[ -w "$parent" ]]; then
    if [[ ! -e "$bin" || -w "$bin" ]]; then
      return 0
    fi
  fi

  # "sudo -n" never prompts; it fails when a password would be needed.
  if command -v sudo >/dev/null 2>&1 && sudo -n true 2>/dev/null; then
    INSTALL_PREFIX=(sudo)
    return 0
  fi

  echo "cannot replace $bin without write permission" >&2
  echo "Run with suitable privileges or set CODEX_BIN to a user-writable Codex binary path." >&2
  return 1
}

# Back up the current binary and put the candidate in its place.
#
# Arguments: $1 - new binary, $2 - path to replace, $3 - backup path
# Returns:   0 on success; 1 as soon as any step fails
#
# Each step is checked explicitly: callers run this inside `if`, where
# `set -e` is disabled, so without the checks a failed backup would be
# followed by the replacement anyway. The candidate is first installed under a
# temporary name in the target directory and then renamed over $2; the
# temporary file is removed again if either of those steps fails.
install_candidate() {
  local candidate="$1" bin="$2" backup="$3" tmp_new="$bin.new.$$"
  choose_install_prefix "$bin" || return 1
  run_install cp -p "$bin" "$backup" || return 1
  if ! run_install install -m 0755 "$candidate" "$tmp_new"; then
    run_install rm -f "$tmp_new" || true
    return 1
  fi
  if ! run_install mv -f "$tmp_new" "$bin"; then
    run_install rm -f "$tmp_new" || true
    return 1
  fi
}

# Put the backup ($2) back in place of the binary ($1).
# An existing binary is first moved aside to "$1.failed.<pid>" on a
# best-effort basis. Returns 1 when the backup does not exist; otherwise the
# status of the final move.
restore_backup() {
  local bin="$1" backup="$2"
  local aside="$bin.failed.$$"
  if [[ -e "$backup" ]]; then
    choose_install_prefix "$bin"
    if [[ -e "$bin" ]]; then
      run_install mv -f "$bin" "$aside" || true
    fi
    run_install mv -f "$backup" "$bin"
    return
  fi
  echo "backup missing; cannot restore $bin" >&2
  return 1
}

# Ask whether the upgrade should go ahead.
#
# Arguments: $1 - installed version, $2 - available version, $3 - binary path
# Returns:   0 when ASSUME_YES is "1" or the user answers y/Y/yes/YES;
#            2 when stdin is not a terminal (a hint is printed on stderr);
#            1 for any other answer
confirm_upgrade() {
  local local_version="$1" latest_version="$2" bin="$3"
  local answer
  [[ "${ASSUME_YES:-0}" != "1" ]] || return 0
  if [[ ! -t 0 ]]; then
    echo "Codex $latest_version is available for $bin; rerun with check-updates -y to upgrade." >&2
    return 2
  fi
  read -r -p "Upgrade Codex $local_version -> $latest_version at $bin? [y/N] " answer
  case "$answer" in
    y|Y|yes|YES) return 0 ;;
    *) return 1 ;;
  esac
}

# Replace the Codex binary with the candidate, restarting the server around
# the swap when it was running.
#
# Arguments: $1 - candidate binary, $2 - binary to replace, $3 - backup path,
#            $4 - installed version, $5 - new version (both for messages)
# Outputs:   progress on stdout, failures on stderr
# Returns:   0 when the new binary is installed (and, if the server was
#            running, started successfully); 1 otherwise
#
# When the new binary fails to start, the backup is restored and the previous
# server is started again on a best-effort basis.
apply_upgrade() {
  local candidate="$1" bin="$2" backup="$3" local_version="$4" latest_version="$5" was_running=0
  if is_running; then
    was_running=1
    # Checked explicitly: callers run this function inside `if`, where
    # `set -e` is off, and the binary must not be swapped under a live server.
    if ! stop_server; then
      echo "failed to stop codex app-server; leaving $bin unchanged" >&2
      return 1
    fi
  fi

  if ! install_candidate "$candidate" "$bin" "$backup"; then
    echo "failed to install Codex update" >&2
    if [[ "$was_running" == "1" ]]; then
      start_server || true
    fi
    return 1
  fi

  if [[ "$was_running" == "1" ]]; then
    if ! start_server; then
      echo "new Codex failed to start; restoring previous binary" >&2
      restore_backup "$bin" "$backup" || true
      start_server || true
      return 1
    fi
  fi

  echo "Codex upgraded: $local_version -> $latest_version"
  echo "backup: $backup"
}

# Hand the upgrade over to a detached worker so that it outlives this script.
#
# Arguments: $1 - candidate binary, $2 - binary to replace, $3 - backup path,
#            $4 - update scratch directory, $5 - installed version,
#            $6 - new version
# The worker sleeps for a second and then re-invokes this script ($0) with the
# internal __apply-upgrade command; its output is appended to
# $UPGRADE_LOG_FILE, which is truncated first.
handoff_upgrade() {
  local candidate="$1" bin="$2" backup="$3" update_dir="$4" local_version="$5" latest_version="$6"
  local -a worker_args=(
    "$candidate" "$bin" "$backup" "$update_dir" "$local_version" "$latest_version"
  )
  : > "$UPGRADE_LOG_FILE"
  setsid -f bash -c '
    sleep 1
    "$0" __apply-upgrade "$1" "$2" "$3" "$4" "$5" "$6"
  ' "$0" "${worker_args[@]}" >> "$UPGRADE_LOG_FILE" 2>&1
  echo "Codex upgrade handoff started; app-server will restart if replacement succeeds. log=$UPGRADE_LOG_FILE"
}

# Compare the installed Codex with the latest GitHub release and, after
# confirmation, download, verify and install the newer binary.
#
# Options:  -y | --yes   upgrade without asking
#           -h | --help  print usage and return
# Globals:  ASSUME_YES (written; read by confirm_upgrade), RUN_DIR,
#           CODEX_RELEASE_REPO (read)
# Returns:  0 when already current, when the upgrade succeeded, or when it was
#           handed off to the background worker; 2 for a bad option or when
#           confirmation is impossible (no terminal); 1 for every other failure
#
# Every failure path is handled explicitly so the scratch directory is always
# removed. The one deliberate exception is the hand-off case, where the worker
# owns the directory and removes it when it is done.
check_updates() {
  ASSUME_YES=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -y|--yes) ASSUME_YES=1 ;;
      -h|--help) usage; return 0 ;;
      *) echo "unknown check-updates option: $1" >&2; usage; return 2 ;;
    esac
    shift
  done

  require_cmd curl || return 1
  require_cmd tar || return 1
  require_cmd python3 || return 1
  require_cmd ps || return 1

  local bin local_version target json latest_version latest_tag download_url digest archive tmp candidate candidate_version backup rc
  local -a release_info=()
  # "|| true" keeps a failed lookup from tripping errexit on the assignment,
  # so the friendly message below is actually reachable.
  bin="$(codex_bin || true)"
  if [[ -z "$bin" ]]; then
    echo "codex executable not found; set CODEX_BIN" >&2
    return 1
  fi
  if [[ "$bin" != /* ]]; then
    echo "CODEX_BIN must be an absolute path: $bin" >&2
    return 1
  fi
  local_version="$(codex_version_from "$bin" || true)"
  if [[ -z "$local_version" ]]; then
    echo "could not determine local Codex version from $bin" >&2
    return 1
  fi

  target="$(release_target)" || return 1
  tmp="$(mktemp -d "$RUN_DIR/codex-update.XXXXXX")" || return 1
  json="$tmp/latest.json"
  if ! curl -fsSL "https://api.github.com/repos/$CODEX_RELEASE_REPO/releases/latest" -o "$json"; then
    rm -rf -- "$tmp"
    echo "could not query the latest release of $CODEX_RELEASE_REPO" >&2
    return 1
  fi
  mapfile -t release_info < <(latest_release_info "$target" "$json")
  latest_version="${release_info[0]:-}"
  latest_tag="${release_info[1]:-}"
  download_url="${release_info[2]:-}"
  digest="${release_info[3]:-}"
  if [[ -z "$latest_version" || -z "$download_url" ]]; then
    rm -rf -- "$tmp"
    echo "could not determine latest Codex release for $target" >&2
    return 1
  fi

  if ! version_gt "$latest_version" "$local_version"; then
    rm -rf -- "$tmp"
    echo "Codex is already current: $local_version (latest $latest_version)"
    return 0
  fi
  echo "Codex update available: $local_version -> $latest_version ($latest_tag)"
  rc=0
  confirm_upgrade "$local_version" "$latest_version" "$bin" || rc=$?
  if [[ "$rc" -ne 0 ]]; then
    rm -rf -- "$tmp"
    if [[ "$rc" -eq 1 ]]; then
      echo "Codex upgrade declined" >&2
    fi
    return "$rc"
  fi

  archive="$tmp/codex-$target.tar.gz"
  if ! curl -fL "$download_url" -o "$archive"; then
    rm -rf -- "$tmp"
    echo "failed to download $download_url" >&2
    return 1
  fi
  if ! verify_digest "$archive" "$digest"; then
    rm -rf -- "$tmp"
    echo "checksum verification failed for $archive" >&2
    return 1
  fi
  if ! candidate="$(extract_codex_binary "$archive" "$tmp")"; then
    rm -rf -- "$tmp"
    return 1
  fi
  candidate_version="$(codex_version_from "$candidate" || true)"
  if [[ "$candidate_version" != "$latest_version" ]]; then
    rm -rf -- "$tmp"
    echo "downloaded Codex version $candidate_version does not match release $latest_version" >&2
    return 1
  fi

  backup="$bin.bak.$(date -u +%Y%m%d%H%M%S)"
  # Fail early, before touching the server, when the binary cannot be replaced.
  if ! choose_install_prefix "$bin"; then
    rm -rf -- "$tmp"
    return 1
  fi

  if is_running; then
    # The detached worker removes $tmp once it has finished.
    handoff_upgrade "$candidate" "$bin" "$backup" "$tmp" "$local_version" "$latest_version"
    return 0
  fi

  if apply_upgrade "$candidate" "$bin" "$backup" "$local_version" "$latest_version"; then
    rm -rf -- "$tmp"
    return 0
  fi
  rm -rf -- "$tmp"
  return 1
}

# Body of the detached __apply-upgrade command.
#
# Arguments: $1 - candidate binary, $2 - binary to replace, $3 - backup path,
#            $4 - update scratch directory, $5 - installed version,
#            $6 - new version
# Runs apply_upgrade when the candidate is executable and always removes the
# scratch directory afterwards. Returns 0 only when apply_upgrade succeeded.
apply_upgrade_worker() {
  local candidate="$1" bin="$2" backup="$3" update_dir="$4" local_version="$5" latest_version="$6"
  local status=1
  if [[ -x "$candidate" ]]; then
    status=0
    apply_upgrade "$candidate" "$bin" "$backup" "$local_version" "$latest_version" || status=1
  else
    echo "upgrade candidate is missing or not executable: $candidate" >&2
  fi
  rm -rf "$update_dir"
  return "$status"
}

# Command dispatch. The first argument selects the command (default: start);
# whatever follows it is handed to the command.
cmd="${1:-start}"
if (( $# > 0 )); then
  shift
fi

case "$cmd" in
  start|stop|status)
    # These commands take no further arguments.
    if (( $# != 0 )); then
      usage
      exit 2
    fi
    "${cmd}_server"
    ;;
  check-updates)
    check_updates "$@"
    ;;
  __apply-upgrade)
    # Internal entry point used by handoff_upgrade; expects exactly six
    # arguments.
    if (( $# != 6 )); then
      echo "invalid upgrade worker arguments" >&2
      exit 2
    fi
    apply_upgrade_worker "$@"
    ;;
  -h|--help|help)
    usage
    ;;
  *)
    echo "unknown command: $cmd" >&2
    usage >&2
    exit 2
    ;;
esac
