diff --git a/README.md b/README.md index ad2ad96..f910a7b 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Docker Compose runs only the Go Telegram bot. Codex runs on the host through `co scripts/start-codex-app-server start ``` - The script supports `start`, `stop`, `status`, and `check-updates [-y]`. `start` launches Codex detached, writes `run/codex-app-server.pid`, logs to `run/codex-app-server.log`, and is idempotent if the socket is already live. `check-updates` compares the local `codex` binary with the latest OpenAI Codex GitHub release; with `-y`, it downloads the matching platform archive, verifies the release digest when available, replaces the configured `CODEX_BIN`, and restarts the app-server if it was running. If the upgraded server fails to start, the script restores the previous binary and starts it again. + The script supports `start`, `stop`, `status`, and `check-updates [-y]`. `start` launches Codex detached, writes `run/codex-app-server.pid`, logs to `run/codex-app-server.log`, and is idempotent if the socket is already live. `check-updates` compares the local `codex` binary with the latest OpenAI Codex GitHub release. With `-y`, it downloads and validates the matching platform archive before stopping a running app-server. If the app-server is running, the final stop/replace/start step is handed to a detached worker so the upgrade can complete even when invoked from Codex itself. If the upgraded server fails to start, the worker restores the previous binary and starts it again. 3. Add at least one Telegram user and workspace: diff --git a/scripts/start-codex-app-server b/scripts/start-codex-app-server index 3612992..be773a7 100755 --- a/scripts/start-codex-app-server +++ b/scripts/start-codex-app-server @@ -7,6 +7,7 @@ RUN_DIR="$ROOT/run" PID_FILE="$RUN_DIR/codex-app-server.pid" LOG_FILE="$RUN_DIR/codex-app-server.log" STDIN_FIFO="$RUN_DIR/codex-app-server.stdin" +UPGRADE_LOG_FILE="$RUN_DIR/codex-app-server-upgrade.log" CODEX_RELEASE_REPO="${CODEX_RELEASE_REPO:-openai/codex}" INSTALL_PREFIX=() @@ -51,6 +52,29 @@ server_pid() { fi } +process_group_id() { + ps -o pgid= -p "$1" 2>/dev/null | tr -d '[:space:]' || true +} + +process_state() { + ps -o stat= -p "$1" 2>/dev/null | tr -d '[:space:]' || true +} + +process_group_members() { + local pgid="$1" + if command -v pgrep >/dev/null 2>&1; then + pgrep -g "$pgid" 2>/dev/null || true + return 0 + fi + ps -eo pid=,pgid= | awk -v pgid="$pgid" '$2 == pgid { print $1 }' +} + +process_group_alive() { + local pgid="$1" members + members="$(process_group_members "$pgid")" + [[ -n "$members" ]] +} + is_running() { [[ -n "$(server_pid)" ]] } @@ -70,6 +94,14 @@ show_log_excerpt() { sed -n '1,120p' "$LOG_FILE" >&2 || true } +codex_bin() { + if [[ -n "${CODEX_BIN:-}" ]]; then + printf '%s\n' "$CODEX_BIN" + return 0 + fi + command -v codex +} + start_server() { local old_pid pid start_codex_bin old_pid="$(pid_from_file)" @@ -84,6 +116,12 @@ start_server() { rm -f "$PID_FILE" remove_socket_if_safe + start_codex_bin="$(codex_bin)" + if [[ -z "$start_codex_bin" ]]; then + echo "codex executable not found; set CODEX_BIN" >&2 + return 1 + fi + rm -f "$STDIN_FIFO" mkfifo "$STDIN_FIFO" chmod 600 "$STDIN_FIFO" @@ -91,7 +129,6 @@ start_server() { : > "$LOG_FILE" # Codex app-server currently exits if detached with stdin closed. A detached # wrapper keeps a private FIFO writer open and then runs Codex on the host. - start_codex_bin="$(codex_bin)" setsid -f bash -c ' echo "$$" > "$3" tail -f /dev/null > "$1" & @@ -134,8 +171,14 @@ start_server() { return 1 } +finish_stopped() { + rm -f "$PID_FILE" "$STDIN_FIFO" + remove_socket_if_safe + echo "codex app-server stopped" +} + stop_server() { - local pid + local pid pgid self_pgid signal_target state pid="$(server_pid)" if [[ -z "$pid" ]]; then rm -f "$PID_FILE" "$STDIN_FIFO" @@ -144,25 +187,45 @@ stop_server() { return 0 fi - echo "stopping codex app-server: pid=$pid" - kill "$pid" 2>/dev/null || true + pgid="$(process_group_id "$pid")" + self_pgid="$(process_group_id "$$")" + if [[ -n "$pgid" && "$pgid" != "$self_pgid" ]]; then + signal_target="-$pgid" + echo "stopping codex app-server process group: pgid=$pgid pid=$pid" + else + signal_target="$pid" + echo "stopping codex app-server: pid=$pid" + if [[ -n "$pgid" && "$pgid" == "$self_pgid" ]]; then + echo "server shares this script process group; using pid-only stop" >&2 + fi + fi + + kill -TERM -- "$signal_target" 2>/dev/null || true for _ in $(seq 1 50); do - if ! kill -0 "$pid" 2>/dev/null; then - rm -f "$PID_FILE" "$STDIN_FIFO" - remove_socket_if_safe - echo "codex app-server stopped" + state="$(process_state "$pid")" + if [[ -n "$pgid" && "$signal_target" == "-$pgid" ]]; then + if ! process_group_alive "$pgid"; then + finish_stopped + return 0 + fi + elif [[ -z "$state" || "$state" == Z* ]]; then + finish_stopped return 0 fi sleep 0.1 done - echo "codex app-server did not stop after 5 seconds; killing pid=$pid" >&2 - kill -KILL "$pid" 2>/dev/null || true + echo "codex app-server did not stop after 5 seconds; killing $signal_target" >&2 + kill -KILL -- "$signal_target" 2>/dev/null || true for _ in $(seq 1 20); do - if ! kill -0 "$pid" 2>/dev/null; then - rm -f "$PID_FILE" "$STDIN_FIFO" - remove_socket_if_safe - echo "codex app-server stopped" + state="$(process_state "$pid")" + if [[ -n "$pgid" && "$signal_target" == "-$pgid" ]]; then + if ! process_group_alive "$pgid"; then + finish_stopped + return 0 + fi + elif [[ -z "$state" || "$state" == Z* ]]; then + finish_stopped return 0 fi sleep 0.1 @@ -172,11 +235,12 @@ stop_server() { } status_server() { - local pid + local pid pgid pid="$(server_pid)" if [[ -n "$pid" ]]; then + pgid="$(process_group_id "$pid")" if [[ -S "$HOST_CODEX_SOCKET" ]]; then - echo "codex app-server running: pid=$pid socket=$HOST_CODEX_SOCKET log=$LOG_FILE" + echo "codex app-server running: pid=$pid pgid=$pgid socket=$HOST_CODEX_SOCKET log=$LOG_FILE" return 0 fi echo "codex app-server pid=$pid is running but socket is missing: $HOST_CODEX_SOCKET" >&2 @@ -197,14 +261,6 @@ require_cmd() { fi } -codex_bin() { - if [[ -n "${CODEX_BIN:-}" ]]; then - printf '%s\n' "$CODEX_BIN" - return 0 - fi - command -v codex -} - codex_version_from() { local bin="$1" line line="$($bin --version 2>/dev/null || true)" @@ -339,9 +395,9 @@ restore_backup() { fi choose_install_prefix "$bin" if [[ -e "$bin" ]]; then - run_install mv "$bin" "$tmp_failed" || true + run_install mv -f "$bin" "$tmp_failed" || true fi - run_install mv "$backup" "$bin" + run_install mv -f "$backup" "$bin" } confirm_upgrade() { @@ -358,80 +414,15 @@ confirm_upgrade() { [[ "$reply" == "y" || "$reply" == "Y" || "$reply" == "yes" || "$reply" == "YES" ]] } -check_updates() { - ASSUME_YES=0 - while [[ $# -gt 0 ]]; do - case "$1" in - -y|--yes) ASSUME_YES=1 ;; - -h|--help) usage; return 0 ;; - *) echo "unknown check-updates option: $1" >&2; usage; return 2 ;; - esac - shift - done - - require_cmd curl - require_cmd tar - require_cmd python3 - - local bin local_version target json latest_version latest_tag download_url digest archive tmp candidate candidate_version was_running backup - bin="$(codex_bin)" - if [[ -z "$bin" ]]; then - echo "codex executable not found; set CODEX_BIN" >&2 - return 1 - fi - if [[ "$bin" != /* ]]; then - echo "CODEX_BIN must be an absolute path: $bin" >&2 - return 1 - fi - local_version="$(codex_version_from "$bin")" - if [[ -z "$local_version" ]]; then - echo "could not determine local Codex version from $bin" >&2 - return 1 - fi - - target="$(release_target)" - tmp="$(mktemp -d "$RUN_DIR/codex-update.XXXXXX")" - trap "rm -rf '$tmp'" EXIT - json="$tmp/latest.json" - curl -fsSL "https://api.github.com/repos/$CODEX_RELEASE_REPO/releases/latest" -o "$json" - mapfile -t release_info < <(latest_release_info "$target" "$json") - latest_version="${release_info[0]:-}" - latest_tag="${release_info[1]:-}" - download_url="${release_info[2]:-}" - digest="${release_info[3]:-}" - if [[ -z "$latest_version" || -z "$download_url" ]]; then - echo "could not determine latest Codex release for $target" >&2 - return 1 - fi - - if ! version_gt "$latest_version" "$local_version"; then - echo "Codex is already current: $local_version (latest $latest_version)" - return 0 - fi - echo "Codex update available: $local_version -> $latest_version ($latest_tag)" - confirm_upgrade "$local_version" "$latest_version" "$bin" - - archive="$tmp/codex-$target.tar.gz" - curl -fL "$download_url" -o "$archive" - verify_digest "$archive" "$digest" - candidate="$(extract_codex_binary "$archive" "$tmp")" - candidate_version="$(codex_version_from "$candidate")" - if [[ "$candidate_version" != "$latest_version" ]]; then - echo "downloaded Codex version $candidate_version does not match release $latest_version" >&2 - return 1 - fi - - was_running=0 +apply_upgrade() { + local candidate="$1" bin="$2" backup="$3" local_version="$4" latest_version="$5" was_running=0 if is_running; then was_running=1 - fi - backup="$bin.bak.$(date -u +%Y%m%d%H%M%S)" - - if [[ "$was_running" == "1" ]]; then stop_server fi if ! install_candidate "$candidate" "$bin" "$backup"; then + echo "failed to install Codex update" >&2 if [[ "$was_running" == "1" ]]; then start_server || true fi @@ -451,6 +442,112 @@ check_updates() { echo "backup: $backup" } +handoff_upgrade() { + local candidate="$1" bin="$2" backup="$3" update_dir="$4" local_version="$5" latest_version="$6" + : > "$UPGRADE_LOG_FILE" + setsid -f bash -c ' + sleep 1 + "$0" __apply-upgrade "$1" "$2" "$3" "$4" "$5" "$6" + ' "$0" "$candidate" "$bin" "$backup" "$update_dir" "$local_version" "$latest_version" >> "$UPGRADE_LOG_FILE" 2>&1 + echo "Codex upgrade handoff started; app-server will restart if replacement succeeds. log=$UPGRADE_LOG_FILE" +} + +check_updates() { + ASSUME_YES=0 + while [[ $# -gt 0 ]]; do + case "$1" in + -y|--yes) ASSUME_YES=1 ;; + -h|--help) usage; return 0 ;; + *) echo "unknown check-updates option: $1" >&2; usage; return 2 ;; + esac + shift + done + + require_cmd curl + require_cmd tar + require_cmd python3 + require_cmd ps + + local bin local_version target json latest_version latest_tag download_url digest archive tmp candidate candidate_version backup + bin="$(codex_bin)" + if [[ -z "$bin" ]]; then + echo "codex executable not found; set CODEX_BIN" >&2 + return 1 + fi + if [[ "$bin" != /* ]]; then + echo "CODEX_BIN must be an absolute path: $bin" >&2 + return 1 + fi + local_version="$(codex_version_from "$bin")" + if [[ -z "$local_version" ]]; then + echo "could not determine local Codex version from $bin" >&2 + return 1 + fi + + target="$(release_target)" + tmp="$(mktemp -d "$RUN_DIR/codex-update.XXXXXX")" + json="$tmp/latest.json" + curl -fsSL "https://api.github.com/repos/$CODEX_RELEASE_REPO/releases/latest" -o "$json" + mapfile -t release_info < <(latest_release_info "$target" "$json") + latest_version="${release_info[0]:-}" + latest_tag="${release_info[1]:-}" + download_url="${release_info[2]:-}" + digest="${release_info[3]:-}" + if [[ -z "$latest_version" || -z "$download_url" ]]; then + rm -rf "$tmp" + echo "could not determine latest Codex release for $target" >&2 + return 1 + fi + + if ! version_gt "$latest_version" "$local_version"; then + rm -rf "$tmp" + echo "Codex is already current: $local_version (latest $latest_version)" + return 0 + fi + echo "Codex update available: $local_version -> $latest_version ($latest_tag)" + confirm_upgrade "$local_version" "$latest_version" "$bin" + + archive="$tmp/codex-$target.tar.gz" + curl -fL "$download_url" -o "$archive" + verify_digest "$archive" "$digest" + candidate="$(extract_codex_binary "$archive" "$tmp")" + candidate_version="$(codex_version_from "$candidate")" + if [[ "$candidate_version" != "$latest_version" ]]; then + rm -rf "$tmp" + echo "downloaded Codex version $candidate_version does not match release $latest_version" >&2 + return 1 + fi + + backup="$bin.bak.$(date -u +%Y%m%d%H%M%S)" + choose_install_prefix "$bin" + + if is_running; then + handoff_upgrade "$candidate" "$bin" "$backup" "$tmp" "$local_version" "$latest_version" + return 0 + fi + + if apply_upgrade "$candidate" "$bin" "$backup" "$local_version" "$latest_version"; then + rm -rf "$tmp" + return 0 + fi + rm -rf "$tmp" + return 1 +} + +apply_upgrade_worker() { + local candidate="$1" bin="$2" backup="$3" update_dir="$4" local_version="$5" latest_version="$6" rc=0 + if [[ ! -x "$candidate" ]]; then + echo "upgrade candidate is missing or not executable: $candidate" >&2 + rm -rf "$update_dir" + return 1 + fi + if ! apply_upgrade "$candidate" "$bin" "$backup" "$local_version" "$latest_version"; then + rc=1 + fi + rm -rf "$update_dir" + return "$rc" +} + cmd="${1:-start}" case "$cmd" in start) @@ -472,6 +569,11 @@ case "$cmd" in shift || true check_updates "$@" ;; + __apply-upgrade) + shift || true + if [[ $# -ne 6 ]]; then echo "invalid upgrade worker arguments" >&2; exit 2; fi + apply_upgrade_worker "$@" + ;; -h|--help|help) usage ;;