Make Codex upgrades self-safe

This commit is contained in:
Codex
2026-05-25 04:25:26 +00:00
parent d03bf33a55
commit 09ff990468
2 changed files with 197 additions and 95 deletions

View File

@@ -11,7 +11,7 @@ Docker Compose runs only the Go Telegram bot. Codex runs on the host through `co
scripts/start-codex-app-server start scripts/start-codex-app-server start
``` ```
The script supports `start`, `stop`, `status`, and `check-updates [-y]`. `start` launches Codex detached, writes `run/codex-app-server.pid`, logs to `run/codex-app-server.log`, and is idempotent if the socket is already live. `check-updates` compares the local `codex` binary with the latest OpenAI Codex GitHub release; with `-y`, it downloads the matching platform archive, verifies the release digest when available, replaces the configured `CODEX_BIN`, and restarts the app-server if it was running. If the upgraded server fails to start, the script restores the previous binary and starts it again. The script supports `start`, `stop`, `status`, and `check-updates [-y]`. `start` launches Codex detached, writes `run/codex-app-server.pid`, logs to `run/codex-app-server.log`, and is idempotent if the socket is already live. `check-updates` compares the local `codex` binary with the latest OpenAI Codex GitHub release. With `-y`, it downloads and validates the matching platform archive before stopping a running app-server. If the app-server is running, the final stop/replace/start step is handed to a detached worker so the upgrade can complete even when invoked from Codex itself. If the upgraded server fails to start, the worker restores the previous binary and starts it again.
3. Add at least one Telegram user and workspace: 3. Add at least one Telegram user and workspace:

View File

@@ -7,6 +7,7 @@ RUN_DIR="$ROOT/run"
PID_FILE="$RUN_DIR/codex-app-server.pid" PID_FILE="$RUN_DIR/codex-app-server.pid"
LOG_FILE="$RUN_DIR/codex-app-server.log" LOG_FILE="$RUN_DIR/codex-app-server.log"
STDIN_FIFO="$RUN_DIR/codex-app-server.stdin" STDIN_FIFO="$RUN_DIR/codex-app-server.stdin"
UPGRADE_LOG_FILE="$RUN_DIR/codex-app-server-upgrade.log"
CODEX_RELEASE_REPO="${CODEX_RELEASE_REPO:-openai/codex}" CODEX_RELEASE_REPO="${CODEX_RELEASE_REPO:-openai/codex}"
INSTALL_PREFIX=() INSTALL_PREFIX=()
@@ -51,6 +52,29 @@ server_pid() {
fi fi
} }
# Print the process-group id of pid $1 with all whitespace removed.
# Prints nothing (and still succeeds) when ps reports no such process.
process_group_id() {
  local target_pid="$1" raw
  # ps failures are deliberately swallowed: "no such pid" is a normal answer.
  raw="$(ps -o pgid= -p "$target_pid" 2>/dev/null || true)"
  printf '%s' "${raw//[[:space:]]/}"
}
# Print the ps state code (the "stat" column) of pid $1 with whitespace
# removed. Prints nothing (and still succeeds) when the pid does not exist.
process_state() {
  local target_pid="$1" raw
  # ps failures are deliberately swallowed: an empty answer means "gone".
  raw="$(ps -o stat= -p "$target_pid" 2>/dev/null || true)"
  printf '%s' "${raw//[[:space:]]/}"
}
# List the pids (one per line) that belong to process group $1.
# Uses pgrep when it is installed; otherwise filters a full ps listing.
process_group_members() {
  local group="$1"
  if ! command -v pgrep >/dev/null 2>&1; then
    # Fallback: scan every process and keep those whose pgid column matches.
    ps -eo pid=,pgid= | awk -v pgid="$group" '$2 == pgid { print $1 }'
    return
  fi
  # pgrep exits non-zero when nothing matches; that is not an error here.
  pgrep -g "$group" 2>/dev/null || true
  return 0
}
# Succeed when process group $1 still has at least one live member.
#
# A member counts as live only if process_state reports a non-empty state
# that does not start with "Z". This mirrors the pid-only check in
# stop_server, which treats an empty or zombie state as "stopped"; without
# it, a group that contains only not-yet-reaped zombies would be reported
# as alive and the caller would wait out its full timeout.
#
# Arguments: $1 - process group id
# Returns:   0 if a live member exists, 1 otherwise
process_group_alive() {
  local pgid="$1" member state
  while IFS= read -r member; do
    [[ -n "$member" ]] || continue
    state="$(process_state "$member")"
    # Empty state: the member exited between listing and inspection.
    if [[ -n "$state" && "$state" != Z* ]]; then
      return 0
    fi
  done < <(process_group_members "$pgid")
  return 1
}
is_running() { is_running() {
[[ -n "$(server_pid)" ]] [[ -n "$(server_pid)" ]]
} }
@@ -70,6 +94,14 @@ show_log_excerpt() {
sed -n '1,120p' "$LOG_FILE" >&2 || true sed -n '1,120p' "$LOG_FILE" >&2 || true
} }
# Print the Codex executable to use: $CODEX_BIN when it is set and
# non-empty, otherwise whatever `command -v codex` resolves on PATH.
# Returns the status of `command -v` when falling back to PATH lookup.
codex_bin() {
  local configured="${CODEX_BIN:-}"
  if [[ -z "$configured" ]]; then
    command -v codex
    return
  fi
  printf '%s\n' "$configured"
  return 0
}
start_server() { start_server() {
local old_pid pid start_codex_bin local old_pid pid start_codex_bin
old_pid="$(pid_from_file)" old_pid="$(pid_from_file)"
@@ -84,6 +116,12 @@ start_server() {
rm -f "$PID_FILE" rm -f "$PID_FILE"
remove_socket_if_safe remove_socket_if_safe
start_codex_bin="$(codex_bin)"
if [[ -z "$start_codex_bin" ]]; then
echo "codex executable not found; set CODEX_BIN" >&2
return 1
fi
rm -f "$STDIN_FIFO" rm -f "$STDIN_FIFO"
mkfifo "$STDIN_FIFO" mkfifo "$STDIN_FIFO"
chmod 600 "$STDIN_FIFO" chmod 600 "$STDIN_FIFO"
@@ -91,7 +129,6 @@ start_server() {
: > "$LOG_FILE" : > "$LOG_FILE"
# Codex app-server currently exits if detached with stdin closed. A detached # Codex app-server currently exits if detached with stdin closed. A detached
# wrapper keeps a private FIFO writer open and then runs Codex on the host. # wrapper keeps a private FIFO writer open and then runs Codex on the host.
start_codex_bin="$(codex_bin)"
setsid -f bash -c ' setsid -f bash -c '
echo "$$" > "$3" echo "$$" > "$3"
tail -f /dev/null > "$1" & tail -f /dev/null > "$1" &
@@ -134,8 +171,14 @@ start_server() {
return 1 return 1
} }
# Final bookkeeping once the app-server is known to be gone: delete the pid
# file and stdin FIFO, run remove_socket_if_safe, and report the stop.
finish_stopped() {
  local -a leftovers=("$PID_FILE" "$STDIN_FIFO")
  rm -f "${leftovers[@]}"
  remove_socket_if_safe
  printf '%s\n' "codex app-server stopped"
}
stop_server() { stop_server() {
local pid local pid pgid self_pgid signal_target state
pid="$(server_pid)" pid="$(server_pid)"
if [[ -z "$pid" ]]; then if [[ -z "$pid" ]]; then
rm -f "$PID_FILE" "$STDIN_FIFO" rm -f "$PID_FILE" "$STDIN_FIFO"
@@ -144,25 +187,45 @@ stop_server() {
return 0 return 0
fi fi
pgid="$(process_group_id "$pid")"
self_pgid="$(process_group_id "$$")"
if [[ -n "$pgid" && "$pgid" != "$self_pgid" ]]; then
signal_target="-$pgid"
echo "stopping codex app-server process group: pgid=$pgid pid=$pid"
else
signal_target="$pid"
echo "stopping codex app-server: pid=$pid" echo "stopping codex app-server: pid=$pid"
kill "$pid" 2>/dev/null || true if [[ -n "$pgid" && "$pgid" == "$self_pgid" ]]; then
echo "server shares this script process group; using pid-only stop" >&2
fi
fi
kill -TERM -- "$signal_target" 2>/dev/null || true
for _ in $(seq 1 50); do for _ in $(seq 1 50); do
if ! kill -0 "$pid" 2>/dev/null; then state="$(process_state "$pid")"
rm -f "$PID_FILE" "$STDIN_FIFO" if [[ -n "$pgid" && "$signal_target" == "-$pgid" ]]; then
remove_socket_if_safe if ! process_group_alive "$pgid"; then
echo "codex app-server stopped" finish_stopped
return 0
fi
elif [[ -z "$state" || "$state" == Z* ]]; then
finish_stopped
return 0 return 0
fi fi
sleep 0.1 sleep 0.1
done done
echo "codex app-server did not stop after 5 seconds; killing pid=$pid" >&2 echo "codex app-server did not stop after 5 seconds; killing $signal_target" >&2
kill -KILL "$pid" 2>/dev/null || true kill -KILL -- "$signal_target" 2>/dev/null || true
for _ in $(seq 1 20); do for _ in $(seq 1 20); do
if ! kill -0 "$pid" 2>/dev/null; then state="$(process_state "$pid")"
rm -f "$PID_FILE" "$STDIN_FIFO" if [[ -n "$pgid" && "$signal_target" == "-$pgid" ]]; then
remove_socket_if_safe if ! process_group_alive "$pgid"; then
echo "codex app-server stopped" finish_stopped
return 0
fi
elif [[ -z "$state" || "$state" == Z* ]]; then
finish_stopped
return 0 return 0
fi fi
sleep 0.1 sleep 0.1
@@ -172,11 +235,12 @@ stop_server() {
} }
status_server() { status_server() {
local pid local pid pgid
pid="$(server_pid)" pid="$(server_pid)"
if [[ -n "$pid" ]]; then if [[ -n "$pid" ]]; then
pgid="$(process_group_id "$pid")"
if [[ -S "$HOST_CODEX_SOCKET" ]]; then if [[ -S "$HOST_CODEX_SOCKET" ]]; then
echo "codex app-server running: pid=$pid socket=$HOST_CODEX_SOCKET log=$LOG_FILE" echo "codex app-server running: pid=$pid pgid=$pgid socket=$HOST_CODEX_SOCKET log=$LOG_FILE"
return 0 return 0
fi fi
echo "codex app-server pid=$pid is running but socket is missing: $HOST_CODEX_SOCKET" >&2 echo "codex app-server pid=$pid is running but socket is missing: $HOST_CODEX_SOCKET" >&2
@@ -197,14 +261,6 @@ require_cmd() {
fi fi
} }
# Print the Codex executable to use: $CODEX_BIN when it is set and
# non-empty, otherwise whatever `command -v codex` resolves on PATH.
# Returns the status of `command -v` when falling back to PATH lookup.
codex_bin() {
  local configured="${CODEX_BIN:-}"
  if [[ -z "$configured" ]]; then
    command -v codex
    return
  fi
  printf '%s\n' "$configured"
  return 0
}
codex_version_from() { codex_version_from() {
local bin="$1" line local bin="$1" line
line="$($bin --version 2>/dev/null || true)" line="$($bin --version 2>/dev/null || true)"
@@ -339,9 +395,9 @@ restore_backup() {
fi fi
choose_install_prefix "$bin" choose_install_prefix "$bin"
if [[ -e "$bin" ]]; then if [[ -e "$bin" ]]; then
run_install mv "$bin" "$tmp_failed" || true run_install mv -f "$bin" "$tmp_failed" || true
fi fi
run_install mv "$backup" "$bin" run_install mv -f "$backup" "$bin"
} }
confirm_upgrade() { confirm_upgrade() {
@@ -358,80 +414,15 @@ confirm_upgrade() {
[[ "$reply" == "y" || "$reply" == "Y" || "$reply" == "yes" || "$reply" == "YES" ]] [[ "$reply" == "y" || "$reply" == "Y" || "$reply" == "yes" || "$reply" == "YES" ]]
} }
check_updates() { apply_upgrade() {
ASSUME_YES=0 local candidate="$1" bin="$2" backup="$3" local_version="$4" latest_version="$5" was_running=0
while [[ $# -gt 0 ]]; do
case "$1" in
-y|--yes) ASSUME_YES=1 ;;
-h|--help) usage; return 0 ;;
*) echo "unknown check-updates option: $1" >&2; usage; return 2 ;;
esac
shift
done
require_cmd curl
require_cmd tar
require_cmd python3
local bin local_version target json latest_version latest_tag download_url digest archive tmp candidate candidate_version was_running backup
bin="$(codex_bin)"
if [[ -z "$bin" ]]; then
echo "codex executable not found; set CODEX_BIN" >&2
return 1
fi
if [[ "$bin" != /* ]]; then
echo "CODEX_BIN must be an absolute path: $bin" >&2
return 1
fi
local_version="$(codex_version_from "$bin")"
if [[ -z "$local_version" ]]; then
echo "could not determine local Codex version from $bin" >&2
return 1
fi
target="$(release_target)"
tmp="$(mktemp -d "$RUN_DIR/codex-update.XXXXXX")"
trap "rm -rf '$tmp'" EXIT
json="$tmp/latest.json"
curl -fsSL "https://api.github.com/repos/$CODEX_RELEASE_REPO/releases/latest" -o "$json"
mapfile -t release_info < <(latest_release_info "$target" "$json")
latest_version="${release_info[0]:-}"
latest_tag="${release_info[1]:-}"
download_url="${release_info[2]:-}"
digest="${release_info[3]:-}"
if [[ -z "$latest_version" || -z "$download_url" ]]; then
echo "could not determine latest Codex release for $target" >&2
return 1
fi
if ! version_gt "$latest_version" "$local_version"; then
echo "Codex is already current: $local_version (latest $latest_version)"
return 0
fi
echo "Codex update available: $local_version -> $latest_version ($latest_tag)"
confirm_upgrade "$local_version" "$latest_version" "$bin"
archive="$tmp/codex-$target.tar.gz"
curl -fL "$download_url" -o "$archive"
verify_digest "$archive" "$digest"
candidate="$(extract_codex_binary "$archive" "$tmp")"
candidate_version="$(codex_version_from "$candidate")"
if [[ "$candidate_version" != "$latest_version" ]]; then
echo "downloaded Codex version $candidate_version does not match release $latest_version" >&2
return 1
fi
was_running=0
if is_running; then if is_running; then
was_running=1 was_running=1
fi
backup="$bin.bak.$(date -u +%Y%m%d%H%M%S)"
if [[ "$was_running" == "1" ]]; then
stop_server stop_server
fi fi
if ! install_candidate "$candidate" "$bin" "$backup"; then if ! install_candidate "$candidate" "$bin" "$backup"; then
echo "failed to install Codex update" >&2
if [[ "$was_running" == "1" ]]; then if [[ "$was_running" == "1" ]]; then
start_server || true start_server || true
fi fi
@@ -451,6 +442,112 @@ check_updates() {
echo "backup: $backup" echo "backup: $backup"
} }
# Hand the stop/replace/start step to a detached worker.
#
# Truncates $UPGRADE_LOG_FILE, then uses `setsid -f` to launch a separate
# bash that sleeps one second and re-invokes this script ($0) with the
# internal `__apply-upgrade` command. Worker output is appended to the
# upgrade log.
#
# Arguments: $1 candidate binary, $2 installed binary path, $3 backup path,
#            $4 update temp dir, $5 local version, $6 latest version
handoff_upgrade() {
  local candidate="$1" bin="$2" backup="$3" update_dir="$4" local_version="$5" latest_version="$6"
  local -a worker_args=(
    "$candidate" "$bin" "$backup" "$update_dir" "$local_version" "$latest_version"
  )

  : > "$UPGRADE_LOG_FILE"
  # Inside the inline script, $0 is this script's path and "$@" the six args.
  setsid -f bash -c 'sleep 1; "$0" __apply-upgrade "$@"' \
    "$0" "${worker_args[@]}" >> "$UPGRADE_LOG_FILE" 2>&1
  echo "Codex upgrade handoff started; app-server will restart if replacement succeeds. log=$UPGRADE_LOG_FILE"
}
# Compare the installed Codex with the latest GitHub release and, when a
# newer one exists, download, validate and install it.
#
# Options:
#   -y, --yes   set ASSUME_YES=1 (presumably consumed by confirm_upgrade to
#               skip the prompt — confirm against that function)
#   -h, --help  print usage and return 0
#
# Flow: resolve the local binary and version, fetch release metadata into a
# temp dir under $RUN_DIR, download and verify the archive, check that the
# extracted binary reports the release version, then either hand the install
# to a detached worker (server running) or apply it in-process.
#
# Globals:  RUN_DIR, CODEX_RELEASE_REPO (read); ASSUME_YES (written)
# Returns:  0 if already current, handed off, or upgraded; 1 on failure;
#           2 on an unknown option
check_updates() {
  ASSUME_YES=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -y|--yes) ASSUME_YES=1 ;;
      -h|--help) usage; return 0 ;;
      *) echo "unknown check-updates option: $1" >&2; usage; return 2 ;;
    esac
    shift
  done

  require_cmd curl
  require_cmd tar
  require_cmd python3
  require_cmd ps

  local bin local_version target json latest_version latest_tag download_url digest archive tmp candidate candidate_version backup
  # Keep the parsed release fields out of the global namespace.
  local -a release_info=()

  # `|| true` keeps a failed PATH lookup from aborting under errexit, so the
  # friendly message below is actually reachable.
  bin="$(codex_bin || true)"
  if [[ -z "$bin" ]]; then
    echo "codex executable not found; set CODEX_BIN" >&2
    return 1
  fi
  if [[ "$bin" != /* ]]; then
    echo "CODEX_BIN must be an absolute path: $bin" >&2
    return 1
  fi
  local_version="$(codex_version_from "$bin")"
  if [[ -z "$local_version" ]]; then
    echo "could not determine local Codex version from $bin" >&2
    return 1
  fi

  target="$(release_target)"
  # The temp dir lives under RUN_DIR; create it if it is not there yet
  # (no-op when it already exists).
  mkdir -p "$RUN_DIR"
  tmp="$(mktemp -d "$RUN_DIR/codex-update.XXXXXX")"
  # Safety net: remove the temp dir on any exit path that skips the explicit
  # cleanups below (failed download, digest mismatch, declined prompt, ...).
  # The path is expanded now because $tmp is local to this function.
  # NOTE(review): this replaces any EXIT trap installed earlier in the script.
  # shellcheck disable=SC2064
  trap "rm -rf -- $(printf '%q' "$tmp")" EXIT

  json="$tmp/latest.json"
  curl -fsSL "https://api.github.com/repos/$CODEX_RELEASE_REPO/releases/latest" -o "$json"
  mapfile -t release_info < <(latest_release_info "$target" "$json")
  latest_version="${release_info[0]:-}"
  latest_tag="${release_info[1]:-}"
  download_url="${release_info[2]:-}"
  digest="${release_info[3]:-}"
  if [[ -z "$latest_version" || -z "$download_url" ]]; then
    rm -rf "$tmp"
    echo "could not determine latest Codex release for $target" >&2
    return 1
  fi

  if ! version_gt "$latest_version" "$local_version"; then
    rm -rf "$tmp"
    echo "Codex is already current: $local_version (latest $latest_version)"
    return 0
  fi

  echo "Codex update available: $local_version -> $latest_version ($latest_tag)"
  confirm_upgrade "$local_version" "$latest_version" "$bin"

  archive="$tmp/codex-$target.tar.gz"
  curl -fL "$download_url" -o "$archive"
  verify_digest "$archive" "$digest"
  candidate="$(extract_codex_binary "$archive" "$tmp")"
  candidate_version="$(codex_version_from "$candidate")"
  if [[ "$candidate_version" != "$latest_version" ]]; then
    rm -rf "$tmp"
    echo "downloaded Codex version $candidate_version does not match release $latest_version" >&2
    return 1
  fi

  backup="$bin.bak.$(date -u +%Y%m%d%H%M%S)"
  choose_install_prefix "$bin"

  if is_running; then
    handoff_upgrade "$candidate" "$bin" "$backup" "$tmp" "$local_version" "$latest_version"
    # The detached worker now owns the temp dir (it receives it as its
    # update dir), so this process must not delete it on exit.
    trap - EXIT
    return 0
  fi

  if apply_upgrade "$candidate" "$bin" "$backup" "$local_version" "$latest_version"; then
    rm -rf "$tmp"
    return 0
  fi
  rm -rf "$tmp"
  return 1
}
# Entry point for the detached upgrade worker (`__apply-upgrade`).
#
# Runs apply_upgrade when the candidate binary is present and executable,
# and always removes the update directory afterwards.
#
# Arguments: $1 candidate binary, $2 installed binary path, $3 backup path,
#            $4 update temp dir, $5 local version, $6 latest version
# Returns:   0 when apply_upgrade succeeds, 1 otherwise
apply_upgrade_worker() {
  local candidate="$1" bin="$2" backup="$3" update_dir="$4" local_version="$5" latest_version="$6"
  local status=1

  if [[ -x "$candidate" ]]; then
    if apply_upgrade "$candidate" "$bin" "$backup" "$local_version" "$latest_version"; then
      status=0
    fi
  else
    echo "upgrade candidate is missing or not executable: $candidate" >&2
  fi

  rm -rf "$update_dir"
  return "$status"
}
cmd="${1:-start}" cmd="${1:-start}"
case "$cmd" in case "$cmd" in
start) start)
@@ -472,6 +569,11 @@ case "$cmd" in
shift || true shift || true
check_updates "$@" check_updates "$@"
;; ;;
__apply-upgrade)
shift || true
if [[ $# -ne 6 ]]; then echo "invalid upgrade worker arguments" >&2; exit 2; fi
apply_upgrade_worker "$@"
;;
-h|--help|help) -h|--help|help)
usage usage
;; ;;