Make Codex upgrades self-safe

This commit is contained in:
Codex
2026-05-25 04:25:26 +00:00
parent d03bf33a55
commit 09ff990468
2 changed files with 197 additions and 95 deletions

View File

@@ -11,7 +11,7 @@ Docker Compose runs only the Go Telegram bot. Codex runs on the host through `co
scripts/start-codex-app-server start
```
The script supports `start`, `stop`, `status`, and `check-updates [-y]`. `start` launches Codex detached, writes `run/codex-app-server.pid`, logs to `run/codex-app-server.log`, and is idempotent if the socket is already live. `check-updates` compares the local `codex` binary with the latest OpenAI Codex GitHub release; with `-y`, it downloads the matching platform archive, verifies the release digest when available, replaces the configured `CODEX_BIN`, and restarts the app-server if it was running. If the upgraded server fails to start, the script restores the previous binary and starts it again.
The script supports `start`, `stop`, `status`, and `check-updates [-y]`. `start` launches Codex detached, writes `run/codex-app-server.pid`, logs to `run/codex-app-server.log`, and is idempotent if the socket is already live. `check-updates` compares the local `codex` binary with the latest OpenAI Codex GitHub release. With `-y`, it downloads and validates the matching platform archive before stopping a running app-server. If the app-server is running, the final stop/replace/start step is handed to a detached worker so the upgrade can complete even when invoked from Codex itself; the worker's output is written to `run/codex-app-server-upgrade.log`. If the upgraded server fails to start, the worker restores the previous binary and starts it again.
3. Add at least one Telegram user and workspace:

View File

@@ -7,6 +7,7 @@ RUN_DIR="$ROOT/run"
PID_FILE="$RUN_DIR/codex-app-server.pid"
LOG_FILE="$RUN_DIR/codex-app-server.log"
STDIN_FIFO="$RUN_DIR/codex-app-server.stdin"
UPGRADE_LOG_FILE="$RUN_DIR/codex-app-server-upgrade.log"
CODEX_RELEASE_REPO="${CODEX_RELEASE_REPO:-openai/codex}"
INSTALL_PREFIX=()
@@ -51,6 +52,29 @@ server_pid() {
fi
}
# Print the process-group ID that ps reports for the PID given as $1, with
# every whitespace character removed. Prints nothing when ps cannot report
# the process. Always returns 0.
process_group_id() {
  local target_pid="$1" raw_pgid=""
  raw_pgid="$(ps -o pgid= -p "$target_pid" 2>/dev/null)" || true
  printf '%s' "${raw_pgid//[[:space:]]/}"
  return 0
}
# Print the ps "stat" field for the PID given as $1 with whitespace removed.
# Prints nothing when ps cannot report the process. Always returns 0.
process_state() {
  local stat_field=""
  stat_field="$(ps -o stat= -p "$1" 2>/dev/null || true)"
  printf '%s' "${stat_field//[[:space:]]/}"
  return 0
}
# Print the PIDs, one per line, that belong to the process group given as $1.
# Uses pgrep when it is installed (its failures are ignored and the function
# returns 0); otherwise filters the pid/pgid columns of a full ps listing.
process_group_members() {
  local group_id="$1"
  if ! command -v pgrep >/dev/null 2>&1; then
    # No pgrep available: match the pgid column of every process by hand.
    ps -eo pid=,pgid= | awk -v pgid="$group_id" '$2 == pgid { print $1 }'
    return
  fi
  pgrep -g "$group_id" 2>/dev/null || true
  return 0
}
# Succeed when the process group given as $1 still has at least one member
# that is neither gone nor a zombie.
#
# Arguments: $1 - process-group ID
# Returns:   0 if a live member exists, 1 otherwise
#
# Zombies are skipped so that this agrees with the pid-only stop path, which
# already treats a "Z*" state as stopped: a zombie cannot run again and is
# only waiting to be reaped. Counting it as alive would make a group stop
# wait out its full timeout whenever a member is left unreaped.
process_group_alive() {
  local pgid="$1" member state
  while IFS= read -r member; do
    [[ -n "$member" ]] || continue
    state="$(process_state "$member")" || true
    # An empty state means the member exited after it was listed.
    if [[ -n "$state" && "$state" != Z* ]]; then
      return 0
    fi
  done < <(process_group_members "$pgid")
  return 1
}
# Succeed when server_pid prints a non-empty value, i.e. a server PID was
# found; fail otherwise. The exit status of server_pid itself is ignored.
is_running() {
  local current_pid=""
  current_pid="$(server_pid)" || true
  [[ -n "$current_pid" ]]
}
@@ -70,6 +94,14 @@ show_log_excerpt() {
sed -n '1,120p' "$LOG_FILE" >&2 || true
}
# Print the path of the Codex executable to use.
#
# Globals: CODEX_BIN (read) - explicit path; wins when set and non-empty
# Outputs: the path on stdout, or nothing when codex cannot be found
# Returns: 0 always
#
# Callers detect "not found" by testing for empty output and then print their
# own "set CODEX_BIN" hint. `command -v` exits non-zero when codex is missing;
# if the script runs with errexit (NOTE(review): not visible in this excerpt),
# letting that status escape would abort `x="$(codex_bin)"` before the caller
# could print the hint, so the status is deliberately swallowed here.
codex_bin() {
  if [[ -n "${CODEX_BIN:-}" ]]; then
    printf '%s\n' "$CODEX_BIN"
    return 0
  fi
  command -v codex || true
}
start_server() {
local old_pid pid start_codex_bin
old_pid="$(pid_from_file)"
@@ -84,6 +116,12 @@ start_server() {
rm -f "$PID_FILE"
remove_socket_if_safe
start_codex_bin="$(codex_bin)"
if [[ -z "$start_codex_bin" ]]; then
echo "codex executable not found; set CODEX_BIN" >&2
return 1
fi
rm -f "$STDIN_FIFO"
mkfifo "$STDIN_FIFO"
chmod 600 "$STDIN_FIFO"
@@ -91,7 +129,6 @@ start_server() {
: > "$LOG_FILE"
# Codex app-server currently exits if detached with stdin closed. A detached
# wrapper keeps a private FIFO writer open and then runs Codex on the host.
start_codex_bin="$(codex_bin)"
setsid -f bash -c '
echo "$$" > "$3"
tail -f /dev/null > "$1" &
@@ -134,8 +171,14 @@ start_server() {
return 1
}
# Common tail of a successful stop: delete the PID file and the stdin FIFO,
# let remove_socket_if_safe deal with the socket, then report the stop on
# stdout.
# Globals: PID_FILE, STDIN_FIFO (read)
finish_stopped() {
  local -a runtime_files=("$PID_FILE" "$STDIN_FIFO")
  rm -f "${runtime_files[@]}"
  remove_socket_if_safe
  printf '%s\n' 'codex app-server stopped'
}
stop_server() {
local pid
local pid pgid self_pgid signal_target state
pid="$(server_pid)"
if [[ -z "$pid" ]]; then
rm -f "$PID_FILE" "$STDIN_FIFO"
@@ -144,25 +187,45 @@ stop_server() {
return 0
fi
echo "stopping codex app-server: pid=$pid"
kill "$pid" 2>/dev/null || true
pgid="$(process_group_id "$pid")"
self_pgid="$(process_group_id "$$")"
if [[ -n "$pgid" && "$pgid" != "$self_pgid" ]]; then
signal_target="-$pgid"
echo "stopping codex app-server process group: pgid=$pgid pid=$pid"
else
signal_target="$pid"
echo "stopping codex app-server: pid=$pid"
if [[ -n "$pgid" && "$pgid" == "$self_pgid" ]]; then
echo "server shares this script process group; using pid-only stop" >&2
fi
fi
kill -TERM -- "$signal_target" 2>/dev/null || true
for _ in $(seq 1 50); do
if ! kill -0 "$pid" 2>/dev/null; then
rm -f "$PID_FILE" "$STDIN_FIFO"
remove_socket_if_safe
echo "codex app-server stopped"
state="$(process_state "$pid")"
if [[ -n "$pgid" && "$signal_target" == "-$pgid" ]]; then
if ! process_group_alive "$pgid"; then
finish_stopped
return 0
fi
elif [[ -z "$state" || "$state" == Z* ]]; then
finish_stopped
return 0
fi
sleep 0.1
done
echo "codex app-server did not stop after 5 seconds; killing pid=$pid" >&2
kill -KILL "$pid" 2>/dev/null || true
echo "codex app-server did not stop after 5 seconds; killing $signal_target" >&2
kill -KILL -- "$signal_target" 2>/dev/null || true
for _ in $(seq 1 20); do
if ! kill -0 "$pid" 2>/dev/null; then
rm -f "$PID_FILE" "$STDIN_FIFO"
remove_socket_if_safe
echo "codex app-server stopped"
state="$(process_state "$pid")"
if [[ -n "$pgid" && "$signal_target" == "-$pgid" ]]; then
if ! process_group_alive "$pgid"; then
finish_stopped
return 0
fi
elif [[ -z "$state" || "$state" == Z* ]]; then
finish_stopped
return 0
fi
sleep 0.1
@@ -172,11 +235,12 @@ stop_server() {
}
status_server() {
local pid
local pid pgid
pid="$(server_pid)"
if [[ -n "$pid" ]]; then
pgid="$(process_group_id "$pid")"
if [[ -S "$HOST_CODEX_SOCKET" ]]; then
echo "codex app-server running: pid=$pid socket=$HOST_CODEX_SOCKET log=$LOG_FILE"
echo "codex app-server running: pid=$pid pgid=$pgid socket=$HOST_CODEX_SOCKET log=$LOG_FILE"
return 0
fi
echo "codex app-server pid=$pid is running but socket is missing: $HOST_CODEX_SOCKET" >&2
@@ -197,14 +261,6 @@ require_cmd() {
fi
}
# Print the path of the Codex executable to use.
#
# Globals: CODEX_BIN (read) - explicit path; wins when set and non-empty
# Outputs: the path on stdout, or nothing when codex cannot be found
# Returns: 0 always
#
# Callers detect "not found" by testing for empty output and then print their
# own "set CODEX_BIN" hint. `command -v` exits non-zero when codex is missing;
# if the script runs with errexit (NOTE(review): not visible in this excerpt),
# letting that status escape would abort `x="$(codex_bin)"` before the caller
# could print the hint, so the status is deliberately swallowed here.
codex_bin() {
  if [[ -n "${CODEX_BIN:-}" ]]; then
    printf '%s\n' "$CODEX_BIN"
    return 0
  fi
  command -v codex || true
}
codex_version_from() {
local bin="$1" line
line="$($bin --version 2>/dev/null || true)"
@@ -339,9 +395,9 @@ restore_backup() {
fi
choose_install_prefix "$bin"
if [[ -e "$bin" ]]; then
run_install mv "$bin" "$tmp_failed" || true
run_install mv -f "$bin" "$tmp_failed" || true
fi
run_install mv "$backup" "$bin"
run_install mv -f "$backup" "$bin"
}
confirm_upgrade() {
@@ -358,80 +414,15 @@ confirm_upgrade() {
[[ "$reply" == "y" || "$reply" == "Y" || "$reply" == "yes" || "$reply" == "YES" ]]
}
check_updates() {
ASSUME_YES=0
while [[ $# -gt 0 ]]; do
case "$1" in
-y|--yes) ASSUME_YES=1 ;;
-h|--help) usage; return 0 ;;
*) echo "unknown check-updates option: $1" >&2; usage; return 2 ;;
esac
shift
done
require_cmd curl
require_cmd tar
require_cmd python3
local bin local_version target json latest_version latest_tag download_url digest archive tmp candidate candidate_version was_running backup
bin="$(codex_bin)"
if [[ -z "$bin" ]]; then
echo "codex executable not found; set CODEX_BIN" >&2
return 1
fi
if [[ "$bin" != /* ]]; then
echo "CODEX_BIN must be an absolute path: $bin" >&2
return 1
fi
local_version="$(codex_version_from "$bin")"
if [[ -z "$local_version" ]]; then
echo "could not determine local Codex version from $bin" >&2
return 1
fi
target="$(release_target)"
tmp="$(mktemp -d "$RUN_DIR/codex-update.XXXXXX")"
trap "rm -rf '$tmp'" EXIT
json="$tmp/latest.json"
curl -fsSL "https://api.github.com/repos/$CODEX_RELEASE_REPO/releases/latest" -o "$json"
mapfile -t release_info < <(latest_release_info "$target" "$json")
latest_version="${release_info[0]:-}"
latest_tag="${release_info[1]:-}"
download_url="${release_info[2]:-}"
digest="${release_info[3]:-}"
if [[ -z "$latest_version" || -z "$download_url" ]]; then
echo "could not determine latest Codex release for $target" >&2
return 1
fi
if ! version_gt "$latest_version" "$local_version"; then
echo "Codex is already current: $local_version (latest $latest_version)"
return 0
fi
echo "Codex update available: $local_version -> $latest_version ($latest_tag)"
confirm_upgrade "$local_version" "$latest_version" "$bin"
archive="$tmp/codex-$target.tar.gz"
curl -fL "$download_url" -o "$archive"
verify_digest "$archive" "$digest"
candidate="$(extract_codex_binary "$archive" "$tmp")"
candidate_version="$(codex_version_from "$candidate")"
if [[ "$candidate_version" != "$latest_version" ]]; then
echo "downloaded Codex version $candidate_version does not match release $latest_version" >&2
return 1
fi
was_running=0
apply_upgrade() {
local candidate="$1" bin="$2" backup="$3" local_version="$4" latest_version="$5" was_running=0
if is_running; then
was_running=1
fi
backup="$bin.bak.$(date -u +%Y%m%d%H%M%S)"
if [[ "$was_running" == "1" ]]; then
stop_server
fi
if ! install_candidate "$candidate" "$bin" "$backup"; then
echo "failed to install Codex update" >&2
if [[ "$was_running" == "1" ]]; then
start_server || true
fi
@@ -451,6 +442,112 @@ check_updates() {
echo "backup: $backup"
}
# Hand the stop/replace/start step to a detached worker so the upgrade can
# finish even if the process that invoked this script is stopped with the
# app-server.
#
# Globals:   UPGRADE_LOG_FILE (read; truncated, then receives worker output)
# Arguments: $1 candidate binary   $2 install path   $3 backup path
#            $4 update directory (the worker removes it when it finishes)
#            $5 current version    $6 new version
# Outputs:   a status line on stdout; an error on stderr if launching fails
# Returns:   0 once the worker has been launched, 1 if it could not be
#
# The worker re-invokes this script ("$0") with the internal __apply-upgrade
# command after a one-second delay.
handoff_upgrade() {
  local candidate="$1" bin="$2" backup="$3" update_dir="$4" local_version="$5" latest_version="$6"
  : > "$UPGRADE_LOG_FILE"
  if ! setsid -f bash -c '
    sleep 1
    "$0" __apply-upgrade "$1" "$2" "$3" "$4" "$5" "$6"
  ' "$0" "$candidate" "$bin" "$backup" "$update_dir" "$local_version" "$latest_version" >> "$UPGRADE_LOG_FILE" 2>&1; then
    # No worker exists to consume or clean the update directory, so remove it
    # here and report the failure instead of claiming the handoff started.
    rm -rf -- "$update_dir"
    echo "failed to start detached Codex upgrade worker; see $UPGRADE_LOG_FILE" >&2
    return 1
  fi
  echo "Codex upgrade handoff started; app-server will restart if replacement succeeds. log=$UPGRADE_LOG_FILE"
}
# Compare the installed Codex with the latest GitHub release and, after
# confirmation, download, validate and install the update.
#
# Globals:   ASSUME_YES (written), RUN_DIR, CODEX_RELEASE_REPO (read)
# Arguments: [-y|--yes] skip the confirmation prompt; [-h|--help] show usage
# Returns:   0 when already current, upgraded, or handed off to the detached
#            worker; 2 on an unknown option; non-zero on any failure or when
#            the upgrade is declined
#
# The download directory is removed on every path except a successful
# handoff, where the detached worker takes ownership of it. Project helpers
# whose bodies are not visible here are called as plain commands and their
# status is read from $? afterwards: that keeps errexit (if the script enables
# it) active inside them, while still handling failure when errexit is off.
# The EXIT trap covers the case where errexit aborts the script mid-way.
check_updates() {
  ASSUME_YES=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -y|--yes) ASSUME_YES=1 ;;
      -h|--help) usage; return 0 ;;
      *) echo "unknown check-updates option: $1" >&2; usage; return 2 ;;
    esac
    shift
  done
  require_cmd curl
  require_cmd tar
  require_cmd python3
  require_cmd ps
  local bin local_version target json latest_version latest_tag download_url digest archive tmp candidate candidate_version backup rc
  local -a release_info=()
  # Tolerate a non-zero status so the explicit "not found" message is reached.
  bin="$(codex_bin)" || true
  if [[ -z "$bin" ]]; then
    echo "codex executable not found; set CODEX_BIN" >&2
    return 1
  fi
  if [[ "$bin" != /* ]]; then
    echo "CODEX_BIN must be an absolute path: $bin" >&2
    return 1
  fi
  local_version="$(codex_version_from "$bin")"
  if [[ -z "$local_version" ]]; then
    echo "could not determine local Codex version from $bin" >&2
    return 1
  fi
  target="$(release_target)"
  if ! tmp="$(mktemp -d "$RUN_DIR/codex-update.XXXXXX")" || [[ -z "$tmp" ]]; then
    echo "could not create update directory under $RUN_DIR" >&2
    return 1
  fi
  # Safety net for aborts that bypass the explicit cleanups below. The path
  # is expanded now (shell-quoted) because $tmp is local to this function.
  # shellcheck disable=SC2064
  trap "rm -rf -- $(printf '%q' "$tmp")" EXIT
  json="$tmp/latest.json"
  if ! curl -fsSL "https://api.github.com/repos/$CODEX_RELEASE_REPO/releases/latest" -o "$json"; then
    rm -rf -- "$tmp"
    echo "could not fetch latest Codex release metadata for $CODEX_RELEASE_REPO" >&2
    return 1
  fi
  mapfile -t release_info < <(latest_release_info "$target" "$json")
  latest_version="${release_info[0]:-}"
  latest_tag="${release_info[1]:-}"
  download_url="${release_info[2]:-}"
  digest="${release_info[3]:-}"
  if [[ -z "$latest_version" || -z "$download_url" ]]; then
    rm -rf -- "$tmp"
    echo "could not determine latest Codex release for $target" >&2
    return 1
  fi
  if ! version_gt "$latest_version" "$local_version"; then
    rm -rf -- "$tmp"
    echo "Codex is already current: $local_version (latest $latest_version)"
    return 0
  fi
  echo "Codex update available: $local_version -> $latest_version ($latest_tag)"
  confirm_upgrade "$local_version" "$latest_version" "$bin"
  rc=$?
  if (( rc != 0 )); then
    # Declined (or the prompt failed): never continue to the install step.
    rm -rf -- "$tmp"
    return "$rc"
  fi
  archive="$tmp/codex-$target.tar.gz"
  if ! curl -fL "$download_url" -o "$archive"; then
    rm -rf -- "$tmp"
    echo "could not download Codex release archive: $download_url" >&2
    return 1
  fi
  verify_digest "$archive" "$digest"
  rc=$?
  if (( rc != 0 )); then
    rm -rf -- "$tmp"
    return "$rc"
  fi
  candidate="$(extract_codex_binary "$archive" "$tmp")"
  rc=$?
  if (( rc != 0 )) || [[ -z "$candidate" ]]; then
    rm -rf -- "$tmp"
    echo "could not extract codex binary from $archive" >&2
    return 1
  fi
  candidate_version="$(codex_version_from "$candidate")"
  if [[ "$candidate_version" != "$latest_version" ]]; then
    rm -rf -- "$tmp"
    echo "downloaded Codex version $candidate_version does not match release $latest_version" >&2
    return 1
  fi
  backup="$bin.bak.$(date -u +%Y%m%d%H%M%S)"
  choose_install_prefix "$bin"
  if is_running; then
    handoff_upgrade "$candidate" "$bin" "$backup" "$tmp" "$local_version" "$latest_version"
    rc=$?
    if (( rc == 0 )); then
      # The detached worker now owns $tmp and removes it when it finishes.
      trap - EXIT
    else
      rm -rf -- "$tmp"
    fi
    return "$rc"
  fi
  if apply_upgrade "$candidate" "$bin" "$backup" "$local_version" "$latest_version"; then
    rm -rf -- "$tmp"
    return 0
  fi
  rm -rf -- "$tmp"
  return 1
}
# Body of the detached upgrade worker: install the prepared candidate through
# apply_upgrade, then remove the update directory whatever the outcome.
#
# Arguments: $1 candidate binary   $2 install path   $3 backup path
#            $4 update directory   $5 current version   $6 new version
# Returns:   0 when apply_upgrade succeeds; 1 when the candidate is missing
#            or not executable, or when apply_upgrade fails
apply_upgrade_worker() {
  local candidate="$1" bin="$2" backup="$3" update_dir="$4" local_version="$5" latest_version="$6"
  local status=1
  if [[ -x "$candidate" ]]; then
    status=0
    apply_upgrade "$candidate" "$bin" "$backup" "$local_version" "$latest_version" || status=1
  else
    echo "upgrade candidate is missing or not executable: $candidate" >&2
  fi
  rm -rf "$update_dir"
  return "$status"
}
cmd="${1:-start}"
case "$cmd" in
start)
@@ -472,6 +569,11 @@ case "$cmd" in
shift || true
check_updates "$@"
;;
__apply-upgrade)
shift || true
if [[ $# -ne 6 ]]; then echo "invalid upgrade worker arguments" >&2; exit 2; fi
apply_upgrade_worker "$@"
;;
-h|--help|help)
usage
;;