Make Codex upgrades self-safe
This commit is contained in:
@@ -11,7 +11,7 @@ Docker Compose runs only the Go Telegram bot. Codex runs on the host through `co
|
||||
scripts/start-codex-app-server start
|
||||
```
|
||||
|
||||
The script supports `start`, `stop`, `status`, and `check-updates [-y]`. `start` launches Codex detached, writes `run/codex-app-server.pid`, logs to `run/codex-app-server.log`, and is idempotent if the socket is already live. `check-updates` compares the local `codex` binary with the latest OpenAI Codex GitHub release; with `-y`, it downloads the matching platform archive, verifies the release digest when available, replaces the configured `CODEX_BIN`, and restarts the app-server if it was running. If the upgraded server fails to start, the script restores the previous binary and starts it again.
|
||||
The script supports `start`, `stop`, `status`, and `check-updates [-y]`. `start` launches Codex detached, writes `run/codex-app-server.pid`, logs to `run/codex-app-server.log`, and is idempotent if the socket is already live. `check-updates` compares the local `codex` binary with the latest OpenAI Codex GitHub release. With `-y`, it downloads and validates the matching platform archive before stopping a running app-server. If the app-server is running, the final stop/replace/start step is handed to a detached worker so the upgrade can complete even when invoked from Codex itself. If the upgraded server fails to start, the worker restores the previous binary and starts it again.
|
||||
|
||||
3. Add at least one Telegram user and workspace:
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ RUN_DIR="$ROOT/run"
|
||||
PID_FILE="$RUN_DIR/codex-app-server.pid"
|
||||
LOG_FILE="$RUN_DIR/codex-app-server.log"
|
||||
STDIN_FIFO="$RUN_DIR/codex-app-server.stdin"
|
||||
UPGRADE_LOG_FILE="$RUN_DIR/codex-app-server-upgrade.log"
|
||||
CODEX_RELEASE_REPO="${CODEX_RELEASE_REPO:-openai/codex}"
|
||||
INSTALL_PREFIX=()
|
||||
|
||||
@@ -51,6 +52,29 @@ server_pid() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Print the process group ID of the process whose PID is $1, with all
# whitespace stripped.
# Arguments: $1 - PID to look up
# Outputs:   the PGID on stdout; nothing when no PID is given or ps reports
#            nothing for it
# Returns:   0 always (a failed lookup is reported as empty output)
process_group_id() {
  local pid="${1:-}"
  # Nothing to look up; avoid handing ps an empty -p argument.
  [[ -n "$pid" ]] || return 0
  # ps errors are silenced on purpose: callers test for empty output.
  ps -o pgid= -p "$pid" 2>/dev/null | tr -d '[:space:]' || true
}
|
||||
|
||||
# Print the ps "stat" field of the process whose PID is $1, with all
# whitespace stripped.
# Arguments: $1 - PID to look up
# Outputs:   the state string on stdout; nothing when no PID is given or ps
#            reports nothing for it
# Returns:   0 always (a failed lookup is reported as empty output)
process_state() {
  local pid="${1:-}"
  # Nothing to look up; avoid handing ps an empty -p argument.
  [[ -n "$pid" ]] || return 0
  # ps errors are silenced on purpose: callers test for empty output.
  ps -o stat= -p "$pid" 2>/dev/null | tr -d '[:space:]' || true
}
|
||||
|
||||
# List the PIDs that belong to process group $1, one per line.
# Arguments: $1 - process group ID (positive integer)
# Outputs:   member PIDs on stdout; nothing when the group has no members or
#            the argument is not a positive integer
# Returns:   0 on the pgrep path and for rejected arguments; the status of
#            the ps|awk pipeline on the fallback path
process_group_members() {
  local pgid="${1:-}"
  # Reject empty, non-numeric and zero IDs up front: pgrep translates
  # "-g 0" into its own process group, which would list unrelated PIDs.
  if [[ ! "$pgid" =~ ^[1-9][0-9]*$ ]]; then
    return 0
  fi
  if command -v pgrep >/dev/null 2>&1; then
    # pgrep exits non-zero when nothing matches; that is not an error here.
    pgrep -g "$pgid" 2>/dev/null || true
    return 0
  fi
  # Fallback when pgrep is unavailable: filter the full process table.
  ps -eo pid=,pgid= | awk -v pgid="$pgid" '$2 == pgid { print $1 }'
}
|
||||
|
||||
# Succeed when process group $1 still has at least one member, as reported
# by process_group_members.
# Arguments: $1 - process group ID
# Returns:   0 if any member PID is listed, 1 otherwise
process_group_alive() {
  local pgid="${1:-}"
  [[ -n "$(process_group_members "$pgid")" ]]
}
|
||||
|
||||
# Succeed when server_pid (defined elsewhere in this script) prints a
# non-empty value.
# Returns: 0 if server_pid printed something, 1 otherwise
is_running() {
  [[ -n "$(server_pid)" ]]
}
|
||||
@@ -70,6 +94,14 @@ show_log_excerpt() {
|
||||
sed -n '1,120p' "$LOG_FILE" >&2 || true
|
||||
}
|
||||
|
||||
# Print the path of the Codex executable to use.
# Globals:  CODEX_BIN (read) - explicit override; printed verbatim when set
#           and non-empty
# Outputs:  the executable path on stdout, or nothing when CODEX_BIN is unset
#           and no `codex` command can be resolved
# Returns:  0 always. Callers assign the output and then test it for
#           emptiness; a non-zero status here would make an errexit shell
#           abort on that assignment before the caller can print its own
#           "codex executable not found" message.
codex_bin() {
  if [[ -n "${CODEX_BIN:-}" ]]; then
    printf '%s\n' "$CODEX_BIN"
    return 0
  fi
  # Not finding codex is reported as empty output, not as a failure.
  command -v codex || true
}
|
||||
|
||||
start_server() {
|
||||
local old_pid pid start_codex_bin
|
||||
old_pid="$(pid_from_file)"
|
||||
@@ -84,6 +116,12 @@ start_server() {
|
||||
rm -f "$PID_FILE"
|
||||
remove_socket_if_safe
|
||||
|
||||
start_codex_bin="$(codex_bin)"
|
||||
if [[ -z "$start_codex_bin" ]]; then
|
||||
echo "codex executable not found; set CODEX_BIN" >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
rm -f "$STDIN_FIFO"
|
||||
mkfifo "$STDIN_FIFO"
|
||||
chmod 600 "$STDIN_FIFO"
|
||||
@@ -91,7 +129,6 @@ start_server() {
|
||||
: > "$LOG_FILE"
|
||||
# Codex app-server currently exits if detached with stdin closed. A detached
|
||||
# wrapper keeps a private FIFO writer open and then runs Codex on the host.
|
||||
start_codex_bin="$(codex_bin)"
|
||||
setsid -f bash -c '
|
||||
echo "$$" > "$3"
|
||||
tail -f /dev/null > "$1" &
|
||||
@@ -134,8 +171,14 @@ start_server() {
|
||||
return 1
|
||||
}
|
||||
|
||||
# Clean up after the app-server has stopped: remove the PID file and the
# stdin FIFO, let remove_socket_if_safe (defined elsewhere in this script)
# deal with the socket, and report the stop.
# Globals:  PID_FILE, STDIN_FIFO (read)
# Outputs:  "codex app-server stopped" on stdout
finish_stopped() {
  rm -f -- "$PID_FILE" "$STDIN_FIFO"
  remove_socket_if_safe
  echo "codex app-server stopped"
}
|
||||
|
||||
stop_server() {
|
||||
local pid
|
||||
local pid pgid self_pgid signal_target state
|
||||
pid="$(server_pid)"
|
||||
if [[ -z "$pid" ]]; then
|
||||
rm -f "$PID_FILE" "$STDIN_FIFO"
|
||||
@@ -144,25 +187,45 @@ stop_server() {
|
||||
return 0
|
||||
fi
|
||||
|
||||
pgid="$(process_group_id "$pid")"
|
||||
self_pgid="$(process_group_id "$$")"
|
||||
if [[ -n "$pgid" && "$pgid" != "$self_pgid" ]]; then
|
||||
signal_target="-$pgid"
|
||||
echo "stopping codex app-server process group: pgid=$pgid pid=$pid"
|
||||
else
|
||||
signal_target="$pid"
|
||||
echo "stopping codex app-server: pid=$pid"
|
||||
kill "$pid" 2>/dev/null || true
|
||||
if [[ -n "$pgid" && "$pgid" == "$self_pgid" ]]; then
|
||||
echo "server shares this script process group; using pid-only stop" >&2
|
||||
fi
|
||||
fi
|
||||
|
||||
kill -TERM -- "$signal_target" 2>/dev/null || true
|
||||
for _ in $(seq 1 50); do
|
||||
if ! kill -0 "$pid" 2>/dev/null; then
|
||||
rm -f "$PID_FILE" "$STDIN_FIFO"
|
||||
remove_socket_if_safe
|
||||
echo "codex app-server stopped"
|
||||
state="$(process_state "$pid")"
|
||||
if [[ -n "$pgid" && "$signal_target" == "-$pgid" ]]; then
|
||||
if ! process_group_alive "$pgid"; then
|
||||
finish_stopped
|
||||
return 0
|
||||
fi
|
||||
elif [[ -z "$state" || "$state" == Z* ]]; then
|
||||
finish_stopped
|
||||
return 0
|
||||
fi
|
||||
sleep 0.1
|
||||
done
|
||||
|
||||
echo "codex app-server did not stop after 5 seconds; killing pid=$pid" >&2
|
||||
kill -KILL "$pid" 2>/dev/null || true
|
||||
echo "codex app-server did not stop after 5 seconds; killing $signal_target" >&2
|
||||
kill -KILL -- "$signal_target" 2>/dev/null || true
|
||||
for _ in $(seq 1 20); do
|
||||
if ! kill -0 "$pid" 2>/dev/null; then
|
||||
rm -f "$PID_FILE" "$STDIN_FIFO"
|
||||
remove_socket_if_safe
|
||||
echo "codex app-server stopped"
|
||||
state="$(process_state "$pid")"
|
||||
if [[ -n "$pgid" && "$signal_target" == "-$pgid" ]]; then
|
||||
if ! process_group_alive "$pgid"; then
|
||||
finish_stopped
|
||||
return 0
|
||||
fi
|
||||
elif [[ -z "$state" || "$state" == Z* ]]; then
|
||||
finish_stopped
|
||||
return 0
|
||||
fi
|
||||
sleep 0.1
|
||||
@@ -172,11 +235,12 @@ stop_server() {
|
||||
}
|
||||
|
||||
status_server() {
|
||||
local pid
|
||||
local pid pgid
|
||||
pid="$(server_pid)"
|
||||
if [[ -n "$pid" ]]; then
|
||||
pgid="$(process_group_id "$pid")"
|
||||
if [[ -S "$HOST_CODEX_SOCKET" ]]; then
|
||||
echo "codex app-server running: pid=$pid socket=$HOST_CODEX_SOCKET log=$LOG_FILE"
|
||||
echo "codex app-server running: pid=$pid pgid=$pgid socket=$HOST_CODEX_SOCKET log=$LOG_FILE"
|
||||
return 0
|
||||
fi
|
||||
echo "codex app-server pid=$pid is running but socket is missing: $HOST_CODEX_SOCKET" >&2
|
||||
@@ -197,14 +261,6 @@ require_cmd() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Print the path of the Codex executable to use.
# Globals:  CODEX_BIN (read) - explicit override; printed verbatim when set
#           and non-empty
# Outputs:  the executable path on stdout, or nothing when CODEX_BIN is unset
#           and no `codex` command can be resolved
# Returns:  0 always. Callers assign the output and then test it for
#           emptiness; a non-zero status here would make an errexit shell
#           abort on that assignment before the caller can print its own
#           "codex executable not found" message.
codex_bin() {
  if [[ -n "${CODEX_BIN:-}" ]]; then
    printf '%s\n' "$CODEX_BIN"
    return 0
  fi
  # Not finding codex is reported as empty output, not as a failure.
  command -v codex || true
}
|
||||
|
||||
codex_version_from() {
|
||||
local bin="$1" line
|
||||
line="$($bin --version 2>/dev/null || true)"
|
||||
@@ -339,9 +395,9 @@ restore_backup() {
|
||||
fi
|
||||
choose_install_prefix "$bin"
|
||||
if [[ -e "$bin" ]]; then
|
||||
run_install mv "$bin" "$tmp_failed" || true
|
||||
run_install mv -f "$bin" "$tmp_failed" || true
|
||||
fi
|
||||
run_install mv "$backup" "$bin"
|
||||
run_install mv -f "$backup" "$bin"
|
||||
}
|
||||
|
||||
confirm_upgrade() {
|
||||
@@ -358,80 +414,15 @@ confirm_upgrade() {
|
||||
[[ "$reply" == "y" || "$reply" == "Y" || "$reply" == "yes" || "$reply" == "YES" ]]
|
||||
}
|
||||
|
||||
check_updates() {
|
||||
ASSUME_YES=0
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
-y|--yes) ASSUME_YES=1 ;;
|
||||
-h|--help) usage; return 0 ;;
|
||||
*) echo "unknown check-updates option: $1" >&2; usage; return 2 ;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
require_cmd curl
|
||||
require_cmd tar
|
||||
require_cmd python3
|
||||
|
||||
local bin local_version target json latest_version latest_tag download_url digest archive tmp candidate candidate_version was_running backup
|
||||
bin="$(codex_bin)"
|
||||
if [[ -z "$bin" ]]; then
|
||||
echo "codex executable not found; set CODEX_BIN" >&2
|
||||
return 1
|
||||
fi
|
||||
if [[ "$bin" != /* ]]; then
|
||||
echo "CODEX_BIN must be an absolute path: $bin" >&2
|
||||
return 1
|
||||
fi
|
||||
local_version="$(codex_version_from "$bin")"
|
||||
if [[ -z "$local_version" ]]; then
|
||||
echo "could not determine local Codex version from $bin" >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
target="$(release_target)"
|
||||
tmp="$(mktemp -d "$RUN_DIR/codex-update.XXXXXX")"
|
||||
trap "rm -rf '$tmp'" EXIT
|
||||
json="$tmp/latest.json"
|
||||
curl -fsSL "https://api.github.com/repos/$CODEX_RELEASE_REPO/releases/latest" -o "$json"
|
||||
mapfile -t release_info < <(latest_release_info "$target" "$json")
|
||||
latest_version="${release_info[0]:-}"
|
||||
latest_tag="${release_info[1]:-}"
|
||||
download_url="${release_info[2]:-}"
|
||||
digest="${release_info[3]:-}"
|
||||
if [[ -z "$latest_version" || -z "$download_url" ]]; then
|
||||
echo "could not determine latest Codex release for $target" >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
if ! version_gt "$latest_version" "$local_version"; then
|
||||
echo "Codex is already current: $local_version (latest $latest_version)"
|
||||
return 0
|
||||
fi
|
||||
echo "Codex update available: $local_version -> $latest_version ($latest_tag)"
|
||||
confirm_upgrade "$local_version" "$latest_version" "$bin"
|
||||
|
||||
archive="$tmp/codex-$target.tar.gz"
|
||||
curl -fL "$download_url" -o "$archive"
|
||||
verify_digest "$archive" "$digest"
|
||||
candidate="$(extract_codex_binary "$archive" "$tmp")"
|
||||
candidate_version="$(codex_version_from "$candidate")"
|
||||
if [[ "$candidate_version" != "$latest_version" ]]; then
|
||||
echo "downloaded Codex version $candidate_version does not match release $latest_version" >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
was_running=0
|
||||
apply_upgrade() {
|
||||
local candidate="$1" bin="$2" backup="$3" local_version="$4" latest_version="$5" was_running=0
|
||||
if is_running; then
|
||||
was_running=1
|
||||
fi
|
||||
backup="$bin.bak.$(date -u +%Y%m%d%H%M%S)"
|
||||
|
||||
if [[ "$was_running" == "1" ]]; then
|
||||
stop_server
|
||||
fi
|
||||
|
||||
if ! install_candidate "$candidate" "$bin" "$backup"; then
|
||||
echo "failed to install Codex update" >&2
|
||||
if [[ "$was_running" == "1" ]]; then
|
||||
start_server || true
|
||||
fi
|
||||
@@ -451,6 +442,112 @@ check_updates() {
|
||||
echo "backup: $backup"
|
||||
}
|
||||
|
||||
# Hand the final upgrade step to a detached worker so it can finish even
# when this script was invoked from the Codex process that is about to be
# stopped. The worker re-runs this script with the internal
# `__apply-upgrade` subcommand.
# Globals:   UPGRADE_LOG_FILE (read) - truncated, then receives all worker
#            output
# Arguments: $1 - path to the validated candidate binary
#            $2 - path of the installed Codex binary to replace
#            $3 - backup path for the current binary
#            $4 - scratch directory holding the candidate
#            $5 - currently installed version
#            $6 - version being installed
# Outputs:   a one-line notice on stdout naming the upgrade log
handoff_upgrade() {
  local candidate="$1" bin="$2" backup="$3" update_dir="$4" local_version="$5" latest_version="$6"
  : > "$UPGRADE_LOG_FILE"
  # The script is run through bash explicitly so the worker does not depend
  # on $0 containing a slash or on the script file being executable.
  # NOTE(review): the one-second delay presumably lets the caller return
  # before the server is stopped - confirm.
  setsid -f bash -c '
    sleep 1
    bash "$0" __apply-upgrade "$1" "$2" "$3" "$4" "$5" "$6"
  ' "$0" "$candidate" "$bin" "$backup" "$update_dir" "$local_version" "$latest_version" >> "$UPGRADE_LOG_FILE" 2>&1
  echo "Codex upgrade handoff started; app-server will restart if replacement succeeds. log=$UPGRADE_LOG_FILE"
}
|
||||
|
||||
# Remove the update scratch directory $1 and drop the EXIT trap that
# check_updates installed to guard it.
_discard_update_dir() {
  rm -rf -- "$1"
  trap - EXIT
}

# Compare the installed Codex with the latest GitHub release and, when a
# newer one exists, download, validate and install it.
# Globals:   ASSUME_YES (written) - set to 1 by -y/--yes
#            RUN_DIR, CODEX_RELEASE_REPO (read)
# Arguments: [-y|--yes] [-h|--help]
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 when already current, when the upgrade was applied, or when
#            it was handed to the detached worker; 1 on failure; 2 on an
#            unknown option
check_updates() {
  # NOTE(review): ASSUME_YES is left global; presumably confirm_upgrade
  # reads it - confirm.
  ASSUME_YES=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -y|--yes) ASSUME_YES=1 ;;
      -h|--help) usage; return 0 ;;
      *) echo "unknown check-updates option: $1" >&2; usage; return 2 ;;
    esac
    shift
  done

  require_cmd curl
  require_cmd tar
  require_cmd python3
  require_cmd ps

  local bin local_version target json latest_version latest_tag download_url digest archive tmp candidate candidate_version backup
  local cleanup_cmd rc=0
  local -a release_info=()

  # A failing lookup must not abort an errexit shell before the message
  # below is printed.
  bin="$(codex_bin)" || bin=""
  if [[ -z "$bin" ]]; then
    echo "codex executable not found; set CODEX_BIN" >&2
    return 1
  fi
  if [[ "$bin" != /* ]]; then
    echo "CODEX_BIN must be an absolute path: $bin" >&2
    return 1
  fi
  local_version="$(codex_version_from "$bin")"
  if [[ -z "$local_version" ]]; then
    echo "could not determine local Codex version from $bin" >&2
    return 1
  fi

  target="$(release_target)"
  tmp="$(mktemp -d "$RUN_DIR/codex-update.XXXXXX")"
  # Guard the scratch directory against any abort between here and the
  # explicit cleanups below (failed download, digest mismatch, declined
  # prompt). The path is expanded now because `tmp` is function-local and
  # out of scope when the trap runs.
  # NOTE(review): this replaces any EXIT trap installed elsewhere in the
  # script; none is visible in this part of the file - confirm.
  printf -v cleanup_cmd 'rm -rf -- %q' "$tmp"
  # shellcheck disable=SC2064 # early expansion is intentional
  trap "$cleanup_cmd" EXIT

  json="$tmp/latest.json"
  curl -fsSL "https://api.github.com/repos/$CODEX_RELEASE_REPO/releases/latest" -o "$json"
  mapfile -t release_info < <(latest_release_info "$target" "$json")
  latest_version="${release_info[0]:-}"
  latest_tag="${release_info[1]:-}"
  download_url="${release_info[2]:-}"
  digest="${release_info[3]:-}"
  if [[ -z "$latest_version" || -z "$download_url" ]]; then
    _discard_update_dir "$tmp"
    echo "could not determine latest Codex release for $target" >&2
    return 1
  fi

  if ! version_gt "$latest_version" "$local_version"; then
    _discard_update_dir "$tmp"
    echo "Codex is already current: $local_version (latest $latest_version)"
    return 0
  fi
  echo "Codex update available: $local_version -> $latest_version ($latest_tag)"
  # NOTE(review): a declined prompt only stops the upgrade if errexit is
  # active or confirm_upgrade exits on its own - confirm.
  confirm_upgrade "$local_version" "$latest_version" "$bin"

  archive="$tmp/codex-$target.tar.gz"
  curl -fL "$download_url" -o "$archive"
  verify_digest "$archive" "$digest"
  candidate="$(extract_codex_binary "$archive" "$tmp")"
  candidate_version="$(codex_version_from "$candidate")"
  if [[ "$candidate_version" != "$latest_version" ]]; then
    _discard_update_dir "$tmp"
    echo "downloaded Codex version $candidate_version does not match release $latest_version" >&2
    return 1
  fi

  backup="$bin.bak.$(date -u +%Y%m%d%H%M%S)"
  choose_install_prefix "$bin"

  if is_running; then
    handoff_upgrade "$candidate" "$bin" "$backup" "$tmp" "$local_version" "$latest_version"
    # The detached worker now owns the scratch directory and removes it
    # itself, so it must survive this process exiting.
    trap - EXIT
    return 0
  fi

  if ! apply_upgrade "$candidate" "$bin" "$backup" "$local_version" "$latest_version"; then
    rc=1
  fi
  _discard_update_dir "$tmp"
  return "$rc"
}
|
||||
|
||||
# Body of the internal `__apply-upgrade` subcommand: apply a previously
# validated candidate through apply_upgrade, then remove the scratch
# directory whether or not that succeeded.
# Arguments: $1 - path to the candidate binary
#            $2 - path of the installed Codex binary to replace
#            $3 - backup path for the current binary
#            $4 - scratch directory to remove afterwards
#            $5 - currently installed version
#            $6 - version being installed
# Outputs:   an error on stderr when the candidate is unusable
# Returns:   0 when apply_upgrade succeeded, 1 otherwise
apply_upgrade_worker() {
  local candidate="$1" bin="$2" backup="$3" update_dir="$4" local_version="$5" latest_version="$6" rc=0
  # -x alone is also true for a searchable directory, so require a regular
  # file as well.
  if [[ ! -f "$candidate" || ! -x "$candidate" ]]; then
    echo "upgrade candidate is missing or not executable: $candidate" >&2
    rc=1
  elif ! apply_upgrade "$candidate" "$bin" "$backup" "$local_version" "$latest_version"; then
    rc=1
  fi
  if [[ -n "$update_dir" ]]; then
    rm -rf -- "$update_dir"
  fi
  return "$rc"
}
|
||||
|
||||
cmd="${1:-start}"
|
||||
case "$cmd" in
|
||||
start)
|
||||
@@ -472,6 +569,11 @@ case "$cmd" in
|
||||
shift || true
|
||||
check_updates "$@"
|
||||
;;
|
||||
__apply-upgrade)
|
||||
shift || true
|
||||
if [[ $# -ne 6 ]]; then echo "invalid upgrade worker arguments" >&2; exit 2; fi
|
||||
apply_upgrade_worker "$@"
|
||||
;;
|
||||
-h|--help|help)
|
||||
usage
|
||||
;;
|
||||
|
||||
Reference in New Issue
Block a user