Allow assistant photo directives

This commit is contained in:
Codex
2026-05-21 12:28:57 +00:00
parent e9425c6d9b
commit 1a74c02173
6 changed files with 272 additions and 5 deletions

View File

@@ -20,9 +20,12 @@ import (
)
const (
telegramDownloadLimit = 20 * 1024 * 1024
resumeThreadPageSize = 8
commandSummaryLimit = 120
telegramDownloadLimit = 20 * 1024 * 1024
resumeThreadPageSize = 8
commandSummaryLimit = 120
// telegramPhotoDirectiveStart and telegramPhotoDirectiveEnd delimit a
// single-line photo directive inside assistant text. A line counts as a
// directive only when, after trimming whitespace, it starts with the first
// marker and ends with the second; the text in between is decoded as JSON
// (see parseAssistantPhotoDirectiveLine).
telegramPhotoDirectiveStart = "<!-- telegram-photo "
telegramPhotoDirectiveEnd = " -->"
// telegramPhotoCaptionLimit is the maximum caption length, counted in runes,
// that truncateTelegramPhotoCaption lets through before shortening.
telegramPhotoCaptionLimit = 1024
)
type Bot struct {
@@ -41,11 +44,22 @@ type Bot struct {
diffs map[string]string
}
// assistantMessageSegment is one ordered piece of an assistant message after
// photo directives have been separated from ordinary text. As produced by
// splitAssistantMessageSegments, a segment carries either Text or Photo,
// never both.
type assistantMessageSegment struct {
// Text is a run of consecutive non-directive lines, line endings included.
Text string
// Photo is the parsed directive for a directive line; nil for text segments.
Photo *assistantPhotoDirective
}
// assistantPhotoDirective is the JSON payload of a photo directive line
// embedded in assistant text.
type assistantPhotoDirective struct {
// Path is the file to send. sendAssistantPhoto rejects paths that are empty,
// not absolute, or without a recognised picture extension.
Path string `json:"path"`
// Caption is optional text sent with the photo; it is shortened by
// truncateTelegramPhotoCaption before sending.
Caption string `json:"caption,omitempty"`
}
// outputState holds per-thread output bookkeeping; instances are looked up
// through b.outputs keyed by thread ID.
type outputState struct {
// chatID is the chat this state was created for (see newOutputState).
chatID int64
// assistant buffers assistant text until it is flushed and reset.
assistant strings.Builder
// sentAny: presumably records that some output was delivered for the
// thread — TODO confirm against markOutputSent.
sentAny bool
// tools maps a key to tool message state; presumably keyed by item ID —
// verify against upsertToolMessage.
tools map[string]toolMessageState
// sentImages records image paths already claimed for sending, so the same
// path is not sent twice (see markImageOutputPending).
sentImages map[string]bool
// workingIndicatorOff is the cancel function returned by
// startWorkingIndicator when the state is created.
workingIndicatorOff context.CancelFunc
}
@@ -689,6 +703,15 @@ func (b *Bot) handleSandboxCommand(ctx context.Context, userID, chatID int64, se
return err
}
// isPicturePath reports whether path ends in one of the picture extensions
// recognised here: .jpg, .jpeg, .png, .webp or .gif. The check is
// case-insensitive and looks only at the extension; the file is never opened.
func isPicturePath(path string) bool {
	ext := strings.ToLower(filepath.Ext(path))
	for _, known := range [...]string{".jpg", ".jpeg", ".png", ".webp", ".gif"} {
		if ext == known {
			return true
		}
	}
	return false
}
func (b *Bot) sendDiff(ctx context.Context, chatID int64, session store.Session) error {
if session.ActiveThreadID == 0 {
_, err := b.tg.SendMessage(ctx, chatID, "No active thread.", SendMessageOptions{})
@@ -1333,7 +1356,10 @@ func (b *Bot) handleCodexNotification(ctx context.Context, event codexapp.Event)
return b.flushAssistantMessage(ctx, params.ThreadID)
}
if params.ThreadID != "" {
return b.upsertToolMessage(ctx, params.ThreadID, item.ID, renderCodexItemCompleted(item))
if err := b.upsertToolMessage(ctx, params.ThreadID, item.ID, renderCodexItemCompleted(item)); err != nil {
return err
}
return b.sendImageOutput(ctx, params.ThreadID, item)
}
case "turn/diff/updated":
var params struct {
@@ -1456,6 +1482,7 @@ func (b *Bot) newOutputState(chatID int64) *outputState {
return &outputState{
chatID: chatID,
tools: make(map[string]toolMessageState),
sentImages: make(map[string]bool),
workingIndicatorOff: b.startWorkingIndicator(chatID),
}
}
@@ -1550,6 +1577,52 @@ func (b *Bot) failActiveOutputs(ctx context.Context, message string) {
}
}
// sendImageOutput forwards a generated image to the chat for threadID.
//
// Only items whose Type is "imageGeneration" and whose SavedPath is non-blank
// are considered; anything else is ignored. The path is claimed through
// markImageOutputPending before any I/O, so a path already claimed for this
// thread (or a thread without output state) is skipped. The claim is not
// released if delivery later fails, so a failed image is not retried.
//
// Delivery is best-effort: read, chat-lookup and send failures are logged and
// swallowed, so the method always returns nil and never aborts the caller's
// notification handling. On success the thread is passed to markOutputSent.
func (b *Bot) sendImageOutput(ctx context.Context, threadID string, item codexThreadItemView) error {
	path := strings.TrimSpace(item.SavedPath)
	if item.Type != "imageGeneration" || path == "" {
		return nil
	}
	if !b.markImageOutputPending(threadID, path) {
		return nil
	}
	data, err := os.ReadFile(path)
	if err != nil {
		b.logger.Printf("read generated image %s: %v", path, err)
		return nil
	}
	chatID, err := b.outputChatID(ctx, threadID)
	if err != nil {
		// Log like the other failure paths so a dropped image leaves a trace.
		b.logger.Printf("resolve chat for generated image %s: %v", path, err)
		return nil
	}
	caption := "Generated image"
	if item.Status != "" {
		caption += ": " + item.Status
	}
	if _, err := b.tg.SendPhotoBytes(ctx, chatID, path, data, caption); err != nil {
		b.logger.Printf("send generated image %s: %v", path, err)
		return nil
	}
	b.markOutputSent(threadID)
	return nil
}
// markImageOutputPending claims path for sending on the given thread.
//
// It returns true when the caller should go ahead and send the image, and
// false when the thread has no output state or the path was already claimed.
// The check and the claim happen under b.mu, so only one caller wins for a
// given thread/path pair.
func (b *Bot) markImageOutputPending(threadID, path string) bool {
	b.mu.Lock()
	defer b.mu.Unlock()

	out := b.outputs[threadID]
	switch {
	case out == nil:
		return false
	case out.sentImages == nil:
		// First image for this state: create the set with path already claimed.
		out.sentImages = map[string]bool{path: true}
		return true
	case out.sentImages[path]:
		return false
	}
	out.sentImages[path] = true
	return true
}
func (b *Bot) sendOutputBlock(ctx context.Context, threadID, block string) error {
block = strings.TrimSpace(block)
if block == "" {
@@ -1764,6 +1837,100 @@ func ignoreTelegramMessageNotModified(err error) error {
}
return err
}
// splitAssistantMessageSegments breaks assistant text into an ordered list of
// text runs and photo directives.
//
// The text is scanned line by line. Each line that
// parseAssistantPhotoDirectiveLine accepts becomes its own Photo segment and
// is removed from the visible text; consecutive other lines are joined, with
// their original line endings, into a single Text segment. Empty text runs
// produce no segment, so empty input yields a nil slice.
func splitAssistantMessageSegments(text string) []assistantMessageSegment {
	var (
		out     []assistantMessageSegment
		pending strings.Builder
	)
	for _, raw := range strings.SplitAfter(text, "\n") {
		// Strip the line terminator (LF or CRLF) before testing for a directive.
		content := strings.TrimSuffix(strings.TrimSuffix(raw, "\n"), "\r")
		photo, isDirective := parseAssistantPhotoDirectiveLine(content)
		if !isDirective {
			pending.WriteString(raw)
			continue
		}
		// Emit any text collected so far so segment order matches the input.
		if pending.Len() > 0 {
			out = append(out, assistantMessageSegment{Text: pending.String()})
			pending.Reset()
		}
		out = append(out, assistantMessageSegment{Photo: &photo})
	}
	if pending.Len() > 0 {
		out = append(out, assistantMessageSegment{Text: pending.String()})
	}
	return out
}
// parseAssistantPhotoDirectiveLine tries to read line as a photo directive.
//
// After trimming surrounding whitespace, the line must start with
// telegramPhotoDirectiveStart and end with telegramPhotoDirectiveEnd, and the
// text between the markers must decode as JSON into an
// assistantPhotoDirective. On success the directive is returned with Path and
// Caption whitespace-trimmed, and ok is true. Otherwise the zero directive
// and false are returned. The path is not validated here.
func parseAssistantPhotoDirectiveLine(line string) (assistantPhotoDirective, bool) {
	candidate := strings.TrimSpace(line)
	wrapped := strings.HasPrefix(candidate, telegramPhotoDirectiveStart) &&
		strings.HasSuffix(candidate, telegramPhotoDirectiveEnd)
	if !wrapped {
		return assistantPhotoDirective{}, false
	}

	payload := strings.TrimPrefix(candidate, telegramPhotoDirectiveStart)
	payload = strings.TrimSuffix(payload, telegramPhotoDirectiveEnd)
	payload = strings.TrimSpace(payload)

	var parsed assistantPhotoDirective
	if json.Unmarshal([]byte(payload), &parsed) != nil {
		return assistantPhotoDirective{}, false
	}
	parsed.Path = strings.TrimSpace(parsed.Path)
	parsed.Caption = strings.TrimSpace(parsed.Caption)
	return parsed, true
}
// sendAssistantText delivers assistant output to chatID, sending text and
// photo directives in the order they appear.
//
// Text segments that are not blank are sent with sendLong. A photo that
// cannot be sent does not abort the message: the failure is logged and a
// "Could not send photo: ..." notice is sent to the chat instead. The method
// returns an error only when sending text (or that notice) fails.
func (b *Bot) sendAssistantText(ctx context.Context, chatID int64, text string) error {
	for _, part := range splitAssistantMessageSegments(text) {
		if strings.TrimSpace(part.Text) != "" {
			if err := b.sendLong(ctx, chatID, part.Text); err != nil {
				return err
			}
		}
		if part.Photo == nil {
			continue
		}
		photoErr := b.sendAssistantPhoto(ctx, chatID, *part.Photo)
		if photoErr == nil {
			continue
		}
		b.logger.Printf("send assistant photo: %v", photoErr)
		if err := b.sendLong(ctx, chatID, "Could not send photo: "+photoErr.Error()); err != nil {
			return err
		}
	}
	return nil
}
// sendAssistantPhoto uploads the file named by directive to chatID as a photo.
//
// The path must be non-empty, absolute, and carry an extension accepted by
// isPicturePath; otherwise an error is returned before any file access. The
// caption is shortened with truncateTelegramPhotoCaption before sending.
//
// Read and send failures are wrapped with %w, so callers can inspect the
// underlying cause with errors.Is / errors.As; the message text is the same
// as with %v.
//
// NOTE(review): the path comes from assistant output, and any readable
// absolute path with a picture extension will be uploaded — confirm that is
// acceptable for the deployment.
func (b *Bot) sendAssistantPhoto(ctx context.Context, chatID int64, directive assistantPhotoDirective) error {
	path := strings.TrimSpace(directive.Path)
	if path == "" {
		return errors.New("photo directive is missing a path")
	}
	if !filepath.IsAbs(path) {
		return fmt.Errorf("photo path must be absolute: %s", path)
	}
	if !isPicturePath(path) {
		return fmt.Errorf("unsupported photo type: %s", filepath.Base(path))
	}
	data, err := os.ReadFile(path)
	if err != nil {
		return fmt.Errorf("read %s: %w", filepath.Base(path), err)
	}
	caption := truncateTelegramPhotoCaption(directive.Caption)
	if _, err := b.tg.SendPhotoBytes(ctx, chatID, path, data, caption); err != nil {
		return fmt.Errorf("send %s: %w", filepath.Base(path), err)
	}
	return nil
}
// truncateTelegramPhotoCaption shortens caption to at most
// telegramPhotoCaptionLimit runes. Captions within the limit are returned
// unchanged. Longer ones are cut and end in "..." so the result is exactly
// the limit long; if the limit is too small to hold the ellipsis, the caption
// is simply cut at the limit. Length is measured in runes, so multi-byte
// characters are never split.
func truncateTelegramPhotoCaption(caption string) string {
	chars := []rune(caption)
	switch {
	case len(chars) <= telegramPhotoCaptionLimit:
		return caption
	case telegramPhotoCaptionLimit <= 3:
		return string(chars[:telegramPhotoCaptionLimit])
	default:
		return string(chars[:telegramPhotoCaptionLimit-3]) + "..."
	}
}
func (b *Bot) appendAssistantDelta(ctx context.Context, threadID, delta string) error {
if delta == "" {
return nil
@@ -1792,7 +1959,7 @@ func (b *Bot) flushAssistantMessage(ctx context.Context, threadID string) error
state.assistant.Reset()
b.mu.Unlock()
if err := b.sendLong(ctx, chatID, text); err != nil {
if err := b.sendAssistantText(ctx, chatID, text); err != nil {
return err
}
b.markOutputSent(threadID)