Allow assistant photo directives
This commit is contained in:
@@ -20,9 +20,12 @@ import (
|
||||
)
|
||||
|
||||
// Package-level limits and markers used by the Telegram bot.
//
// Each name is declared exactly once; declaring the same constant twice in one
// block is a compile error ("redeclared in this block").
const (
	// telegramDownloadLimit is a byte cap of 20 MiB.
	// NOTE(review): presumably the largest file the bot will download from
	// Telegram — confirm against the call sites.
	telegramDownloadLimit = 20 * 1024 * 1024
	// resumeThreadPageSize is presumably the number of threads listed per page
	// when resuming a thread — confirm against the call sites.
	resumeThreadPageSize = 8
	// commandSummaryLimit is presumably the maximum length of a rendered
	// command summary — confirm against the call sites.
	commandSummaryLimit = 120
	// telegramPhotoDirectiveStart is the prefix (including its trailing space)
	// that opens a photo directive line in assistant output; see
	// parseAssistantPhotoDirectiveLine.
	telegramPhotoDirectiveStart = "<!-- telegram-photo "
	// telegramPhotoDirectiveEnd is the suffix (including its leading space)
	// that closes a photo directive line.
	telegramPhotoDirectiveEnd = " -->"
	// telegramPhotoCaptionLimit is the maximum caption length, in runes, that
	// truncateTelegramPhotoCaption lets through.
	telegramPhotoCaptionLimit = 1024
)
|
||||
|
||||
type Bot struct {
|
||||
@@ -41,11 +44,22 @@ type Bot struct {
|
||||
diffs map[string]string
|
||||
}
|
||||
|
||||
// assistantMessageSegment is one piece of an assistant message after it has
// been split by splitAssistantMessageSegments. That function fills exactly one
// of the two fields per segment: Text for a run of ordinary lines, Photo for a
// single photo directive line.
type assistantMessageSegment struct {
|
||||
// Text holds consecutive non-directive lines, line endings included.
Text string
|
||||
// Photo is non-nil when the segment represents a photo directive.
Photo *assistantPhotoDirective
|
||||
}
|
||||
|
||||
// assistantPhotoDirective is the JSON payload carried by a photo directive
// line in assistant output (the text between telegramPhotoDirectiveStart and
// telegramPhotoDirectiveEnd).
type assistantPhotoDirective struct {
|
||||
// Path is the file to send. sendAssistantPhoto rejects it unless it is
// non-empty, absolute, and has an extension accepted by isPicturePath.
Path string `json:"path"`
|
||||
// Caption is optional text sent with the photo; sendAssistantPhoto passes
// it through truncateTelegramPhotoCaption first.
Caption string `json:"caption,omitempty"`
|
||||
}
|
||||
|
||||
// outputState holds the bot's output bookkeeping for one entry of b.outputs,
// which is indexed by thread ID.
type outputState struct {
|
||||
// chatID is the chat this state was created for (see newOutputState).
chatID int64
|
||||
// assistant buffers assistant text; flushAssistantMessage resets it.
assistant strings.Builder
|
||||
// sentAny: NOTE(review): presumably set once anything has been delivered
// for the thread (see markOutputSent) — confirm.
sentAny bool
|
||||
// tools: NOTE(review): presumably keyed by item ID, tracking the message
// used for each tool item (see upsertToolMessage) — confirm.
tools map[string]toolMessageState
|
||||
// sentImages records image paths already claimed by
// markImageOutputPending so each path is sent at most once.
sentImages map[string]bool
|
||||
// workingIndicatorOff is the cancel function returned by
// startWorkingIndicator when the state is created.
workingIndicatorOff context.CancelFunc
|
||||
}
|
||||
|
||||
@@ -689,6 +703,15 @@ func (b *Bot) handleSandboxCommand(ctx context.Context, userID, chatID int64, se
|
||||
return err
|
||||
}
|
||||
|
||||
// isPicturePath reports whether path ends in one of the image file extensions
// the bot treats as a picture (.jpg, .jpeg, .png, .webp, .gif). The comparison
// ignores case and looks only at the extension, never at the file contents.
func isPicturePath(path string) bool {
	ext := strings.ToLower(filepath.Ext(path))
	for _, known := range [...]string{".jpg", ".jpeg", ".png", ".webp", ".gif"} {
		if ext == known {
			return true
		}
	}
	return false
}
|
||||
|
||||
func (b *Bot) sendDiff(ctx context.Context, chatID int64, session store.Session) error {
|
||||
if session.ActiveThreadID == 0 {
|
||||
_, err := b.tg.SendMessage(ctx, chatID, "No active thread.", SendMessageOptions{})
|
||||
@@ -1333,7 +1356,10 @@ func (b *Bot) handleCodexNotification(ctx context.Context, event codexapp.Event)
|
||||
return b.flushAssistantMessage(ctx, params.ThreadID)
|
||||
}
|
||||
if params.ThreadID != "" {
|
||||
return b.upsertToolMessage(ctx, params.ThreadID, item.ID, renderCodexItemCompleted(item))
|
||||
if err := b.upsertToolMessage(ctx, params.ThreadID, item.ID, renderCodexItemCompleted(item)); err != nil {
|
||||
return err
|
||||
}
|
||||
return b.sendImageOutput(ctx, params.ThreadID, item)
|
||||
}
|
||||
case "turn/diff/updated":
|
||||
var params struct {
|
||||
@@ -1456,6 +1482,7 @@ func (b *Bot) newOutputState(chatID int64) *outputState {
|
||||
return &outputState{
|
||||
chatID: chatID,
|
||||
tools: make(map[string]toolMessageState),
|
||||
sentImages: make(map[string]bool),
|
||||
workingIndicatorOff: b.startWorkingIndicator(chatID),
|
||||
}
|
||||
}
|
||||
@@ -1550,6 +1577,52 @@ func (b *Bot) failActiveOutputs(ctx context.Context, message string) {
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Bot) sendImageOutput(ctx context.Context, threadID string, item codexThreadItemView) error {
|
||||
if item.Type != "imageGeneration" || strings.TrimSpace(item.SavedPath) == "" {
|
||||
return nil
|
||||
}
|
||||
path := strings.TrimSpace(item.SavedPath)
|
||||
if !b.markImageOutputPending(threadID, path) {
|
||||
return nil
|
||||
}
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
b.logger.Printf("read generated image %s: %v", path, err)
|
||||
return nil
|
||||
}
|
||||
chatID, err := b.outputChatID(ctx, threadID)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
caption := "Generated image"
|
||||
if item.Status != "" {
|
||||
caption += ": " + item.Status
|
||||
}
|
||||
if _, err := b.tg.SendPhotoBytes(ctx, chatID, path, data, caption); err != nil {
|
||||
b.logger.Printf("send generated image %s: %v", path, err)
|
||||
return nil
|
||||
}
|
||||
b.markOutputSent(threadID)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Bot) markImageOutputPending(threadID, path string) bool {
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
state := b.outputs[threadID]
|
||||
if state == nil {
|
||||
return false
|
||||
}
|
||||
if state.sentImages == nil {
|
||||
state.sentImages = make(map[string]bool)
|
||||
}
|
||||
if state.sentImages[path] {
|
||||
return false
|
||||
}
|
||||
state.sentImages[path] = true
|
||||
return true
|
||||
}
|
||||
|
||||
func (b *Bot) sendOutputBlock(ctx context.Context, threadID, block string) error {
|
||||
block = strings.TrimSpace(block)
|
||||
if block == "" {
|
||||
@@ -1764,6 +1837,100 @@ func ignoreTelegramMessageNotModified(err error) error {
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func splitAssistantMessageSegments(text string) []assistantMessageSegment {
|
||||
var segments []assistantMessageSegment
|
||||
var visible strings.Builder
|
||||
flushVisible := func() {
|
||||
if visible.Len() == 0 {
|
||||
return
|
||||
}
|
||||
segments = append(segments, assistantMessageSegment{Text: visible.String()})
|
||||
visible.Reset()
|
||||
}
|
||||
|
||||
for _, line := range strings.SplitAfter(text, "\n") {
|
||||
body := strings.TrimSuffix(line, "\n")
|
||||
body = strings.TrimSuffix(body, "\r")
|
||||
if directive, ok := parseAssistantPhotoDirectiveLine(body); ok {
|
||||
flushVisible()
|
||||
segments = append(segments, assistantMessageSegment{Photo: &directive})
|
||||
continue
|
||||
}
|
||||
visible.WriteString(line)
|
||||
}
|
||||
flushVisible()
|
||||
return segments
|
||||
}
|
||||
|
||||
func parseAssistantPhotoDirectiveLine(line string) (assistantPhotoDirective, bool) {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if !strings.HasPrefix(trimmed, telegramPhotoDirectiveStart) || !strings.HasSuffix(trimmed, telegramPhotoDirectiveEnd) {
|
||||
return assistantPhotoDirective{}, false
|
||||
}
|
||||
raw := strings.TrimSuffix(strings.TrimPrefix(trimmed, telegramPhotoDirectiveStart), telegramPhotoDirectiveEnd)
|
||||
raw = strings.TrimSpace(raw)
|
||||
var directive assistantPhotoDirective
|
||||
if err := json.Unmarshal([]byte(raw), &directive); err != nil {
|
||||
return assistantPhotoDirective{}, false
|
||||
}
|
||||
directive.Path = strings.TrimSpace(directive.Path)
|
||||
directive.Caption = strings.TrimSpace(directive.Caption)
|
||||
return directive, true
|
||||
}
|
||||
|
||||
func (b *Bot) sendAssistantText(ctx context.Context, chatID int64, text string) error {
|
||||
for _, segment := range splitAssistantMessageSegments(text) {
|
||||
if segment.Text != "" && strings.TrimSpace(segment.Text) != "" {
|
||||
if err := b.sendLong(ctx, chatID, segment.Text); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if segment.Photo != nil {
|
||||
if err := b.sendAssistantPhoto(ctx, chatID, *segment.Photo); err != nil {
|
||||
b.logger.Printf("send assistant photo: %v", err)
|
||||
if sendErr := b.sendLong(ctx, chatID, "Could not send photo: "+err.Error()); sendErr != nil {
|
||||
return sendErr
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Bot) sendAssistantPhoto(ctx context.Context, chatID int64, directive assistantPhotoDirective) error {
|
||||
path := strings.TrimSpace(directive.Path)
|
||||
if path == "" {
|
||||
return errors.New("photo directive is missing a path")
|
||||
}
|
||||
if !filepath.IsAbs(path) {
|
||||
return fmt.Errorf("photo path must be absolute: %s", path)
|
||||
}
|
||||
if !isPicturePath(path) {
|
||||
return fmt.Errorf("unsupported photo type: %s", filepath.Base(path))
|
||||
}
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read %s: %v", filepath.Base(path), err)
|
||||
}
|
||||
caption := truncateTelegramPhotoCaption(directive.Caption)
|
||||
if _, err := b.tg.SendPhotoBytes(ctx, chatID, path, data, caption); err != nil {
|
||||
return fmt.Errorf("send %s: %v", filepath.Base(path), err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func truncateTelegramPhotoCaption(caption string) string {
|
||||
runes := []rune(caption)
|
||||
if len(runes) <= telegramPhotoCaptionLimit {
|
||||
return caption
|
||||
}
|
||||
if telegramPhotoCaptionLimit <= 3 {
|
||||
return string(runes[:telegramPhotoCaptionLimit])
|
||||
}
|
||||
return string(runes[:telegramPhotoCaptionLimit-3]) + "..."
|
||||
}
|
||||
|
||||
func (b *Bot) appendAssistantDelta(ctx context.Context, threadID, delta string) error {
|
||||
if delta == "" {
|
||||
return nil
|
||||
@@ -1792,7 +1959,7 @@ func (b *Bot) flushAssistantMessage(ctx context.Context, threadID string) error
|
||||
state.assistant.Reset()
|
||||
b.mu.Unlock()
|
||||
|
||||
if err := b.sendLong(ctx, chatID, text); err != nil {
|
||||
if err := b.sendAssistantText(ctx, chatID, text); err != nil {
|
||||
return err
|
||||
}
|
||||
b.markOutputSent(threadID)
|
||||
|
||||
Reference in New Issue
Block a user