Allow assistant photo directives
This commit is contained in:
@@ -144,6 +144,52 @@ func (c *Client) DownloadFile(ctx context.Context, filePath string) ([]byte, err
|
||||
return io.ReadAll(resp.Body)
|
||||
}
|
||||
|
||||
func (c *Client) SendPhotoBytes(ctx context.Context, chatID int64, filename string, data []byte, caption string) (Message, error) {
|
||||
var body bytes.Buffer
|
||||
writer := multipart.NewWriter(&body)
|
||||
if err := writer.WriteField("chat_id", fmt.Sprint(chatID)); err != nil {
|
||||
return Message{}, err
|
||||
}
|
||||
if caption != "" {
|
||||
if err := writer.WriteField("caption", caption); err != nil {
|
||||
return Message{}, err
|
||||
}
|
||||
}
|
||||
part, err := writer.CreateFormFile("photo", filepath.Base(filename))
|
||||
if err != nil {
|
||||
return Message{}, err
|
||||
}
|
||||
if _, err := part.Write(data); err != nil {
|
||||
return Message{}, err
|
||||
}
|
||||
if err := writer.Close(); err != nil {
|
||||
return Message{}, err
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/sendPhoto", &body)
|
||||
if err != nil {
|
||||
return Message{}, err
|
||||
}
|
||||
req.Header.Set("Content-Type", writer.FormDataContentType())
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return Message{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
payload, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
||||
return Message{}, fmt.Errorf("sendPhoto: telegram returned %s: %s", resp.Status, string(payload))
|
||||
}
|
||||
var decoded apiResponse[Message]
|
||||
if err := json.NewDecoder(resp.Body).Decode(&decoded); err != nil {
|
||||
return Message{}, err
|
||||
}
|
||||
if !decoded.OK {
|
||||
return Message{}, fmt.Errorf("sendPhoto: telegram error %d: %s", decoded.ErrorCode, decoded.Description)
|
||||
}
|
||||
return decoded.Result, nil
|
||||
}
|
||||
|
||||
func (c *Client) SendDocumentBytes(ctx context.Context, chatID int64, filename string, data []byte, caption string) (Message, error) {
|
||||
var body bytes.Buffer
|
||||
writer := multipart.NewWriter(&body)
|
||||
|
||||
@@ -20,9 +20,12 @@ import (
|
||||
)
|
||||
|
||||
const (
	// telegramDownloadLimit is 20 MiB expressed in bytes.
	// NOTE(review): presumably the largest file the bot downloads from
	// Telegram; the use site is outside this view.
	telegramDownloadLimit = 20 * 1024 * 1024
	// resumeThreadPageSize is a page size of 8.
	// NOTE(review): presumably threads listed per page when resuming; confirm.
	resumeThreadPageSize = 8
	// commandSummaryLimit is a length limit of 120.
	// NOTE(review): presumably caps rendered command summaries; confirm.
	commandSummaryLimit = 120
	// telegramPhotoDirectiveStart is the exact prefix (including the trailing
	// space) that a line must start with to be parsed as a photo directive.
	telegramPhotoDirectiveStart = "<!-- telegram-photo "
	// telegramPhotoDirectiveEnd is the exact suffix (including the leading
	// space) that a photo directive line must end with.
	telegramPhotoDirectiveEnd = " -->"
	// telegramPhotoCaptionLimit is the maximum number of runes kept in a
	// photo caption before it is truncated.
	telegramPhotoCaptionLimit = 1024
)
|
||||
|
||||
type Bot struct {
|
||||
@@ -41,11 +44,22 @@ type Bot struct {
|
||||
diffs map[string]string
|
||||
}
|
||||
|
||||
type assistantMessageSegment struct {
|
||||
Text string
|
||||
Photo *assistantPhotoDirective
|
||||
}
|
||||
|
||||
// assistantPhotoDirective is the JSON payload carried by a photo directive
// line in assistant output.
type assistantPhotoDirective struct {
	// Path is the location of the image file to send.
	Path string `json:"path"`
	// Caption is optional text to attach to the photo.
	Caption string `json:"caption,omitempty"`
}
|
||||
|
||||
type outputState struct {
|
||||
chatID int64
|
||||
assistant strings.Builder
|
||||
sentAny bool
|
||||
tools map[string]toolMessageState
|
||||
sentImages map[string]bool
|
||||
workingIndicatorOff context.CancelFunc
|
||||
}
|
||||
|
||||
@@ -689,6 +703,15 @@ func (b *Bot) handleSandboxCommand(ctx context.Context, userID, chatID int64, se
|
||||
return err
|
||||
}
|
||||
|
||||
// isPicturePath reports whether path has one of the supported image file
// extensions (.jpg, .jpeg, .png, .webp, .gif). The comparison ignores case and
// looks only at the extension, not at the file's contents.
func isPicturePath(path string) bool {
	switch strings.ToLower(filepath.Ext(path)) {
	case ".jpg", ".jpeg", ".png", ".webp", ".gif":
		return true
	default:
		return false
	}
}
|
||||
|
||||
func (b *Bot) sendDiff(ctx context.Context, chatID int64, session store.Session) error {
|
||||
if session.ActiveThreadID == 0 {
|
||||
_, err := b.tg.SendMessage(ctx, chatID, "No active thread.", SendMessageOptions{})
|
||||
@@ -1333,7 +1356,10 @@ func (b *Bot) handleCodexNotification(ctx context.Context, event codexapp.Event)
|
||||
return b.flushAssistantMessage(ctx, params.ThreadID)
|
||||
}
|
||||
if params.ThreadID != "" {
|
||||
return b.upsertToolMessage(ctx, params.ThreadID, item.ID, renderCodexItemCompleted(item))
|
||||
if err := b.upsertToolMessage(ctx, params.ThreadID, item.ID, renderCodexItemCompleted(item)); err != nil {
|
||||
return err
|
||||
}
|
||||
return b.sendImageOutput(ctx, params.ThreadID, item)
|
||||
}
|
||||
case "turn/diff/updated":
|
||||
var params struct {
|
||||
@@ -1456,6 +1482,7 @@ func (b *Bot) newOutputState(chatID int64) *outputState {
|
||||
return &outputState{
|
||||
chatID: chatID,
|
||||
tools: make(map[string]toolMessageState),
|
||||
sentImages: make(map[string]bool),
|
||||
workingIndicatorOff: b.startWorkingIndicator(chatID),
|
||||
}
|
||||
}
|
||||
@@ -1550,6 +1577,52 @@ func (b *Bot) failActiveOutputs(ctx context.Context, message string) {
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Bot) sendImageOutput(ctx context.Context, threadID string, item codexThreadItemView) error {
|
||||
if item.Type != "imageGeneration" || strings.TrimSpace(item.SavedPath) == "" {
|
||||
return nil
|
||||
}
|
||||
path := strings.TrimSpace(item.SavedPath)
|
||||
if !b.markImageOutputPending(threadID, path) {
|
||||
return nil
|
||||
}
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
b.logger.Printf("read generated image %s: %v", path, err)
|
||||
return nil
|
||||
}
|
||||
chatID, err := b.outputChatID(ctx, threadID)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
caption := "Generated image"
|
||||
if item.Status != "" {
|
||||
caption += ": " + item.Status
|
||||
}
|
||||
if _, err := b.tg.SendPhotoBytes(ctx, chatID, path, data, caption); err != nil {
|
||||
b.logger.Printf("send generated image %s: %v", path, err)
|
||||
return nil
|
||||
}
|
||||
b.markOutputSent(threadID)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Bot) markImageOutputPending(threadID, path string) bool {
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
state := b.outputs[threadID]
|
||||
if state == nil {
|
||||
return false
|
||||
}
|
||||
if state.sentImages == nil {
|
||||
state.sentImages = make(map[string]bool)
|
||||
}
|
||||
if state.sentImages[path] {
|
||||
return false
|
||||
}
|
||||
state.sentImages[path] = true
|
||||
return true
|
||||
}
|
||||
|
||||
func (b *Bot) sendOutputBlock(ctx context.Context, threadID, block string) error {
|
||||
block = strings.TrimSpace(block)
|
||||
if block == "" {
|
||||
@@ -1764,6 +1837,100 @@ func ignoreTelegramMessageNotModified(err error) error {
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func splitAssistantMessageSegments(text string) []assistantMessageSegment {
|
||||
var segments []assistantMessageSegment
|
||||
var visible strings.Builder
|
||||
flushVisible := func() {
|
||||
if visible.Len() == 0 {
|
||||
return
|
||||
}
|
||||
segments = append(segments, assistantMessageSegment{Text: visible.String()})
|
||||
visible.Reset()
|
||||
}
|
||||
|
||||
for _, line := range strings.SplitAfter(text, "\n") {
|
||||
body := strings.TrimSuffix(line, "\n")
|
||||
body = strings.TrimSuffix(body, "\r")
|
||||
if directive, ok := parseAssistantPhotoDirectiveLine(body); ok {
|
||||
flushVisible()
|
||||
segments = append(segments, assistantMessageSegment{Photo: &directive})
|
||||
continue
|
||||
}
|
||||
visible.WriteString(line)
|
||||
}
|
||||
flushVisible()
|
||||
return segments
|
||||
}
|
||||
|
||||
func parseAssistantPhotoDirectiveLine(line string) (assistantPhotoDirective, bool) {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if !strings.HasPrefix(trimmed, telegramPhotoDirectiveStart) || !strings.HasSuffix(trimmed, telegramPhotoDirectiveEnd) {
|
||||
return assistantPhotoDirective{}, false
|
||||
}
|
||||
raw := strings.TrimSuffix(strings.TrimPrefix(trimmed, telegramPhotoDirectiveStart), telegramPhotoDirectiveEnd)
|
||||
raw = strings.TrimSpace(raw)
|
||||
var directive assistantPhotoDirective
|
||||
if err := json.Unmarshal([]byte(raw), &directive); err != nil {
|
||||
return assistantPhotoDirective{}, false
|
||||
}
|
||||
directive.Path = strings.TrimSpace(directive.Path)
|
||||
directive.Caption = strings.TrimSpace(directive.Caption)
|
||||
return directive, true
|
||||
}
|
||||
|
||||
func (b *Bot) sendAssistantText(ctx context.Context, chatID int64, text string) error {
|
||||
for _, segment := range splitAssistantMessageSegments(text) {
|
||||
if segment.Text != "" && strings.TrimSpace(segment.Text) != "" {
|
||||
if err := b.sendLong(ctx, chatID, segment.Text); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if segment.Photo != nil {
|
||||
if err := b.sendAssistantPhoto(ctx, chatID, *segment.Photo); err != nil {
|
||||
b.logger.Printf("send assistant photo: %v", err)
|
||||
if sendErr := b.sendLong(ctx, chatID, "Could not send photo: "+err.Error()); sendErr != nil {
|
||||
return sendErr
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Bot) sendAssistantPhoto(ctx context.Context, chatID int64, directive assistantPhotoDirective) error {
|
||||
path := strings.TrimSpace(directive.Path)
|
||||
if path == "" {
|
||||
return errors.New("photo directive is missing a path")
|
||||
}
|
||||
if !filepath.IsAbs(path) {
|
||||
return fmt.Errorf("photo path must be absolute: %s", path)
|
||||
}
|
||||
if !isPicturePath(path) {
|
||||
return fmt.Errorf("unsupported photo type: %s", filepath.Base(path))
|
||||
}
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read %s: %v", filepath.Base(path), err)
|
||||
}
|
||||
caption := truncateTelegramPhotoCaption(directive.Caption)
|
||||
if _, err := b.tg.SendPhotoBytes(ctx, chatID, path, data, caption); err != nil {
|
||||
return fmt.Errorf("send %s: %v", filepath.Base(path), err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func truncateTelegramPhotoCaption(caption string) string {
|
||||
runes := []rune(caption)
|
||||
if len(runes) <= telegramPhotoCaptionLimit {
|
||||
return caption
|
||||
}
|
||||
if telegramPhotoCaptionLimit <= 3 {
|
||||
return string(runes[:telegramPhotoCaptionLimit])
|
||||
}
|
||||
return string(runes[:telegramPhotoCaptionLimit-3]) + "..."
|
||||
}
|
||||
|
||||
func (b *Bot) appendAssistantDelta(ctx context.Context, threadID, delta string) error {
|
||||
if delta == "" {
|
||||
return nil
|
||||
@@ -1792,7 +1959,7 @@ func (b *Bot) flushAssistantMessage(ctx context.Context, threadID string) error
|
||||
state.assistant.Reset()
|
||||
b.mu.Unlock()
|
||||
|
||||
if err := b.sendLong(ctx, chatID, text); err != nil {
|
||||
if err := b.sendAssistantText(ctx, chatID, text); err != nil {
|
||||
return err
|
||||
}
|
||||
b.markOutputSent(threadID)
|
||||
|
||||
@@ -77,6 +77,31 @@ func TestParseCommand(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSplitAssistantMessageSegmentsWithPhotoDirective(t *testing.T) {
|
||||
text := "before\n<!-- telegram-photo {\"path\":\"/tmp/photo.jpg\",\"caption\":\"hello\"} -->\nafter"
|
||||
segments := splitAssistantMessageSegments(text)
|
||||
if len(segments) != 3 {
|
||||
t.Fatalf("segments = %d, want 3: %#v", len(segments), segments)
|
||||
}
|
||||
if segments[0].Text != "before\n" || segments[0].Photo != nil {
|
||||
t.Fatalf("unexpected first segment: %#v", segments[0])
|
||||
}
|
||||
if segments[1].Photo == nil || segments[1].Photo.Path != "/tmp/photo.jpg" || segments[1].Photo.Caption != "hello" {
|
||||
t.Fatalf("unexpected photo segment: %#v", segments[1])
|
||||
}
|
||||
if segments[2].Text != "after" || segments[2].Photo != nil {
|
||||
t.Fatalf("unexpected final segment: %#v", segments[2])
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidPhotoDirectiveStaysVisible(t *testing.T) {
|
||||
text := "<!-- telegram-photo not-json -->"
|
||||
segments := splitAssistantMessageSegments(text)
|
||||
if len(segments) != 1 || segments[0].Text != text {
|
||||
t.Fatalf("invalid directive should stay text: %#v", segments)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenderCodexCommandExecutionItem(t *testing.T) {
|
||||
output := "line 1\nline 2"
|
||||
exitCode := 0
|
||||
|
||||
Reference in New Issue
Block a user