Allow assistant photo directives
This commit is contained in:
22
.codex/skills/telegram-photo/SKILL.md
Normal file
22
.codex/skills/telegram-photo/SKILL.md
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
---
|
||||||
|
name: telegram-photo
|
||||||
|
description: Use when Codex should send, show, or share a local picture into the Telegram chat through the bot without calling Telegram tools.
|
||||||
|
metadata:
|
||||||
|
short-description: Send Telegram photos from assistant output
|
||||||
|
---
|
||||||
|
|
||||||
|
# Telegram Photo
|
||||||
|
|
||||||
|
When asked to send/show/share a picture in Telegram, emit a photo directive in normal assistant output. The bot strips the directive and sends the image as a Telegram photo.
|
||||||
|
|
||||||
|
Use exactly one directive line per image, outside code fences:
|
||||||
|
|
||||||
|
`<!-- telegram-photo {"path":"<absolute-local-image-path>","caption":"<optional caption>"} -->`
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Replace `<absolute-local-image-path>` with an absolute path that exists in the current workspace or another location visible to the bot process.
|
||||||
|
- Do not hardcode machine-specific directories, user names, repository paths, or sample filenames in this skill.
|
||||||
|
- Supported extensions are `.jpg`, `.jpeg`, `.png`, `.webp`, and `.gif`.
|
||||||
|
- `caption` is optional and should be short; omit the `caption` field when no caption is needed.
|
||||||
|
- Do not use external Telegram tool calls for this.
|
||||||
|
- If no usable image path is known, ask for the path or explain what local file is needed.
|
||||||
4
.codex/skills/telegram-photo/agents/openai.yaml
Normal file
4
.codex/skills/telegram-photo/agents/openai.yaml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
interface:
|
||||||
|
display_name: "Telegram Photo"
|
||||||
|
short_description: "Send Telegram photos from assistant output."
|
||||||
|
default_prompt: "Send a local image to the Telegram chat using the bot photo directive."
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -24,6 +24,9 @@
|
|||||||
*.test
|
*.test
|
||||||
coverage.out
|
coverage.out
|
||||||
|
|
||||||
|
# Local scratch assets
|
||||||
|
/codex-telegram-bot-profile.jpg
|
||||||
|
|
||||||
# Editor/OS noise
|
# Editor/OS noise
|
||||||
.DS_Store
|
.DS_Store
|
||||||
.idea/
|
.idea/
|
||||||
|
|||||||
@@ -144,6 +144,52 @@ func (c *Client) DownloadFile(ctx context.Context, filePath string) ([]byte, err
|
|||||||
return io.ReadAll(resp.Body)
|
return io.ReadAll(resp.Body)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Client) SendPhotoBytes(ctx context.Context, chatID int64, filename string, data []byte, caption string) (Message, error) {
|
||||||
|
var body bytes.Buffer
|
||||||
|
writer := multipart.NewWriter(&body)
|
||||||
|
if err := writer.WriteField("chat_id", fmt.Sprint(chatID)); err != nil {
|
||||||
|
return Message{}, err
|
||||||
|
}
|
||||||
|
if caption != "" {
|
||||||
|
if err := writer.WriteField("caption", caption); err != nil {
|
||||||
|
return Message{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
part, err := writer.CreateFormFile("photo", filepath.Base(filename))
|
||||||
|
if err != nil {
|
||||||
|
return Message{}, err
|
||||||
|
}
|
||||||
|
if _, err := part.Write(data); err != nil {
|
||||||
|
return Message{}, err
|
||||||
|
}
|
||||||
|
if err := writer.Close(); err != nil {
|
||||||
|
return Message{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/sendPhoto", &body)
|
||||||
|
if err != nil {
|
||||||
|
return Message{}, err
|
||||||
|
}
|
||||||
|
req.Header.Set("Content-Type", writer.FormDataContentType())
|
||||||
|
resp, err := c.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return Message{}, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
|
payload, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
||||||
|
return Message{}, fmt.Errorf("sendPhoto: telegram returned %s: %s", resp.Status, string(payload))
|
||||||
|
}
|
||||||
|
var decoded apiResponse[Message]
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&decoded); err != nil {
|
||||||
|
return Message{}, err
|
||||||
|
}
|
||||||
|
if !decoded.OK {
|
||||||
|
return Message{}, fmt.Errorf("sendPhoto: telegram error %d: %s", decoded.ErrorCode, decoded.Description)
|
||||||
|
}
|
||||||
|
return decoded.Result, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (c *Client) SendDocumentBytes(ctx context.Context, chatID int64, filename string, data []byte, caption string) (Message, error) {
|
func (c *Client) SendDocumentBytes(ctx context.Context, chatID int64, filename string, data []byte, caption string) (Message, error) {
|
||||||
var body bytes.Buffer
|
var body bytes.Buffer
|
||||||
writer := multipart.NewWriter(&body)
|
writer := multipart.NewWriter(&body)
|
||||||
|
|||||||
@@ -20,9 +20,12 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
telegramDownloadLimit = 20 * 1024 * 1024
|
telegramDownloadLimit = 20 * 1024 * 1024
|
||||||
resumeThreadPageSize = 8
|
resumeThreadPageSize = 8
|
||||||
commandSummaryLimit = 120
|
commandSummaryLimit = 120
|
||||||
|
telegramPhotoDirectiveStart = "<!-- telegram-photo "
|
||||||
|
telegramPhotoDirectiveEnd = " -->"
|
||||||
|
telegramPhotoCaptionLimit = 1024
|
||||||
)
|
)
|
||||||
|
|
||||||
type Bot struct {
|
type Bot struct {
|
||||||
@@ -41,11 +44,22 @@ type Bot struct {
|
|||||||
diffs map[string]string
|
diffs map[string]string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type (
	// assistantMessageSegment is one ordered piece of an assistant message:
	// either a run of visible text or a photo directive.
	assistantMessageSegment struct {
		// Text holds visible message text; it is empty for photo segments.
		Text string
		// Photo points at the parsed directive; it is nil for text segments.
		Photo *assistantPhotoDirective
	}

	// assistantPhotoDirective is the JSON payload carried by a telegram-photo
	// directive line in assistant output.
	assistantPhotoDirective struct {
		// Path is the location of the image file to send.
		Path string `json:"path"`
		// Caption is optional and is left out of the JSON when empty.
		Caption string `json:"caption,omitempty"`
	}
)
|
||||||
|
|
||||||
type outputState struct {
|
type outputState struct {
|
||||||
chatID int64
|
chatID int64
|
||||||
assistant strings.Builder
|
assistant strings.Builder
|
||||||
sentAny bool
|
sentAny bool
|
||||||
tools map[string]toolMessageState
|
tools map[string]toolMessageState
|
||||||
|
sentImages map[string]bool
|
||||||
workingIndicatorOff context.CancelFunc
|
workingIndicatorOff context.CancelFunc
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -689,6 +703,15 @@ func (b *Bot) handleSandboxCommand(ctx context.Context, userID, chatID int64, se
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isPicturePath reports whether path ends in one of the supported picture
// extensions (.jpg, .jpeg, .png, .webp, .gif), compared case-insensitively.
// Only the extension is inspected; the file itself is never touched.
func isPicturePath(path string) bool {
	ext := strings.ToLower(filepath.Ext(path))
	for _, supported := range [...]string{".jpg", ".jpeg", ".png", ".webp", ".gif"} {
		if ext == supported {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
func (b *Bot) sendDiff(ctx context.Context, chatID int64, session store.Session) error {
|
func (b *Bot) sendDiff(ctx context.Context, chatID int64, session store.Session) error {
|
||||||
if session.ActiveThreadID == 0 {
|
if session.ActiveThreadID == 0 {
|
||||||
_, err := b.tg.SendMessage(ctx, chatID, "No active thread.", SendMessageOptions{})
|
_, err := b.tg.SendMessage(ctx, chatID, "No active thread.", SendMessageOptions{})
|
||||||
@@ -1333,7 +1356,10 @@ func (b *Bot) handleCodexNotification(ctx context.Context, event codexapp.Event)
|
|||||||
return b.flushAssistantMessage(ctx, params.ThreadID)
|
return b.flushAssistantMessage(ctx, params.ThreadID)
|
||||||
}
|
}
|
||||||
if params.ThreadID != "" {
|
if params.ThreadID != "" {
|
||||||
return b.upsertToolMessage(ctx, params.ThreadID, item.ID, renderCodexItemCompleted(item))
|
if err := b.upsertToolMessage(ctx, params.ThreadID, item.ID, renderCodexItemCompleted(item)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return b.sendImageOutput(ctx, params.ThreadID, item)
|
||||||
}
|
}
|
||||||
case "turn/diff/updated":
|
case "turn/diff/updated":
|
||||||
var params struct {
|
var params struct {
|
||||||
@@ -1456,6 +1482,7 @@ func (b *Bot) newOutputState(chatID int64) *outputState {
|
|||||||
return &outputState{
|
return &outputState{
|
||||||
chatID: chatID,
|
chatID: chatID,
|
||||||
tools: make(map[string]toolMessageState),
|
tools: make(map[string]toolMessageState),
|
||||||
|
sentImages: make(map[string]bool),
|
||||||
workingIndicatorOff: b.startWorkingIndicator(chatID),
|
workingIndicatorOff: b.startWorkingIndicator(chatID),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1550,6 +1577,52 @@ func (b *Bot) failActiveOutputs(ctx context.Context, message string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *Bot) sendImageOutput(ctx context.Context, threadID string, item codexThreadItemView) error {
|
||||||
|
if item.Type != "imageGeneration" || strings.TrimSpace(item.SavedPath) == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
path := strings.TrimSpace(item.SavedPath)
|
||||||
|
if !b.markImageOutputPending(threadID, path) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
data, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
b.logger.Printf("read generated image %s: %v", path, err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
chatID, err := b.outputChatID(ctx, threadID)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
caption := "Generated image"
|
||||||
|
if item.Status != "" {
|
||||||
|
caption += ": " + item.Status
|
||||||
|
}
|
||||||
|
if _, err := b.tg.SendPhotoBytes(ctx, chatID, path, data, caption); err != nil {
|
||||||
|
b.logger.Printf("send generated image %s: %v", path, err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
b.markOutputSent(threadID)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *Bot) markImageOutputPending(threadID, path string) bool {
|
||||||
|
b.mu.Lock()
|
||||||
|
defer b.mu.Unlock()
|
||||||
|
state := b.outputs[threadID]
|
||||||
|
if state == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if state.sentImages == nil {
|
||||||
|
state.sentImages = make(map[string]bool)
|
||||||
|
}
|
||||||
|
if state.sentImages[path] {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
state.sentImages[path] = true
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
func (b *Bot) sendOutputBlock(ctx context.Context, threadID, block string) error {
|
func (b *Bot) sendOutputBlock(ctx context.Context, threadID, block string) error {
|
||||||
block = strings.TrimSpace(block)
|
block = strings.TrimSpace(block)
|
||||||
if block == "" {
|
if block == "" {
|
||||||
@@ -1764,6 +1837,100 @@ func ignoreTelegramMessageNotModified(err error) error {
|
|||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func splitAssistantMessageSegments(text string) []assistantMessageSegment {
|
||||||
|
var segments []assistantMessageSegment
|
||||||
|
var visible strings.Builder
|
||||||
|
flushVisible := func() {
|
||||||
|
if visible.Len() == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
segments = append(segments, assistantMessageSegment{Text: visible.String()})
|
||||||
|
visible.Reset()
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, line := range strings.SplitAfter(text, "\n") {
|
||||||
|
body := strings.TrimSuffix(line, "\n")
|
||||||
|
body = strings.TrimSuffix(body, "\r")
|
||||||
|
if directive, ok := parseAssistantPhotoDirectiveLine(body); ok {
|
||||||
|
flushVisible()
|
||||||
|
segments = append(segments, assistantMessageSegment{Photo: &directive})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
visible.WriteString(line)
|
||||||
|
}
|
||||||
|
flushVisible()
|
||||||
|
return segments
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseAssistantPhotoDirectiveLine(line string) (assistantPhotoDirective, bool) {
|
||||||
|
trimmed := strings.TrimSpace(line)
|
||||||
|
if !strings.HasPrefix(trimmed, telegramPhotoDirectiveStart) || !strings.HasSuffix(trimmed, telegramPhotoDirectiveEnd) {
|
||||||
|
return assistantPhotoDirective{}, false
|
||||||
|
}
|
||||||
|
raw := strings.TrimSuffix(strings.TrimPrefix(trimmed, telegramPhotoDirectiveStart), telegramPhotoDirectiveEnd)
|
||||||
|
raw = strings.TrimSpace(raw)
|
||||||
|
var directive assistantPhotoDirective
|
||||||
|
if err := json.Unmarshal([]byte(raw), &directive); err != nil {
|
||||||
|
return assistantPhotoDirective{}, false
|
||||||
|
}
|
||||||
|
directive.Path = strings.TrimSpace(directive.Path)
|
||||||
|
directive.Caption = strings.TrimSpace(directive.Caption)
|
||||||
|
return directive, true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *Bot) sendAssistantText(ctx context.Context, chatID int64, text string) error {
|
||||||
|
for _, segment := range splitAssistantMessageSegments(text) {
|
||||||
|
if segment.Text != "" && strings.TrimSpace(segment.Text) != "" {
|
||||||
|
if err := b.sendLong(ctx, chatID, segment.Text); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if segment.Photo != nil {
|
||||||
|
if err := b.sendAssistantPhoto(ctx, chatID, *segment.Photo); err != nil {
|
||||||
|
b.logger.Printf("send assistant photo: %v", err)
|
||||||
|
if sendErr := b.sendLong(ctx, chatID, "Could not send photo: "+err.Error()); sendErr != nil {
|
||||||
|
return sendErr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *Bot) sendAssistantPhoto(ctx context.Context, chatID int64, directive assistantPhotoDirective) error {
|
||||||
|
path := strings.TrimSpace(directive.Path)
|
||||||
|
if path == "" {
|
||||||
|
return errors.New("photo directive is missing a path")
|
||||||
|
}
|
||||||
|
if !filepath.IsAbs(path) {
|
||||||
|
return fmt.Errorf("photo path must be absolute: %s", path)
|
||||||
|
}
|
||||||
|
if !isPicturePath(path) {
|
||||||
|
return fmt.Errorf("unsupported photo type: %s", filepath.Base(path))
|
||||||
|
}
|
||||||
|
data, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("read %s: %v", filepath.Base(path), err)
|
||||||
|
}
|
||||||
|
caption := truncateTelegramPhotoCaption(directive.Caption)
|
||||||
|
if _, err := b.tg.SendPhotoBytes(ctx, chatID, path, data, caption); err != nil {
|
||||||
|
return fmt.Errorf("send %s: %v", filepath.Base(path), err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func truncateTelegramPhotoCaption(caption string) string {
|
||||||
|
runes := []rune(caption)
|
||||||
|
if len(runes) <= telegramPhotoCaptionLimit {
|
||||||
|
return caption
|
||||||
|
}
|
||||||
|
if telegramPhotoCaptionLimit <= 3 {
|
||||||
|
return string(runes[:telegramPhotoCaptionLimit])
|
||||||
|
}
|
||||||
|
return string(runes[:telegramPhotoCaptionLimit-3]) + "..."
|
||||||
|
}
|
||||||
|
|
||||||
func (b *Bot) appendAssistantDelta(ctx context.Context, threadID, delta string) error {
|
func (b *Bot) appendAssistantDelta(ctx context.Context, threadID, delta string) error {
|
||||||
if delta == "" {
|
if delta == "" {
|
||||||
return nil
|
return nil
|
||||||
@@ -1792,7 +1959,7 @@ func (b *Bot) flushAssistantMessage(ctx context.Context, threadID string) error
|
|||||||
state.assistant.Reset()
|
state.assistant.Reset()
|
||||||
b.mu.Unlock()
|
b.mu.Unlock()
|
||||||
|
|
||||||
if err := b.sendLong(ctx, chatID, text); err != nil {
|
if err := b.sendAssistantText(ctx, chatID, text); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
b.markOutputSent(threadID)
|
b.markOutputSent(threadID)
|
||||||
|
|||||||
@@ -77,6 +77,31 @@ func TestParseCommand(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSplitAssistantMessageSegmentsWithPhotoDirective(t *testing.T) {
|
||||||
|
text := "before\n<!-- telegram-photo {\"path\":\"/tmp/photo.jpg\",\"caption\":\"hello\"} -->\nafter"
|
||||||
|
segments := splitAssistantMessageSegments(text)
|
||||||
|
if len(segments) != 3 {
|
||||||
|
t.Fatalf("segments = %d, want 3: %#v", len(segments), segments)
|
||||||
|
}
|
||||||
|
if segments[0].Text != "before\n" || segments[0].Photo != nil {
|
||||||
|
t.Fatalf("unexpected first segment: %#v", segments[0])
|
||||||
|
}
|
||||||
|
if segments[1].Photo == nil || segments[1].Photo.Path != "/tmp/photo.jpg" || segments[1].Photo.Caption != "hello" {
|
||||||
|
t.Fatalf("unexpected photo segment: %#v", segments[1])
|
||||||
|
}
|
||||||
|
if segments[2].Text != "after" || segments[2].Photo != nil {
|
||||||
|
t.Fatalf("unexpected final segment: %#v", segments[2])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInvalidPhotoDirectiveStaysVisible(t *testing.T) {
|
||||||
|
text := "<!-- telegram-photo not-json -->"
|
||||||
|
segments := splitAssistantMessageSegments(text)
|
||||||
|
if len(segments) != 1 || segments[0].Text != text {
|
||||||
|
t.Fatalf("invalid directive should stay text: %#v", segments)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRenderCodexCommandExecutionItem(t *testing.T) {
|
func TestRenderCodexCommandExecutionItem(t *testing.T) {
|
||||||
output := "line 1\nline 2"
|
output := "line 1\nline 2"
|
||||||
exitCode := 0
|
exitCode := 0
|
||||||
|
|||||||
Reference in New Issue
Block a user