feat: add image/photo support for vision-capable LLM requests

This commit is contained in:
Andre Kamarudin 2026-04-16 08:42:29 +08:00
parent 9316a38699
commit 8bd9ce3beb

42
main.py
View file

@ -3,14 +3,15 @@
from __future__ import annotations
import base64
import json
import logging
import os
import pathlib
import subprocess
from openai import OpenAI
import telegram
from openai import OpenAI
from telegram import BotCommand, Update
from telegram.error import BadRequest, TimedOut
from telegram.ext import (
@ -308,9 +309,8 @@ async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE) -> None
if not _is_authorized(update.effective_user.id):
return
chat_id = update.effective_chat.id
user_text = update.message.text
if not user_text:
return
# Text can come from message.text (plain) or message.caption (photo)
user_text = update.message.text or update.message.caption or ""
# In group chats, only respond if bot is mentioned or replied to
if update.effective_chat.type in ("group", "supergroup"):
@ -326,11 +326,37 @@ async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE) -> None
# Strip the @mention from the text
if bot_username:
user_text = user_text.replace(f"@{bot_username}", "").strip()
if not user_text:
# Download photo if present
image_b64: str | None = None
if update.message.photo:
try:
photo = update.message.photo[-1] # highest resolution
file = await ctx.bot.get_file(photo.file_id)
data = await file.download_as_bytearray()
image_b64 = base64.b64encode(bytes(data)).decode()
log.info("Downloaded photo: %d bytes", len(data))
except Exception as e:
log.error("Failed to download photo: %s", e)
if not user_text and not image_b64:
return
# Build user message content (text-only or multipart with image)
if image_b64:
content_parts: list[dict] = []
if user_text:
content_parts.append({"type": "text", "text": user_text})
content_parts.append({
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
})
user_content: str | list[dict] = content_parts
else:
user_content = user_text
messages = get_messages(chat_id)
messages.append({"role": "user", "content": user_text})
messages.append({"role": "user", "content": user_content})
# Send immediate "Thinking ..." placeholder so user knows the bot read their message
thinking = await _safe_reply(update, "Thinking ...")
@ -432,7 +458,9 @@ def main() -> None:
app = ApplicationBuilder().token(TG_BOT_TOKEN).build()
app.add_handler(CommandHandler("start", cmd_start))
app.add_handler(CommandHandler("reset", cmd_reset))
app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
app.add_handler(MessageHandler(
(filters.TEXT | filters.PHOTO) & ~filters.COMMAND, handle_message
))
app.add_error_handler(_error_handler)
# Register bot commands for the / menu