feat: add image/photo support for vision-capable LLM requests
This commit is contained in:
parent
9316a38699
commit
8bd9ce3beb
1 changed file with 36 additions and 8 deletions
42
main.py
42
main.py
|
|
@ -3,14 +3,15 @@
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from openai import OpenAI
|
|
||||||
import telegram
|
import telegram
|
||||||
|
from openai import OpenAI
|
||||||
from telegram import BotCommand, Update
|
from telegram import BotCommand, Update
|
||||||
from telegram.error import BadRequest, TimedOut
|
from telegram.error import BadRequest, TimedOut
|
||||||
from telegram.ext import (
|
from telegram.ext import (
|
||||||
|
|
@ -308,9 +309,8 @@ async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE) -> None
|
||||||
if not _is_authorized(update.effective_user.id):
|
if not _is_authorized(update.effective_user.id):
|
||||||
return
|
return
|
||||||
chat_id = update.effective_chat.id
|
chat_id = update.effective_chat.id
|
||||||
user_text = update.message.text
|
# Text can come from message.text (plain) or message.caption (photo)
|
||||||
if not user_text:
|
user_text = update.message.text or update.message.caption or ""
|
||||||
return
|
|
||||||
|
|
||||||
# In group chats, only respond if bot is mentioned or replied to
|
# In group chats, only respond if bot is mentioned or replied to
|
||||||
if update.effective_chat.type in ("group", "supergroup"):
|
if update.effective_chat.type in ("group", "supergroup"):
|
||||||
|
|
@ -326,11 +326,37 @@ async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE) -> None
|
||||||
# Strip the @mention from the text
|
# Strip the @mention from the text
|
||||||
if bot_username:
|
if bot_username:
|
||||||
user_text = user_text.replace(f"@{bot_username}", "").strip()
|
user_text = user_text.replace(f"@{bot_username}", "").strip()
|
||||||
if not user_text:
|
|
||||||
|
# Download photo if present
|
||||||
|
image_b64: str | None = None
|
||||||
|
if update.message.photo:
|
||||||
|
try:
|
||||||
|
photo = update.message.photo[-1] # highest resolution
|
||||||
|
file = await ctx.bot.get_file(photo.file_id)
|
||||||
|
data = await file.download_as_bytearray()
|
||||||
|
image_b64 = base64.b64encode(bytes(data)).decode()
|
||||||
|
log.info("Downloaded photo: %d bytes", len(data))
|
||||||
|
except Exception as e:
|
||||||
|
log.error("Failed to download photo: %s", e)
|
||||||
|
|
||||||
|
if not user_text and not image_b64:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Build user message content (text-only or multipart with image)
|
||||||
|
if image_b64:
|
||||||
|
content_parts: list[dict] = []
|
||||||
|
if user_text:
|
||||||
|
content_parts.append({"type": "text", "text": user_text})
|
||||||
|
content_parts.append({
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
|
||||||
|
})
|
||||||
|
user_content: str | list[dict] = content_parts
|
||||||
|
else:
|
||||||
|
user_content = user_text
|
||||||
|
|
||||||
messages = get_messages(chat_id)
|
messages = get_messages(chat_id)
|
||||||
messages.append({"role": "user", "content": user_text})
|
messages.append({"role": "user", "content": user_content})
|
||||||
|
|
||||||
# Send immediate "Thinking ..." placeholder so user knows the bot read their message
|
# Send immediate "Thinking ..." placeholder so user knows the bot read their message
|
||||||
thinking = await _safe_reply(update, "Thinking ...")
|
thinking = await _safe_reply(update, "Thinking ...")
|
||||||
|
|
@ -432,7 +458,9 @@ def main() -> None:
|
||||||
app = ApplicationBuilder().token(TG_BOT_TOKEN).build()
|
app = ApplicationBuilder().token(TG_BOT_TOKEN).build()
|
||||||
app.add_handler(CommandHandler("start", cmd_start))
|
app.add_handler(CommandHandler("start", cmd_start))
|
||||||
app.add_handler(CommandHandler("reset", cmd_reset))
|
app.add_handler(CommandHandler("reset", cmd_reset))
|
||||||
app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
|
app.add_handler(MessageHandler(
|
||||||
|
(filters.TEXT | filters.PHOTO) & ~filters.COMMAND, handle_message
|
||||||
|
))
|
||||||
app.add_error_handler(_error_handler)
|
app.add_error_handler(_error_handler)
|
||||||
|
|
||||||
# Register bot commands for the / menu
|
# Register bot commands for the / menu
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue