betterbot/learning.py
Andre K e68c84424f
Some checks failed
Deploy BetterBot / deploy (push) Failing after 3s
Deploy BetterBot / notify (push) Successful in 3s
feat: fork from CodeAnywhere framework
Replace standalone Telegram bot with full CodeAnywhere framework fork.
BetterBot shares all framework code and customizes only:
- instance.py: BetterBot identity, system prompt, feature flags
- tools/site_editing/: list_files, read_file, write_file with auto git push
- .env: model defaults and site directory paths
- compose/: Docker setup with betterlifesg + memoraiz mounts
- deploy script: RackNerd with Infisical secrets
2026-04-19 08:01:27 +08:00

144 lines
6 KiB
Python

"""Learning extraction — identifies facts worth persisting from conversation turns."""
from __future__ import annotations
import re
from typing import Any
# A captured fact ends at sentence punctuation that is NOT immediately followed
# by a word character -- so the dots in "a@b.com", "3.11" or "app.py" no longer
# cut the fact short -- or at the end of the line when the sentence is left
# unpunctuated (``$`` is line-anchored because rules compile with re.MULTILINE).
_SENTENCE_END = r"(?:[.!?](?!\w)|$)"
# Same, but a comma also closes the fact.
_CLAUSE_END = r"(?:[.,!?](?!\w)|$)"
# Terminators used for assistant prose: period or comma only.
_STATEMENT_END = r"(?:[.,](?!\w)|$)"


def _build_rules(spec):
    """Compile ``(category, min_len, max_len, patterns)`` rows once at import."""
    flags = re.IGNORECASE | re.MULTILINE
    return tuple(
        (category, min_len, max_len, tuple(re.compile(p, flags) for p in patterns))
        for category, min_len, max_len, patterns in spec
    )


# Rules applied to the user's message -> global learnings.
# A fact is kept only when ``min_len < len(fact) < max_len``.
_USER_RULES = _build_rules((
    # Explicit preference statements.
    ("preference", 10, 200, (
        r"(?:I (?:prefer|like|want|always|never|hate|don't like|can't stand)\s+.+?)" + _SENTENCE_END,
        r"(?:my (?:favorite|preferred|default|usual)\s+.+?(?:is|are)\s+.+?)" + _SENTENCE_END,
        r"(?:I'm (?:a|an)\s+.+?)(?:\s+(?:at|who|that|working))?" + _CLAUSE_END,
    )),
    # Job / role mentions.
    ("profile", 10, 200, (
        r"(?:I (?:work|am working)\s+(?:at|for|on|with)\s+.+?)" + _CLAUSE_END,
        r"(?:I'm\s+(?:a|an)\s+\w+(?:\s+\w+)?\s+(?:engineer|developer|designer|manager|analyst|researcher|founder|student))",
        r"(?:my (?:role|title|position|job)\s+(?:is|as)\s+.+?)" + _CLAUSE_END,
    )),
    # Contact / identity info.
    ("identity", 10, 200, (
        r"(?:my (?:email|github|twitter|handle|username|name|phone|number)\s+(?:is)\s+.+?)" + _CLAUSE_END,
    )),
    # Family / relationships / personal facts.
    ("personal", 10, 300, (
        r"(?:I (?:have|am married to|live with)\s+(?:a\s+)?(?:wife|husband|partner|spouse)\s+\w+.*?)" + _SENTENCE_END,
        r"(?:my (?:wife|husband|partner|spouse|daughter|son|child|kid|mother|father|parent|sibling|brother|sister)(?:'s\s+\w+)?\s+(?:is|was|name(?:d| is)?|born)\s+.+?)" + _SENTENCE_END,
        r"(?:I (?:have|have got)\s+(?:two|three|four|five|\d+)\s+(?:daughters?|sons?|kids?|children).*?)" + _SENTENCE_END,
        # NOTE: compiled with IGNORECASE, so [A-Z][a-z]+ also matches
        # lower-case words (e.g. "was born 1990"); kept as-is on purpose so
        # names typed in lower case are still picked up.
        r"[A-Z][a-z]+\s+born\s+\d{4}",
    )),
    # Tone / personality instructions ("be more concise", "use bullet ...").
    ("tone", 5, 200, (
        r"(?:(?:be|use|respond|reply|answer|speak|write)\s+(?:more\s+)?(?:concise|brief|detailed|verbose|short|formal|casual|friendly|professional|terse|bullet|markdown|code))[.!?]?",
        r"(?:don't (?:use|add|include|give)\s+.+?(?:explanation|comment|context|preamble|prefix))[.!?]?",
    )),
))

# Rules applied to the assistant's message -> project learnings.
_ASSISTANT_RULES = _build_rules((
    # Architecture / discovery statements.
    ("architecture", 15, 250, (
        r"(?:the\s+\w[\w-]*(?:\s+\w[\w-]*)?\s+(?:uses|runs on|depends on|is backed by|is configured with|requires)\s+.+?)" + _STATEMENT_END,
    )),
    # Bug diagnoses.
    ("bug_pattern", 15, 250, (
        r"(?:(?:root cause|the issue|the problem|the bug)\s+(?:is|was)\s+.+?)" + _STATEMENT_END,
        r"(?:this\s+(?:happens|occurs)\s+(?:because|due to|when)\s+.+?)" + _STATEMENT_END,
    )),
    # Deployment notes.
    ("deployment", 15, 250, (
        r"(?:deployed?\s+(?:via|through|using|to)\s+.+?)" + _STATEMENT_END,
        r"(?:the\s+(?:deploy|ci|pipeline|action)\s+(?:uses|runs|triggers)\s+.+?)" + _STATEMENT_END,
    )),
))


def _collect_facts(text: str, rules: tuple) -> list[tuple[str, str]]:
    """Apply *rules* to *text*; return unique (fact, category) pairs in match order."""
    if not text:
        return []
    # A dict keeps first-seen order while dropping exact repeats, so a sentence
    # the user states twice is only recorded once.
    found: dict[tuple[str, str], None] = {}
    for category, min_len, max_len, patterns in rules:
        for pattern in patterns:
            for match in pattern.finditer(text):
                fact = match.group(0).strip()
                if min_len < len(fact) < max_len:
                    found.setdefault((fact, category), None)
    return list(found)


def extract_learnings_from_turn(
    user_message: str,
    assistant_message: str,
) -> tuple[list[tuple[str, str]], list[tuple[str, str]]]:
    """Extract project-scoped and global learnings from a completed turn.

    Strategy: rule-based extraction with regular expressions only (no extra
    LLM call).

    - ``user_message`` is scanned for preferences, job/role, contact details,
      family facts and tone instructions -> global learnings, categorised as
      ``preference``, ``profile``, ``identity``, ``personal`` or ``tone``.
    - ``assistant_message`` is scanned for architecture notes, bug diagnoses
      and deployment notes -> project learnings, categorised as
      ``architecture``, ``bug_pattern`` or ``deployment``.

    Matching is case-insensitive. Each fact is the matched text (including its
    closing punctuation, if any) with surrounding whitespace stripped, and is
    kept only if its length falls inside the category's window. A fact runs
    up to sentence punctuation that is not glued to a following word
    character, or to the end of the line, so values such as e-mail addresses
    and version numbers are captured whole and unpunctuated sentences are not
    lost. Exact duplicate (fact, category) pairs are reported once.

    Args:
        user_message: Text the user sent this turn; empty/None yields no
            global learnings.
        assistant_message: Text the assistant replied with; empty/None yields
            no project learnings.

    Returns:
        ``(project_learnings, global_learnings)`` where each item is
        ``(fact, category)``, ordered by category, then pattern, then position.
    """
    project = _collect_facts(assistant_message, _ASSISTANT_RULES)
    global_facts = _collect_facts(user_message, _USER_RULES)
    return project, global_facts
def format_learnings_for_prompt(
    project_learnings: list[dict[str, Any]],
    global_learnings: list[dict[str, Any]],
) -> str | None:
    """Render stored learnings as markdown sections for the system prompt.

    The user-level section ("## User Preferences & Profile") is emitted before
    the project section ("## Project Learnings"); a section is left out when
    its list is empty. Each section lists at most the last 15 entries of its
    list, one bullet per entry, using the entry's ``"fact"`` value.

    Returns:
        The sections joined by a blank line, or None when there is nothing
        to inject.
    """
    titled_groups = (
        ("## User Preferences & Profile", global_learnings),
        ("## Project Learnings", project_learnings),
    )
    blocks = []
    for heading, entries in titled_groups:
        if not entries:
            continue
        # Only the tail of each list is rendered, to keep the prompt concise.
        bullets = [f"- {entry['fact']}" for entry in entries[-15:]]
        blocks.append("\n".join([heading, *bullets]))
    if not blocks:
        return None
    return "\n\n".join(blocks)