Replace standalone Telegram bot with full CodeAnywhere framework fork. BetterBot shares all framework code and customizes only: - instance.py: BetterBot identity, system prompt, feature flags - tools/site_editing/: list_files, read_file, write_file with auto git push - .env: model defaults and site directory paths - compose/: Docker setup with betterlifesg + memoraiz mounts - deploy script: RackNerd with Infisical secrets
144 lines · 6 KiB · Python
"""Learning extraction — identifies facts worth persisting from conversation turns."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from typing import Any
|
|
|
|
|
|
def extract_learnings_from_turn(
|
|
user_message: str,
|
|
assistant_message: str,
|
|
) -> tuple[list[tuple[str, str]], list[tuple[str, str]]]:
|
|
"""Extract project-scoped and global learnings from a completed turn.
|
|
|
|
Returns:
|
|
(project_learnings, global_learnings) where each item is (fact, category).
|
|
|
|
Strategy: rule-based extraction (zero cost, no extra LLM call).
|
|
- Detect user-stated preferences, personality cues, contact info → global
|
|
- Detect technical discoveries, patterns, architecture notes → project
|
|
"""
|
|
project: list[tuple[str, str]] = []
|
|
global_facts: list[tuple[str, str]] = []
|
|
|
|
# ── Global learnings (user preferences, personality) ──────────────
|
|
|
|
# Explicit preference statements
|
|
pref_patterns = [
|
|
r"(?:I (?:prefer|like|want|always|never|hate|don't like|can't stand)\s+.+?)[.!?]",
|
|
r"(?:my (?:favorite|preferred|default|usual)\s+.+?(?:is|are)\s+.+?)[.!?]",
|
|
r"(?:I'm (?:a|an)\s+.+?)(?:\s+(?:at|who|that|working))?[.,!?]",
|
|
]
|
|
for pattern in pref_patterns:
|
|
for match in re.finditer(pattern, user_message, re.IGNORECASE):
|
|
fact = match.group(0).strip()
|
|
if 10 < len(fact) < 200:
|
|
global_facts.append((fact, "preference"))
|
|
|
|
# Job/role mentions
|
|
job_patterns = [
|
|
r"(?:I (?:work|am working)\s+(?:at|for|on|with)\s+.+?)[.,!?]",
|
|
r"(?:I'm\s+(?:a|an)\s+\w+(?:\s+\w+)?\s+(?:engineer|developer|designer|manager|analyst|researcher|founder|student))",
|
|
r"(?:my (?:role|title|position|job)\s+(?:is|as)\s+.+?)[.,!?]",
|
|
]
|
|
for pattern in job_patterns:
|
|
for match in re.finditer(pattern, user_message, re.IGNORECASE):
|
|
fact = match.group(0).strip()
|
|
if 10 < len(fact) < 200:
|
|
global_facts.append((fact, "profile"))
|
|
|
|
# Contact/identity info
|
|
identity_patterns = [
|
|
r"(?:my (?:email|github|twitter|handle|username|name|phone|number)\s+(?:is)\s+.+?)[.,!?]",
|
|
]
|
|
for pattern in identity_patterns:
|
|
for match in re.finditer(pattern, user_message, re.IGNORECASE):
|
|
fact = match.group(0).strip()
|
|
if 10 < len(fact) < 200:
|
|
global_facts.append((fact, "identity"))
|
|
|
|
# Family / relationships / personal facts
|
|
family_patterns = [
|
|
r"(?:I (?:have|am married to|live with)\s+(?:a\s+)?(?:wife|husband|partner|spouse)\s+\w+[^.!?]*)[.!?]",
|
|
r"(?:my (?:wife|husband|partner|spouse|daughter|son|child|kid|mother|father|parent|sibling|brother|sister)(?:'s\s+\w+)?\s+(?:is|was|name(?:d| is)?|born)\s+[^.!?]+)[.!?]",
|
|
r"(?:I (?:have|have got)\s+(?:two|three|four|five|\d+)\s+(?:daughters?|sons?|kids?|children)[^.!?]*)[.!?]",
|
|
r"[A-Z][a-z]+\s+born\s+\d{4}",
|
|
]
|
|
for pattern in family_patterns:
|
|
for match in re.finditer(pattern, user_message, re.IGNORECASE):
|
|
fact = match.group(0).strip()
|
|
if 10 < len(fact) < 300:
|
|
global_facts.append((fact, "personal"))
|
|
|
|
# Tone/personality instructions (e.g., "be more concise", "use bullet points")
|
|
tone_patterns = [
|
|
r"(?:(?:be|use|respond|reply|answer|speak|write)\s+(?:more\s+)?(?:concise|brief|detailed|verbose|short|formal|casual|friendly|professional|terse|bullet|markdown|code))[.!?]?",
|
|
r"(?:don't (?:use|add|include|give)\s+.+?(?:explanation|comment|context|preamble|prefix))[.!?]?",
|
|
]
|
|
for pattern in tone_patterns:
|
|
for match in re.finditer(pattern, user_message, re.IGNORECASE):
|
|
fact = match.group(0).strip()
|
|
if 5 < len(fact) < 200:
|
|
global_facts.append((fact, "tone"))
|
|
|
|
# ── Project learnings (technical facts, patterns) ─────────────────
|
|
|
|
# Architecture/discovery from assistant responses
|
|
arch_patterns = [
|
|
r"(?:the\s+\w[\w-]*(?:\s+\w[\w-]*)?\s+(?:uses|runs on|depends on|is backed by|is configured with|requires)\s+.+?)[.,]",
|
|
]
|
|
for pattern in arch_patterns:
|
|
for match in re.finditer(pattern, assistant_message, re.IGNORECASE):
|
|
fact = match.group(0).strip()
|
|
if 15 < len(fact) < 250:
|
|
project.append((fact, "architecture"))
|
|
|
|
# Bug/pattern from assistant
|
|
bug_patterns = [
|
|
r"(?:(?:root cause|the issue|the problem|the bug)\s+(?:is|was)\s+.+?)[.,]",
|
|
r"(?:this\s+(?:happens|occurs)\s+(?:because|due to|when)\s+.+?)[.,]",
|
|
]
|
|
for pattern in bug_patterns:
|
|
for match in re.finditer(pattern, assistant_message, re.IGNORECASE):
|
|
fact = match.group(0).strip()
|
|
if 15 < len(fact) < 250:
|
|
project.append((fact, "bug_pattern"))
|
|
|
|
# Deployment patterns from assistant
|
|
deploy_patterns = [
|
|
r"(?:deployed?\s+(?:via|through|using|to)\s+.+?)[.,]",
|
|
r"(?:the\s+(?:deploy|ci|pipeline|action)\s+(?:uses|runs|triggers)\s+.+?)[.,]",
|
|
]
|
|
for pattern in deploy_patterns:
|
|
for match in re.finditer(pattern, assistant_message, re.IGNORECASE):
|
|
fact = match.group(0).strip()
|
|
if 15 < len(fact) < 250:
|
|
project.append((fact, "deployment"))
|
|
|
|
return project, global_facts
|
|
|
|
|
|
def format_learnings_for_prompt(
|
|
project_learnings: list[dict[str, Any]],
|
|
global_learnings: list[dict[str, Any]],
|
|
) -> str | None:
|
|
"""Format learnings into a section to append to the system prompt.
|
|
|
|
Returns None if there are no learnings to inject.
|
|
"""
|
|
sections: list[str] = []
|
|
|
|
if global_learnings:
|
|
lines = ["## User Preferences & Profile"]
|
|
for item in global_learnings[-15:]: # Keep prompt concise
|
|
lines.append(f"- {item['fact']}")
|
|
sections.append("\n".join(lines))
|
|
|
|
if project_learnings:
|
|
lines = ["## Project Learnings"]
|
|
for item in project_learnings[-15:]:
|
|
lines.append(f"- {item['fact']}")
|
|
sections.append("\n".join(lines))
|
|
|
|
return "\n\n".join(sections) if sections else None
|