Agent-S mllm.py, module.py

by myeongjaechoi 2025. 3. 30. 16:51

import base64  # 이미지 인코딩
import numpy as np # 배열 형태의 데이터 처리
from gui_agents.s2.core.engine import (
    LMMEngineAnthropic,
    LMMEngineAzureOpenAI,
    LMMEngineHuggingFace,
    LMMEngineOpenAI,
    LMMEnginevLLM,
)

데이터 타입 매핑

OpenAI는 image_url -> image_url (그대로 유지)
Anthropic은 image_url -> image

# Per-provider lookup: generic content-part type -> the type name that
# provider's message format uses for it.
data_type_map = dict(
    openai=dict(image_url="image_url"),
    anthropic=dict(image_url="image"),
)

LMMAgent 클래스 초기화

engine_params : 엔진 설정 매개변수로, 어떤 언어 모델을 사용할지 지정
system_prompt : 언어 모델에 제공할 시스템 프롬프트
engine : 직접 엔진 객체를 제공할 때 사용

class LMMAgent:
    """Pair a language-model engine with a running list of chat messages.

    Args:
        engine_params: Mapping of engine settings, consulted only when
            ``engine`` is None. Its "engine_type" entry selects the backend
            ("openai", "anthropic", "azure", "vllm" or "huggingface") and the
            whole mapping is forwarded to that backend's constructor as
            keyword arguments.
        system_prompt: Text of the system message. A falsy value selects the
            default "You are a helpful assistant.".
        engine: Ready-made engine object; when given it is used as-is and
            ``engine_params`` is ignored.

    Raises:
        ValueError: If neither ``engine`` nor ``engine_params`` is given, or
            if "engine_type" is not one of the supported values.
    """

    def __init__(self, engine_params=None, system_prompt=None, engine=None):
        if engine is not None:
            # A caller-supplied engine always wins over engine_params.
            self.engine = engine
        elif engine_params is None:
            raise ValueError("engine_params must be provided")
        else:
            kind = engine_params.get("engine_type")
            if kind == "openai":
                backend = LMMEngineOpenAI
            elif kind == "anthropic":
                backend = LMMEngineAnthropic
            elif kind == "azure":
                backend = LMMEngineAzureOpenAI
            elif kind == "vllm":
                backend = LMMEnginevLLM
            elif kind == "huggingface":
                backend = LMMEngineHuggingFace
            else:
                raise ValueError("engine_type is not supported")
            self.engine = backend(**engine_params)

        # Conversation history; the system message is installed right below.
        self.messages = []
        self.add_system_prompt(system_prompt or "You are a helpful assistant.")

이미지 인코딩 메서드

입력이 문자열(파일 경로)인 경우: 파일을 열어 내용을 읽고 base64로 인코딩
그렇지 않은 경우: 입력을 직접 base64로 인코딩

def encode_image(self, image_content):
    """Return the base64 text encoding of an image.

    Args:
        image_content: Either a filesystem path (``str`` or any
            ``os.PathLike`` object such as ``pathlib.Path``) whose file is
            read in binary mode, or a bytes-like object holding the raw
            image data.

    Returns:
        The base64 encoding of the image bytes, decoded to a ``str``.

    Raises:
        OSError: If a path is given and the file cannot be opened or read.
        TypeError: If ``image_content`` is neither a path nor bytes-like.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import block.
    import os

    # Path-like objects are not bytes-like, so without this check a
    # pathlib.Path would fall through to b64encode and raise TypeError.
    if isinstance(image_content, (str, os.PathLike)):
        with open(image_content, "rb") as image_file:
            image_content = image_file.read()
    return base64.b64encode(image_content).decode("utf-8")

초기화 메서드 : 메시지 리스트를 초기화하고 시스템 프롬프트만 포함하도록 함 -> 새로운 대화 시작할 때 유용

def reset(self):
    """Discard the conversation and keep only the system message.

    Replaces ``self.messages`` with a new one-element list holding a
    "system" message built from the current ``self.system_prompt``.
    """
    text_part = {"type": "text", "text": self.system_prompt}
    system_message = {"role": "system", "content": [text_part]}
    self.messages = [system_message]

시스템 프롬프트 설정 메서드

메시지 리스트가 비어있지 않으면: 첫 번째 메시지를 새 시스템 프롬프트로 업데이트
메시지 리스트가 비어있으면: 새 시스템 프롬프트 메시지를 추가

def add_system_prompt(self, system_prompt):
    """Store a new system prompt and install it as the first message.

    Args:
        system_prompt: Text of the system message. It is saved on
            ``self.system_prompt``; if ``self.messages`` already has entries
            the first one is overwritten, otherwise the system message is
            appended to the empty list.
    """
    self.system_prompt = system_prompt
    system_message = {
        "role": "system",
        "content": [{"type": "text", "text": self.system_prompt}],
    }

    if len(self.messages) == 0:
        self.messages.append(system_message)
        return
    # Slot 0 is overwritten, whatever was stored there before.
    self.messages[0] = system_message

그런데 생각해 보니, 나는 사용자가 이미지 파일이 아닌 텍스트만 입력하게 할 것이고 메시지 삭제 기능도 필요 없다. 그래서 필요한 부분만 남겨 임의로 작성해 보았다.

class SimplifiedLMMAgent:
    """Text-only chat agent: a system prompt plus a list of user messages."""

    def __init__(self, engine, system_prompt="You are a helpful assistant."):
        """Store the engine and prompt, and start a fresh message list.

        Args:
            engine: Object whose ``generate(messages)`` method produces the
                model response.
            system_prompt: Text used as the content of the system message.
        """
        self.engine = engine
        self.system_prompt = system_prompt
        self.messages = self._initial_messages()

    def _initial_messages(self):
        """Return a new list containing only the system message."""
        system_message = {"role": "system", "content": self.system_prompt}
        return [system_message]

    def reset(self):
        """Reset the message list to just the system message."""
        self.messages = self._initial_messages()

    def add_message(self, text_content):
        """Append ``text_content`` to the conversation as a user message."""
        user_message = {"role": "user", "content": text_content}
        self.messages.append(user_message)

    def get_response(self):
        """Return the engine's output for the current message list."""
        history = self.messages
        return self.engine.generate(history)

module.py

# 타입 힌트를 위한 모듈 임포트 (파이썬에게 '이 변수는 이런 종류일 수 있다'고 알려주는 역할)
from typing import Dict, Optional
# 다른 파일에 있는 LMMAgent 클래스 가져오기 (AI 도우미를 만들기 위한 틀)
from gui_agents.s2.core.mllm import LMMAgent

### 기본 모듈 클래스 (여러 기능을 가진 AI 도우미들의 공통 부모)
class BaseModule:
    """Base class holding the engine settings used to build LMMAgent objects."""

    def __init__(self, engine_params: Dict, platform: str):
        """Remember the default engine settings and the platform identifier.

        Args:
            engine_params: Default settings handed to ``LMMAgent`` when
                ``_create_agent`` is called without its own settings.
            platform: Platform identifier; stored unchanged (presumably an
                OS name -- confirm against callers).
        """
        self.platform = platform
        self.engine_params = engine_params

    def _create_agent(
        self,
        system_prompt: str = None,
        engine_params: Optional[Dict] = None
    ) -> LMMAgent:
        """Create a new LMMAgent instance.

        Args:
            system_prompt: Optional system prompt; when truthy it is applied
                to the new agent via ``add_system_prompt``.
            engine_params: Optional per-call engine settings. A falsy value
                (``None`` or an empty dict) falls back to
                ``self.engine_params``.

        Returns:
            The newly constructed agent.
        """
        chosen_params = engine_params if engine_params else self.engine_params
        new_agent = LMMAgent(chosen_params)
        if not system_prompt:
            # No explicit prompt: keep whatever LMMAgent set up by default.
            return new_agent
        new_agent.add_system_prompt(system_prompt)
        return new_agent

'출장 자동화 시스템' 카테고리의 다른 글

Agent-S agent_s.py (0)	2025.03.31
Agent-s knowledge.py (0)	2025.03.31
Agent-S grounding.py (0)	2025.03.30
Agent-s engine.py (0)	2025.03.30
Agent-S cli_app.py, common_utils.py, ocr_server.py, query_perplexica.py, procedural_memory.py (0)	2025.03.30