from llama_cpp import Llama
import gradio as gr

# Load the model once at import time. The weights are identified by a
# repository id and the name of a GGUF file inside that repository.
_MODEL_REPO_ID = "mradermacher/ultiima-78B-i1-GGUF"
_MODEL_FILENAME = "ultiima-78B.i1-IQ1_S.gguf"
llm = Llama.from_pretrained(repo_id=_MODEL_REPO_ID, filename=_MODEL_FILENAME)

# Generate a response from the model.
def generate_response(prompt, max_tokens=150):
    """Return the model's completion for *prompt* as stripped text.

    Args:
        prompt: Text passed unchanged to the module-level ``llm``.
        max_tokens: Value forwarded to ``llm`` as ``max_tokens``. Defaults
            to 150, the limit that used to be hard-coded here, so existing
            single-argument callers behave exactly as before.

    Returns:
        The ``text`` field of the first entry in the response's
        ``choices`` list, with leading and trailing whitespace removed.
    """
    completion = llm(prompt, max_tokens=max_tokens)
    return completion["choices"][0]["text"].strip()

# Gradio callback: run one chat turn and return the updated conversation.
def chat_with_model(input_text, history=None):
    """Send *input_text* to the model and append the exchange to the history.

    Args:
        input_text: The user's message for this turn.
        history: Conversation so far as a list of
            ``(user_message, model_reply)`` pairs, or ``None`` on the
            first turn.

    Returns:
        A 2-tuple ``(history, history)`` holding the same updated list
        twice: once for the chatbot display and once for the state that
        is passed back in on the next call.
    """
    # Work on a copy so the list handed in by the caller is never mutated.
    turns = [] if history is None else list(history)
    reply = generate_response(input_text)
    # The tuple-style chatbot history used here treats every entry as one
    # (user message, bot message) pair. Appending ("You", text) and
    # ("Model", reply) as separate entries would render the labels as user
    # messages, so record the whole turn as a single pair instead.
    turns.append((input_text, reply))
    return turns, turns

# Configure the Gradio interface: "text" and "state" go into
# chat_with_model, and its two return values feed "chatbot" and "state".
iface = gr.Interface(
    title="ULTIIMA-78B Chat Interface",
    description="ULTIIMA-78B モデルを使用したチャットインターフェースです。",
    fn=chat_with_model,
    inputs=["text", "state"],
    outputs=["chatbot", "state"],
)

# Launch the interface.
iface.launch()