import streamlit as st
from llama_cpp import Llama

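# A minimal local chat app: Streamlit provides the chat UI, and
# llama-cpp-python runs a quantized Qwen2.5-Coder model on the CPU.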
repo_ir = "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF" |
|
llm = Llama.from_pretrained( |
|
repo_id=repo_ir, |
|
filename="qwen2.5-coder-1.5b-instruct-q8_0.gguf", |
|
verbose=True, |
|
use_mmap=True, |
|
use_mlock=True, |
|
n_threads=4, |
|
n_threads_batch=4, |
|
n_ctx=8000, |
|
) |
|
print(f"{repo_ir} loaded successfully. ✅") |
|
|
|
|
|
|
|
def response_generator(messages):
    # Stream the completion so tokens can be rendered as they are generated.
    completion = llm.create_chat_completion(
        messages, max_tokens=2048, stream=True, temperature=0.7, top_p=0.95
    )

    for chunk in completion:
        delta = chunk["choices"][0]["delta"]
        # The first and last chunks carry only role/finish metadata, not text,
        # so guard on "content" before yielding.
        if "content" in delta:
            yield delta["content"]

st.title("CSV TO SQL") |
|
|
|
|
|
if "messages" not in st.session_state: |
|
st.session_state.messages = [] |
|
|
|
|
|
for message in st.session_state.messages: |
|
with st.chat_message(message["role"]): |
|
st.markdown(message["content"]) |
|
|
|
|
|
if prompt := st.chat_input("What is up?"):
    # Record and display the user's message.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Build the request: system prompt first, then the stored history. The
    # history already ends with the current prompt, so appending it again
    # here would send the user's message to the model twice.
    messages = [{"role": "system", "content": "You are a helpful assistant"}]
    messages.extend(st.session_state.messages)

    # Note: nothing truncates the history, so very long chats can eventually
    # exceed the 8000-token context window configured above.
    with st.chat_message("assistant"):
        response = st.write_stream(response_generator(messages=messages))

    st.session_state.messages.append({"role": "assistant", "content": response})
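
# To try this locally (assuming the script is saved as app.py):
#   streamlit run app.py
# On first use, Llama.from_pretrained downloads the GGUF weights from the
# Hugging Face Hub into the local cache.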