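"""Gradio app that expands short image prompts into detailed Flux prompts.

Loads the igor-im/flux_prompt_expander GGUF model via llama-cpp-python and
serves it behind a simple text-in / text-out interface.
"""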
import gradio as gr
from llama_cpp import Llama
# Download the GGUF model from the Hugging Face Hub and load it.
llm = Llama.from_pretrained(
    repo_id="igor-im/flux_prompt_expander",
    filename="unsloth.Q8_0.gguf",
    # n_gpu_layers=-1,  # Uncomment to use GPU acceleration
    # seed=1337,        # Uncomment to set a specific seed
    # n_ctx=2048,       # Uncomment to increase the context window
)
def response(prompt):
    llm_response = llm(
        prompt,          # Prompt to expand
        max_tokens=200,  # Generate up to 200 tokens; set to None to generate to the end of the context window
        echo=True,       # Echo the prompt back in the output
    )
    return llm_response["choices"][0]["text"]
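
# Illustrative call (output is model-dependent): because echo=True, the
# returned text begins with the original prompt, e.g.
#   response("a cat on a windowsill")
#   -> "a cat on a windowsill, sitting in warm morning light, ..."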
interface = gr.Interface(fn=response, inputs='textbox', outputs='textbox')
interface.launch()
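# Pass share=True to launch() to expose a temporary public URL.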