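# TARS: a minimal Gradio chat Space that downloads the miqu-1-70b GGUF weights
# from the Hugging Face Hub and serves them with llama-cpp-python, offloading
# the model layers to the GPU.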
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
import torch
# Sanity checks: confirm CUDA is visible before loading the model.
print(f"Is CUDA available: {torch.cuda.is_available()}")  # expect True on a GPU instance
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
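# CMAKE_ARGS / FORCE_CMAKE are the environment variables typically used when
# building llama-cpp-python with GPU support, e.g.:
#   CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python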
# print(f"CMAKE_ARGS={os.environ['CMAKE_ARGS']}")
# print(f"FORCE_CMAKE={os.environ['FORCE_CMAKE']}")
print(f'Llama={Llama.__name__}')
models_path = 'models/'
if not os.path.exists(models_path):
    os.makedirs(models_path)
# Fetch the GGUF weights from the Hugging Face Hub into models/.
downloaded_model_path = hf_hub_download(
    repo_id="miqudev/miqu-1-70b",
    filename="miqu-1-70b.q4_k_m.gguf",
    local_dir=models_path,
)
print(f'Downloaded path: {downloaded_model_path}')
print('Initializing model...')
llm = Llama(
    model_path=downloaded_model_path,
    n_ctx=4096,        # context window size
    n_threads=10,      # CPU threads for non-offloaded work
    n_gpu_layers=100,  # offload (up to) all layers to the GPU
    n_batch=512,       # prompt-processing batch size
    # Note: temp, n_predict and n_keep are llama.cpp CLI options, not Llama()
    # constructor arguments; temperature is passed per request in mix_query below.
)
print('Model loaded.')
def mix_query(query, history):
    """Wrap the user message in [INST] instruction tags and generate a reply.

    history is supplied by gr.ChatInterface but is not used here.
    """
    output = llm(
        f"[INST] {query} [/INST]",
        max_tokens=1024,
        temperature=0.7,   # moved here from the constructor
        stop=["</s>"],
        echo=False,
    )
    print(output['choices'][0]['text'])
    return output['choices'][0]['text']
# share=True creates a public Gradio link; server_name='0.0.0.0' binds to all interfaces.
demo = gr.ChatInterface(
    fn=mix_query,
    examples=["Explain the Fermi paradox"],
    title="TARS",
    theme="soft",
)
demo.launch(share=True, server_name='0.0.0.0')