Edit model card

Installing Libraries

Install the following libraries before running the example below:

  • pip install -q sentencepiece
  • pip install -q transformers
  • pip install -q accelerate
  • pip install --upgrade -q bitsandbytes

import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub repository id shared by the tokenizer and the model weights.
model_path = "Neurai/llama7b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,  # reuse the id above so tokenizer and weights cannot drift apart
    # Optional: add load_in_8bit=True here to try 8-bit loading
    # (presumably why bitsandbytes is listed in the install steps).
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    device_map="auto",  # let the library decide where the weights are placed
)
model.eval()  # inference only: put the module in evaluation mode
print('model loaded')

# Example Persian prompt ("How many years does a giraffe live?"). Despite the
# name, it is passed to the model as the entire input text, not as a separate
# system message.
SYS_PROMPT = "زرافه چند سال عمر میکند؟"

def response_generate(input_prompt):
    """Generate a sampled completion for ``input_prompt``.

    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        input_prompt: Text to tokenize and feed to the model.

    Returns:
        The first sequence returned by ``model.generate``, decoded to a
        string with special tokens removed.
    """
    encoded = tokenizer(input_prompt, return_tensors="pt")

    # The model is loaded with device_map="auto", so follow wherever it was
    # actually placed instead of assuming a CUDA device exists.
    device = model.device

    # Some tokenizers define no pad token; fall back to EOS explicitly rather
    # than handing ``None`` to generate().
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    outputs = model.generate(
        inputs=encoded["input_ids"].to(device),
        attention_mask=encoded["attention_mask"].to(device),
        do_sample=True,
        temperature=0.3,
        top_k=50,
        top_p=0.9,
        max_new_tokens=512,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=pad_token_id,
    )
    # A single prompt yields a batch of one; return that only entry.
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

# Run the example prompt once and print the decoded output.
print(response_generate(SYS_PROMPT))
Downloads last month
68
Safetensors
Model size
6.9B params
Tensor type
F32
·
BF16
·
I8
·
Inference API
This model can be loaded on Inference API (serverless).