from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "lucasdozie/aos-qmodel-hermeai"
gguf_file = "ggml-model-Q4_K_M.gguf"

# Load the tokenizer and the quantized model directly from the GGUF file.
# Note: torch.jit.load cannot read GGUF checkpoints; recent transformers
# versions (>= 4.41) can instead dequantize GGUF files on load for
# supported architectures via the gguf_file argument.
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=gguf_file)

# Prepare input text
input_text = "Hello, how are you?"
inputs = tokenizer(input_text, return_tensors="pt")

# Run inference and decode the generated tokens
outputs = model.generate(**inputs)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)
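
# --- Alternative: run the GGUF file natively with llama-cpp-python ---
# The snippet above dequantizes the weights into full precision; to run
# the Q4_K_M file in its quantized form, llama.cpp bindings are the usual
# route. This is a minimal sketch, assuming llama-cpp-python and
# huggingface_hub are installed; the max_tokens value is an illustrative
# choice, not part of the original snippet.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the quantized file from the repo, then load it with llama.cpp.
gguf_path = hf_hub_download(
    repo_id="lucasdozie/aos-qmodel-hermeai",
    filename="ggml-model-Q4_K_M.gguf",
)
llm = Llama(model_path=gguf_path)

result = llm("Hello, how are you?", max_tokens=64)
print(result["choices"][0]["text"])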