from vllm import LLM, SamplingParams

# Initialize the LLM in CPU-only mode.
# Note: this requires a vLLM build with CPU support;
# the default pip wheels target CUDA GPUs.
llm = LLM(model="Tann-dev/sex-chat-dirty-girlfriend", device="cpu")

# Set up sampling parameters
sampling_params = SamplingParams(temperature=0.7, max_tokens=50)

# Define a prompt to generate text from
prompt = "Hello, how can I help you?"

# Generate text from the model
outputs = llm.generate([prompt], sampling_params=sampling_params)

# generate() returns one RequestOutput per prompt; the generated
# completions live in its .outputs list
print(f"Generated text: {outputs[0].outputs[0].text}")
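Note that `llm.generate` returns a list of `RequestOutput` objects, one per prompt, and each of those holds its generated completions in an `.outputs` list of `CompletionOutput` objects. That is why the text is read via `outputs[0].outputs[0].text` rather than `outputs[0].text`, which would fail since `RequestOutput` has no `.text` attribute.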