from vllm import LLM, SamplingParams

# Initialize the LLM in CPU-only mode.
# Note: this requires a vLLM build with CPU support;
# the default pip wheels target CUDA GPUs.
llm = LLM(model="Tann-dev/sex-chat-dirty-girlfriend", device="cpu")

# Set up sampling parameters
sampling_params = SamplingParams(temperature=0.7, max_tokens=50)

# Define a prompt to generate text from
prompt = "Hello, how can I help you?"

# Generate text from the model
outputs = llm.generate([prompt], sampling_params=sampling_params)

# generate() returns one RequestOutput per prompt; the generated
# completions live in its .outputs list
print(f"Generated text: {outputs[0].outputs[0].text}")
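Note that `llm.generate` returns a list of `RequestOutput` objects, one per prompt, and each of those holds its generated completions in an `.outputs` list of `CompletionOutput` objects. That is why the text is read via `outputs[0].outputs[0].text` rather than `outputs[0].text`, which would fail since `RequestOutput` has no `.text` attribute.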