import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Set the random seed for reproducibility
torch.random.manual_seed(0)

# Load the model (no explicit 'device_map'; note that 4-bit loading requires
# bitsandbytes and a CUDA GPU -- drop 'load_in_4bit' for plain CPU inference)
model = AutoModelForCausalLM.from_pretrained(
    "AdnanRiaz107/CodePhi-3-mini-0.1Klora",
    torch_dtype="auto",           # Let transformers select the dtype automatically
    trust_remote_code=True,
    attn_implementation="eager",  # Use 'eager' attention (no flash-attention needed)
    load_in_4bit=True,
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("AdnanRiaz107/CodePhi-3-mini-0.1Klora")

# Create a text generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Generation arguments (greedy decoding, so no sampling temperature is needed)
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "do_sample": False,
}

# Gradio interface function
def generate_response(input_text):
    # Prepare the input for the model in chat format
    messages = [{"role": "user", "content": input_text}]
    # Generate output
    output = pipe(messages, **generation_args)
    return output[0]["generated_text"]

# Create Gradio demo interface
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Enter your question here...",
        label="Your Input",
    ),
    outputs=gr.Textbox(
        label="Model Response",
        placeholder="Response will be displayed here...",
    ),
    title="AI Assistant for Python Code Generation",
    description="Ask a coding question or describe the Python code you need, and the AI assistant will generate a response.",
    examples=[
        ["Write a Python function that checks whether a string is a palindrome."],
        ["How do I solve the equation 2x + 3 = 7 in Python?"],
        ["Show me how to read a CSV file with pandas."],
    ],
    theme="default",  # Other options include "compact", "huggingface", etc.
)

if __name__ == "__main__":
    demo.launch()