import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Base model and the LoRA adapter fine-tuned on top of it.
model_name = "google/gemma-2-2b-it"
lora_model_name = "Anlam-Lab/gemma-2-2b-it-anlamlab-SA-Chatgpt4mini"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)
# Attach the LoRA adapter weights on top of the frozen base model for inference.
model = PeftModel.from_pretrained(model, lora_model_name)


def generate_response(input_text):
    # Wrap the raw input in Gemma's chat template so the decoded output
    # contains the "model\n ... <end_of_turn>" turn that is parsed below.
    messages = [{"role": "user", "content": input_text}]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    generation_config = {
        "max_length": 512,
        "temperature": 0.01,  # with do_sample=True, this is effectively greedy decoding
        "do_sample": True,
        "pad_token_id": tokenizer.pad_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }

    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_config)

    response = tokenizer.decode(outputs[0])
    # Keep only the assistant turn: the text between "model\n" and Gemma's
    # <end_of_turn> marker.
    return response.split("model\n")[1].split("<end_of_turn>")[0]


iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=5, placeholder="Metninizi buraya girin..."),  # "Enter your text here..."
    outputs=gr.Textbox(lines=5, label="Model Çıktısı"),  # "Model Output"
    title="Anlam-Lab",
)

if __name__ == "__main__":
    iface.launch()