"""Gradio chat demo for an Indonesian ChatDoctor model.

Loads the Llama 2 7B base model, applies the fine-tuned PEFT adapter on top of
it, and serves a small chat UI where each user message is wrapped in an
Alpaca-style Indonesian prompt before generation.
"""

import os

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE_MODEL_ID = "meta-llama/Llama-2-7b-hf"
ADAPTER_ID = "fadliaulawi/Llama-2-7b-finetuned"

# Maximum number of prompt tokens fed to the model.
CUTOFF_LEN = 256

# Indonesian rendering of the Alpaca-LoRA prompt template.  These strings are
# part of the model's input format and must not be reworded casually.
PROMPT_WITH_INPUT = (
    "Di bawah ini adalah instruksi yang menjelaskan tugas, dipasangkan dengan "
    "masukan yang memberikan konteks lebih lanjut. Tulis tanggapan yang "
    "melengkapi permintaan dengan tepat."
    "\n\n### Instruksi:\n{instruction}\n\n### Masukan:\n{input}"
)
PROMPT_NO_INPUT = (
    "Di bawah ini adalah instruksi yang menjelaskan tugas. Tulis tanggapan "
    "yang melengkapi permintaan dengan tepat."
    "\n\n### Instruksi:\n{instruction}"
)
# The space before the newlines is intentional: it reproduces the original
# prompt byte-for-byte.
RESPONSE_SUFFIX = " \n\n### Tanggapan:\n"
SECTION_MARKER = "###"

# SECURITY: the access token used to be hard-coded here.  Credentials must
# never live in source control; the token is now taken from the environment
# (None falls back to whatever credentials the Hugging Face libraries find on
# their own).  The previously committed token should be revoked.
access_token = os.environ.get("HF_TOKEN")

model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID, token=access_token)
model = PeftModel.from_pretrained(model, ADAPTER_ID)
tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL_ID,
    padding_side="left",
    use_fast=False,
    token=access_token,
)


def generate_prompt(instruction, input, label):
    """Build the full text prompt for one example.

    Args:
        instruction: Task description placed under ``### Instruksi:``.
        input: Extra context placed under ``### Masukan:``.  When empty, the
            section is omitted instead of raising ``UnboundLocalError`` as the
            previous implementation did.
        label: Optional expected response appended after ``### Tanggapan:``;
            pass an empty string at inference time.

    Returns:
        The prompt string, ending with the response header (plus ``label`` if
        one was given).
    """
    if input:
        res = PROMPT_WITH_INPUT.format(instruction=instruction, input=input)
    else:
        res = PROMPT_NO_INPUT.format(instruction=instruction)

    res = f"{res}{RESPONSE_SUFFIX}"
    if label:
        res = f"{res}{label}"
    return res


def user(message, history):
    """Append the submitted message to the chat history as an open turn.

    Returns:
        A pair ``("", new_history)``: the empty string clears the textbox and
        the new history ends with ``[message, None]`` (reply not yet filled).
    """
    # Tolerate a missing history (e.g. None) instead of failing on `None + list`.
    return "", (history or []) + [[message, None]]


def generate_and_tokenize_prompt(data_point):
    """Render a data point into a prompt and tokenize it.

    Args:
        data_point: Mapping with the keys ``"instruction"``, ``"input"`` and
            ``"output"``.

    Returns:
        The tokenizer output (with ``input_ids`` and ``attention_mask`` as
        plain lists), truncated to ``CUTOFF_LEN`` tokens.
    """
    full_prompt = generate_prompt(
        data_point["instruction"],
        data_point["input"],
        data_point["output"],
    )

    # The Llama tokenizer has no pad token by default; reuse EOS.
    tokenizer.pad_token = tokenizer.eos_token
    result = tokenizer(
        full_prompt,
        truncation=True,
        max_length=CUTOFF_LEN,
        padding=True,
        return_tensors=None,
    )

    # Append EOS when there is room for it.
    # NOTE(review): this looks like a carry-over from a training pipeline; at
    # inference it places EOS right before generation starts.  Kept as-is to
    # preserve the demo's existing behaviour -- confirm whether it is wanted.
    if (
        result["input_ids"][-1] != tokenizer.eos_token_id
        and len(result["input_ids"]) < CUTOFF_LEN
    ):
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)

    return result


def bot(history, temperature, max_new_tokens, top_p, top_k):
    """Generate the model's reply for the last user turn in ``history``.

    Args:
        history: Chat history as a list of ``[user_message, bot_message]``
            pairs; the last pair's reply slot is filled in place.
        temperature: Sampling temperature.  Values ``<= 0`` switch to greedy
            decoding, because sampling requires a strictly positive value.
        max_new_tokens: Upper bound on generated tokens (clamped to >= 1).
        top_p: Nucleus sampling cutoff.
        top_k: Top-k sampling cutoff.

    Returns:
        The same ``history`` list with the last reply filled in.
    """
    user_message = history[-1][0]

    data = {
        'instruction': "Jika Anda seorang dokter, silakan menjawab pertanyaan medis berdasarkan deskripsi pasien.",
        'input': user_message,
        'output': '',
    }
    encoded = generate_and_tokenize_prompt(data)

    bot_input_ids = torch.LongTensor([encoded['input_ids']])
    attention_mask = torch.LongTensor([encoded['attention_mask']])
    prompt_length = bot_input_ids.shape[1]

    temperature = float(temperature)
    use_sampling = temperature > 0
    generation_kwargs = {
        "input_ids": bot_input_ids,
        # Passed explicitly: with pad == eos the mask cannot be inferred.
        "attention_mask": attention_mask,
        "pad_token_id": tokenizer.eos_token_id,
        # Slider values may arrive as floats, and 0 new tokens is meaningless.
        "max_new_tokens": max(1, int(max_new_tokens)),
        "do_sample": use_sampling,
    }
    if use_sampling:
        generation_kwargs.update(
            temperature=temperature,
            top_p=float(top_p),
            top_k=int(top_k),
        )

    output_ids = model.generate(**generation_kwargs)

    # Decode only the newly generated tokens.  Splitting the full decoded text
    # on "###" and indexing a fixed section broke whenever the user message
    # itself contained "###" or the prompt was truncated before the
    # "### Tanggapan:" header.
    generated_text = tokenizer.decode(
        output_ids[0][prompt_length:],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # Drop any follow-up section the model starts on its own.
    response = generated_text.split(SECTION_MARKER, 1)[0].strip()

    history[-1][1] = response
    return history


with gr.Blocks() as demo:
    gr.Markdown(
        """# ChatDoctor - Llama 2 7b 🩺

A [ChatDoctor - Llama 2 7b](https://huggingface.co/fadliaulawi/Llama-2-7b-finetuned) demo.
From the [Llama 2 7b](https://huggingface.co/meta-llama/Llama-2-7b-hf) model and finetuned on the Indonesian translation of [ChatDoctor](https://github.com/Kent0n-Li/ChatDoctor) dataset.
"""
    )

    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    submit = gr.Button("Submit")
    clear = gr.Button("Clear")
    examples = gr.Examples(
        examples=[
            "Dokter, aku mengalami kelelahan akhir-akhir ini.",
            "Dokter, aku merasa pusing, lemah dan sakit dada tajam akhir-akhir ini.",
            "Dokter, aku merasa sangat depresi akhir-akhir ini dan juga mengalami perubahan suhu tubuhku.",
            "Dokter, saya sudah beberapa minggu mengalami suara serak dan tidak kunjung membaik meski sudah minum obat. Apa masalahnya?",
        ],
        inputs=[msg],
    )

    gr.Markdown(
        """## Adjust the additional inputs:"""
    )

    temperature = gr.Slider(
        0, 5, value=0.8, step=0.1, label='Temperature',
        info="Controls randomness, higher values increase diversity.",
    )
    max_length = gr.Slider(
        0, 1024, value=50, step=1, label='Max Length',
        info="The maximum numbers of output's tokens.",
    )
    top_p = gr.Slider(
        0, 1, value=0.8, step=0.1, label='Top P',
        info="The cumulative probability cutoff for token selection. Lower values mean sampling from a smaller, more top-weighted nucleus.",
    )
    top_k = gr.Slider(
        0, 50, value=10, step=1, label='Top K',
        info="Sample from the k most likely next tokens at each step. Lower k focuses on higher probability tokens.",
    )

    # First record the user's turn (and clear the textbox) without queueing,
    # then run the generation step and update the chat window.
    submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, temperature, max_length, top_p, top_k], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    # NOTE(review): `concurrency_count` is kept from the original call;
    # confirm the installed Gradio version still accepts it.
    demo.queue(concurrency_count=100).launch()