minhtcai committed
Commit 17b826e
1 Parent(s): e7b7459

add interface

Files changed (2)
  1. app.py +19 -2
  2. llama_2_inference.py +47 -0
app.py CHANGED
@@ -1,4 +1,21 @@
  import streamlit as st
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
- x = st.slider('Select a value')
- st.write(x, 'squared is', x * x)
+ # Set the title of the app
+ st.title('LLaMA2Glenda')
+
+ # Load the model and tokenizer
+ model = AutoModelForCausalLM.from_pretrained("tminh/llama-2-7b-glenda")
+ tokenizer = AutoTokenizer.from_pretrained("TinyPixel/Llama-2-7B-bf16-sharded")
+
+ # Create a text input for the prompt
+ prompt = st.text_input('Enter your prompt:')
+
+ # Create a button to trigger the inference
+ if st.button('Generate Answer'):
+     # Run text generation pipeline
+     pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
+     result = pipe(f"<s>[INST] {prompt} [/INST]")
+     # Display the result
+     st.write(result[0]['generated_text'])
+
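Note that Streamlit reruns the entire script on every widget interaction, so app.py as committed reloads the 7B checkpoint each time the user types or clicks. A minimal sketch of how the load could be cached with st.cache_resource (available in Streamlit 1.18+; this wrapper is a suggested refinement, not part of the commit):

import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

@st.cache_resource
def load_model_and_tokenizer():
    # Runs once per process; later reruns reuse the cached objects
    model = AutoModelForCausalLM.from_pretrained("tminh/llama-2-7b-glenda")
    tokenizer = AutoTokenizer.from_pretrained("TinyPixel/Llama-2-7B-bf16-sharded")
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer()

The prompt input, button, and pipeline call below the load would work unchanged.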
 
 
 
llama_2_inference.py ADDED
@@ -0,0 +1,47 @@
+ # -*- coding: utf-8 -*-
+ """Llama 2 Inference.ipynb
+
+ Automatically generated by Colaboratory.
+
+ Original file is located at
+     https://colab.research.google.com/drive/1tS9ep-M5slbxKrGP2btamFUhMM00QkKt
+
+ # Fine-tune Llama 2 in Google Colab
+ > 🗣️ Large Language Model Course
+
+ ❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne), based on Younes Belkada's [GitHub Gist](https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da). Special thanks to Tolga HOŞGÖR for his solution to empty the VRAM.
+
+ This notebook runs on a T4 GPU. (Last update: 24 Aug 2023)
+ """
+
+ !pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7
+
+ import os
+ import torch
+ from datasets import load_dataset
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoTokenizer,
+     BitsAndBytesConfig,
+     HfArgumentParser,
+     TrainingArguments,
+     pipeline,
+     logging,
+ )
+ from peft import LoraConfig, PeftModel
+ from trl import SFTTrainer
+
+ model = AutoModelForCausalLM.from_pretrained("tminh/llama-2-7b-glenda")
+
+ model_name = "TinyPixel/Llama-2-7B-bf16-sharded"
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+ # Ignore warnings
+ logging.set_verbosity(logging.CRITICAL)
+
+ # Run text generation pipeline with our new model
+ prompt = "What can drug D07OAC do?"
+ pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
+ result = pipe(f"<s>[INST] {prompt} [/INST]")
+ print(result[0]['generated_text'])
+
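llama_2_inference.py imports several names it never uses (load_dataset, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, LoraConfig, PeftModel, SFTTrainer); these are leftovers from the fine-tuning notebook the script was exported from. On the T4 the notebook targets, a 7B checkpoint would typically be loaded in 4-bit through the bitsandbytes integration that BitsAndBytesConfig enables. A minimal sketch under that assumption (the quantization settings shown are common defaults, not taken from this commit):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# NF4 4-bit quantization keeps the 7B weights within a T4's ~16 GB of VRAM
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    "tminh/llama-2-7b-glenda",
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate place layers on the available GPU
)

The tokenizer, pipeline, and prompt handling in the script would work unchanged with the quantized model.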