markIA23 committed on
Commit 03f13dc (verified)
1 Parent(s): 5a88807

Update app.py

the llama code

Files changed (1)
  1. app.py +134 -49
app.py CHANGED
@@ -1,78 +1,163 @@
 
import os
from huggingface_hub import login
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
- import gradio as gr
-
- # Get the token from the environment variable
- hf_token = os.getenv("LLAMA31")

if hf_token:
-     # Authenticate with Hugging Face using the token
    login(token=hf_token)
else:
    raise ValueError("Hugging Face token no encontrado. Asegúrate de que la variable de entorno HF_TOKEN esté configurada.")

- # Configuration to load the model in 4-bit using bitsandbytes
- bnb_config = BitsAndBytesConfig(load_in_4bit=True)

- # Load the model and tokenizer
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
- model = AutoModelForCausalLM.from_pretrained(
-     model_id,
-     device_map="auto",
-     quantization_config=bnb_config  # Apply 4-bit quantization
- )
-
- # Define the chatbot inference function
- def chat_fn(multimodal_message):
-     # Extract the text of the question provided by the user
-     question = multimodal_message["text"]
-
-     # Build the initial conversation with the user's message
-     conversation = [{"role": "user", "content": question}]

-     # Generate the input IDs using the model's tokenizer
    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
    input_ids = input_ids.to(model.device)

-     # Set up the streamer for incremental text generation
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

-     # Set up the generation arguments
    generate_kwargs = dict(
-         input_ids=input_ids,
        streamer=streamer,
-         max_new_tokens=500,  # Adjust this as needed
-         do_sample=True,
-         temperature=0.7,  # Adjust the temperature as needed
    )

-     # Start text generation in a separate thread
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

-     # Iterate over the generated tokens and build the response
-     message = ""
    for text in streamer:
-         message += text
-         yield message

- # Create the user interface using Gradio
- with gr.Blocks() as demo:
-     # Application title, in Spanish
-     gr.Markdown("# 🔍 Chatbot Analizador de Documentos")
-
-     # Text box to display the generated response, labeled in Spanish
-     response = gr.Textbox(lines=5, label="Respuesta")

-     # Multimodal textbox for the user to upload a file and type a question, in Spanish
-     chat = gr.MultimodalTextbox(file_types=["image"], interactive=True,
-                                 show_label=False, placeholder="Sube una imagen del documento haciendo clic en '+' y haz una pregunta.")

-     # Run chat_fn when the user submits a message in the chat
-     chat.submit(chat_fn, inputs=chat, outputs=response)
-
- # Launch the application if this file is run directly
if __name__ == "__main__":
-     demo.launch()
 
+ import gradio as gr
import os
+ import spaces
from huggingface_hub import login
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ from threading import Thread

+ # Hugging Face authentication
+ hf_token = os.getenv("HF_TOKEN")  # Make sure the HF_TOKEN environment variable is set with your token
if hf_token:
    login(token=hf_token)
else:
    raise ValueError("Hugging Face token no encontrado. Asegúrate de que la variable de entorno HF_TOKEN esté configurada.")

+ TITLE = '''
+ <h1 style="text-align: center;">Meta Llama3.1 8B <a href="https://huggingface.co/spaces/ysharma/Chat_with_Meta_llama3_1_8b?duplicate=true" id="duplicate-button"><button style="color:white">Duplicate this Space</button></a></h1>
+ '''
+
+ DESCRIPTION = '''
+ <div>
+ <p>This Space demonstrates the instruction-tuned model <a href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct"><b>Meta Llama3.1 8b Chat</b></a>. Feel free to play with this demo, or duplicate to run privately!</p>
+ <p>🔨 Interested in trying out more powerful Instruct versions of Llama3.1? Check out the <a href="https://huggingface.co/chat/"><b>Hugging Chat</b></a> integration for 🦙 Meta Llama 3.1 70b, and 🦙 Meta Llama 3.1 405b</p>
+ <p>🔎 For more details about the Llama3.1 release and how to use the model with <code>transformers</code>, take a look <a href="https://huggingface.co/blog/llama31">at our blog post</a>.</p>
+ </div>
+ '''
+
+ LICENSE = """
+ <p/>
+ ---
+ Built with Llama
+ """
+
+ PLACEHOLDER = """
+ <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
+ <img src="https://ysharma-dummy-chat-app.hf.space/file=/tmp/gradio/c21ff9c8e7ecb2f7d957a72f2ef03c610ac7bbc4/Meta_lockup_positive%20primary_RGB_small.jpg" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55;">
+ <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Meta llama3.1</h1>
+ <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Ask me anything...</p>
+ </div>
+ """
+
+ css = """
+ h1 {
+   text-align: center;
+   display: block;
+   display: flex;
+   align-items: center;
+   justify-content: center;
+ }
+ #duplicate-button {
+   margin-left: 10px;
+   color: white;
+   background: #1565c0;
+   border-radius: 100vh;
+   font-size: 1rem;
+   padding: 3px 5px;
+ }
+ """

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+
+ # Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
+ terminators = [
+     tokenizer.eos_token_id,
+     tokenizer.convert_tokens_to_ids("<|eot_id|>")  # Llama 3.1 end-of-turn token
+ ]
+
+ MAX_INPUT_TOKEN_LENGTH = 4096
+
+ # Gradio inference function
+ @spaces.GPU(duration=120)
+ def chat_llama3_1_8b(message: str,
+                      history: list,
+                      temperature: float,
+                      max_new_tokens: int
+                      ) -> str:
+     """
+     Generate a streaming response using the llama3-8b model.
+     Args:
+         message (str): The input message.
+         history (list): The conversation history used by ChatInterface.
+         temperature (float): The temperature for generating the response.
+         max_new_tokens (int): The maximum number of new tokens to generate.
+     Yields:
+         str: The response generated so far.
+     """
+     conversation = []
+     for user, assistant in history:
+         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+     conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
+     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
+         input_ids=input_ids,
        streamer=streamer,
+         max_new_tokens=max_new_tokens,
+         do_sample=temperature != 0,  # Enforce greedy generation (do_sample=False) when the temperature is 0, avoiding a crash.
+         temperature=temperature,
+         eos_token_id=terminators,
    )

    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

+     outputs = []
    for text in streamer:
+         outputs.append(text)
+         yield "".join(outputs)
+

+ # Gradio block
+ chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
+
+ with gr.Blocks(fill_height=True, css=css) as demo:
+     gr.Markdown(TITLE)
+     gr.Markdown(DESCRIPTION)
+     # gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
+     gr.ChatInterface(
+         fn=chat_llama3_1_8b,
+         chatbot=chatbot,
+         fill_height=True,
+         examples_per_page=3,
+         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+         additional_inputs=[
+             gr.Slider(minimum=0, maximum=1, step=0.1, value=0.95,
+                       label="Temperature", render=False),
+             gr.Slider(minimum=128, maximum=4096, step=1, value=512,
+                       label="Max new tokens", render=False),
+         ],
+         examples=[
+             ["There's a llama in my garden 😱 What should I do?"],
+             ["What is the best way to open a can of worms?"],
+             ["The odd numbers in this group add up to an even number: 15, 32, 5, 13, 82, 7, 1."],
+             ['How to setup a human base on Mars? Give short answer.'],
+             ["Explain theory of relativity to me like I'm 8 years old."],
+             ['What is 9,000 * 9,000?'],
+             ['Write a pun-filled happy birthday message to my friend Alex.'],
+             ['Justify why a penguin might make a good king of the jungle.']
+         ],
+         cache_examples=False,
+     )
+
+     gr.Markdown(LICENSE)

if __name__ == "__main__":
+     demo.launch()
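
The heart of this change is the TextIteratorStreamer pattern: model.generate blocks until generation finishes, so the new code runs it in a background thread while the streamer is consumed as an iterator, which is what lets chat_llama3_1_8b yield partial responses into gr.ChatInterface. Below is a minimal standalone sketch of that same pattern; it uses the small, ungated gpt2 checkpoint purely for illustration (an assumption for local testing), whereas the Space itself loads the gated meta-llama/Meta-Llama-3.1-8B-Instruct model.

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Small ungated model, chosen only so the sketch runs anywhere.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

input_ids = tokenizer("The llama is", return_tensors="pt").input_ids

# skip_prompt=True drops the prompt tokens from the stream;
# skip_special_tokens=True drops e.g. the EOS token from the decoded text.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# model.generate blocks, so it runs in a background thread while the main
# thread iterates over decoded text chunks as they become available.
thread = Thread(target=model.generate,
                kwargs=dict(input_ids=input_ids, streamer=streamer, max_new_tokens=40))
thread.start()

for chunk in streamer:
    print(chunk, end="", flush=True)  # emit each chunk as it arrives
thread.join()
print()

In app.py the same loop accumulates the chunks ("".join(outputs)) and yields the growing string, because gr.ChatInterface expects each yielded value to be the full response so far rather than a delta.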