Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Run the script and open the link in the browser.
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
import os
|
5 |
+
import torch
|
6 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
7 |
+
|
8 |
+
# Checkpoints that can be served, keyed by a short name.
# Previously CHECKPOINT_PATH was assigned several times in a row, so every
# assignment except the last was silently dead; the table makes the choice explicit.
CHECKPOINTS = {
    # finetuning with original tokenizer
    'finetuning-original': '/work/pnrr_itserr/latin_model/cineca_checkpoints/finetuning-gpt2_2-nodes_tokenizer-original_lr-6e-4_fcocchi',
    # finetuning with our embedding and tokenizer
    'finetuning-our-embedding': '/work/pnrr_itserr/latin_model/cineca_checkpoints/finetuning-gpt2_2-nodes_tokenizer-our-embedding_lr-6e-4_fcocchi',
    # scratch with original tokenizer
    'scratch-original': '/work/pnrr_itserr/latin_model/cineca_checkpoints/scratch_2-nodes_tokenizer-original_packing_fcocchi',
    # scratch with latbert tokenizer
    'scratch-latbert': '/work/pnrr_itserr/latin_model/cineca_checkpoints/scratch_2-nodes_tokenizer_latbert-original_packing_fcocchi',
}

# The default is the checkpoint that was effectively active before (the last
# assignment won). LATIN_LLM_CHECKPOINT may name another entry of the table.
CHECKPOINT_NAME = os.environ.get('LATIN_LLM_CHECKPOINT', 'scratch-latbert')
if CHECKPOINT_NAME not in CHECKPOINTS:
    raise ValueError(
        f"Unknown checkpoint {CHECKPOINT_NAME!r}; expected one of {sorted(CHECKPOINTS)}"
    )
CHECKPOINT_PATH = CHECKPOINTS[CHECKPOINT_NAME]
print(f"Loading model from: {CHECKPOINT_PATH}")

# Tokenizer and model are loaded once at import time and shared by all requests.
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_PATH)
model = AutoModelForCausalLM.from_pretrained(CHECKPOINT_PATH)
|
20 |
+
|
21 |
+
# Static texts and assets shown in the web UI.

# Introductory paragraph rendered as Markdown below the page title.
description = """
This is a Latin Language Model (LLM) based on GPT-2 and it was trained on a large corpus of Latin texts and can generate text in Latin.
Please enter a prompt in Latin to generate text.
"""
# Page title; contains inline HTML because it is embedded in an <h1> tag.
title = "(L<sup>3</sup>) - Latin Large Language Model"
# Footer text rendered as Markdown at the bottom of the page.
article = "hello world ..."
# Example prompts offered to the user as one-click inputs.
examples = ['Accidere ex una scintilla', 'Audacter calumniare,', 'Consolatium misero comites']
# Path of the logo image displayed at the top of the page.
logo_image = '/homes/fcocchi/itserr/itserr_latin_llm/ITSERR_row_logo.png'
|
29 |
+
|
30 |
+
# Text-generation pipeline shared by every request; built on first use.
_text_generator = None


def _get_text_generator():
    """Return the shared text-generation pipeline, creating it on first use."""
    global _text_generator
    if _text_generator is None:
        if torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            device = torch.device("cpu")
            print("No GPU available")
        # Building the pipeline is invariant across requests, so do it only once
        # instead of on every button click.
        _text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
    return _text_generator


def generate_text(prompt):
    """Generate a sampled continuation of ``prompt`` with the loaded model.

    Args:
        prompt: Text typed by the user in the input box.

    Returns:
        The ``generated_text`` field of the first sequence returned by the
        text-generation pipeline.
    """
    text_generator = _get_text_generator()
    print("***** Generate *****")
    # Sampling is enabled, so repeated calls with the same prompt may differ.
    # NOTE(review): per the transformers generation docs, max_length bounds the
    # total sequence (prompt included) - confirm 50 is the intended budget.
    generated_text = text_generator(
        prompt,
        max_length=50,
        do_sample=True,
        temperature=1.0,
        repetition_penalty=2.0,
        truncation=True,
    )
    return generated_text[0]['generated_text']
|
41 |
+
|
42 |
+
# CSS passed to gr.Blocks: centers the element with id "logo" and fixes its size.
custom_css = """
#logo {
display: block;
margin-left: auto;
margin-right: auto;
width: 512px;
height: 256px;
}
"""
|
51 |
+
|
52 |
+
# Build the web page: logo, title, description, an input/output pair of text
# boxes side by side, a button that runs generate_text, example prompts, footer.
with gr.Blocks(css=custom_css) as demo:
    # The logo lives at a machine-specific path; skip it when the file is
    # absent so a missing image does not prevent the app from starting.
    if os.path.isfile(logo_image):
        gr.Image(logo_image, elem_id="logo")
    else:
        print(f"Logo image not found, skipping: {logo_image}")
    gr.Markdown(f"<h1 style='text-align: center;'>{title}</h1>")
    gr.Markdown(description)

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(lines=5, placeholder="Enter latin text here...", label="Input Text")
        with gr.Column():
            output_text = gr.Textbox(lines=5, placeholder="Output text will appear here...", label="Output Text")

    # Clicking the button sends the input box content to generate_text and
    # writes the returned string into the output box.
    generate_button = gr.Button("Generate Text")
    generate_button.click(fn=generate_text, inputs=input_text, outputs=output_text)

    # Clicking an example copies it into the input box.
    gr.Examples(examples=examples, inputs=input_text)
    gr.Markdown(article)

# share=True asks Gradio to also create a public share link.
demo.launch(share=True)
|
70 |
+
# Unused alternative: the same demo built with the simpler gr.Interface API.
# iface = gr.Interface(fn=generate_text, inputs="text", outputs="text",
#                      description=description, title=title, examples=examples, article=article)
|