# Source: Hugging Face Space "greeklawgpt2", file app.py
# (commit 169dc04 "Update app.py" by sotosbarl, verified; 4.59 kB).
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch
import pickle
import streamlit as st
from huggingface_hub import InferenceClient
# Remote inference client; the main flow calls client.text_generation() on it.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from translate import Translator
def init_session_state():
    """Make sure ``st.session_state.history`` exists.

    The history is created as an empty string on first use and left
    untouched when it is already present. Note that ``format_prompt``
    iterates over the history; an empty string simply yields no past turns.
    """
    if 'history' in st.session_state:
        return
    st.session_state.history = ""
# Sampling settings for the remote text-generation call.
temperature = 0.9
max_new_tokens = 256
top_p = 0.95
repetition_penalty = 1.0

# Keyword arguments unpacked into client.text_generation(); the fixed seed
# is sent along with sampling enabled.
generate_kwargs = {
    "temperature": temperature,
    "max_new_tokens": max_new_tokens,
    "top_p": top_p,
    "repetition_penalty": repetition_penalty,
    "do_sample": True,
    "seed": 42,
}
def format_prompt(message, history):
    """Build the instruction-tagged prompt string for the generation model.

    Args:
        message: The new user message; it becomes the final ``[INST]`` turn.
        history: Iterable of ``(user_prompt, bot_response)`` pairs for the
            earlier turns. An empty iterable contributes nothing.

    Returns:
        ``"<s>"`` followed by every past turn rendered as
        ``"[INST] user [/INST] bot</s> "`` and finally
        ``"[INST] message [/INST]"``.
    """
    past_turns = [
        f"[INST] {user_prompt} [/INST] {bot_response}</s> "
        for user_prompt, bot_response in history
    ]
    return "<s>" + "".join(past_turns) + f"[INST] {message} [/INST]"
# Initialize session state
init_session_state()


# Streamlit re-executes this script from the top on every user interaction.
# The loaders below are wrapped in st.cache_resource so the models and the
# lookup table are built once per server process instead of on every rerun.

@st.cache_resource
def _load_generator():
    """Load the local text-generation pipeline (Phi-3 mini instruct)."""
    # Generators tried in earlier revisions: google/flan-t5-base
    # (text2text-generation), GeneZC/MiniChat-1.5-3B and
    # mistralai/Mistral-7B-Instruct-v0.2.
    return pipeline(
        "text-generation",
        model="microsoft/Phi-3-mini-4k-instruct",
        trust_remote_code=True,
    )


@st.cache_resource
def _load_classifier():
    """Load the multilingual zero-shot classification pipeline."""
    return pipeline(
        "zero-shot-classification",
        model="MoritzLaurer/mDeBERTa-v3-base-mnli-xnli",
    )


@st.cache_resource
def _load_dictionary():
    """Load the pickled lookup table from ``my_dict.pickle``.

    The main flow uses the top-level keys as first-pass candidate labels;
    each value is itself a mapping from second-pass labels to the reference
    text that is handed to the generator.
    """
    # SECURITY: pickle.load can execute arbitrary code. This file must be a
    # trusted artifact shipped with the app, never user-supplied data.
    with open('my_dict.pickle', 'rb') as file:
        return pickle.load(file)


# NOTE(review): `pipe` is not referenced by any active code in this file
# (only by a commented-out experiment); kept so the module-level name stays
# available, but consider dropping it to save memory and start-up time.
pipe = _load_generator()
classifier = _load_classifier()

# Earlier revisions used hard-coded Greek label lists or a separate
# 'chapter_titles.pkl' file instead of this dictionary.
dictionary = _load_dictionary()
def classify(text,labels):
    """Classify ``text`` against ``labels`` with the zero-shot pipeline.

    Args:
        text: The text to classify.
        labels: Candidate labels to choose from.

    Returns:
        The raw result of the module-level ``classifier`` pipeline, called
        with ``multi_label=False``; callers read its ``"labels"`` entry.
    """
    return classifier(text, labels, multi_label=False)
text = st.text_input('Enter some text:')  # Input field for new text
if text:
    # First pass: choose among the dictionary's top-level keys. The first
    # entry of the returned "labels" list is taken as the match.
    labels = list(dictionary)
    output = classify(text, labels)["labels"][0]

    # Second pass: choose among the sub-labels stored under that key.
    labels = list(dictionary[output])
    output2 = classify(text, labels)["labels"][0]

    # Reference text that the generator must base its answer on.
    answer = dictionary[output][output2]

    # Greek -> English for the model input, English -> Greek for the reply.
    translator = Translator(from_lang='el', to_lang='en')
    translator2 = Translator(from_lang='en', to_lang='el')

    st.text("H ερώτηση σας σχετίζεται με " + output+ " δίκαιο")

    # Translate the reference text and the question from Greek to English.
    # NOTE(review): an earlier experiment sliced the text at 500 characters
    # before translating, which suggests the translation backend limits
    # input length — confirm and chunk long inputs if so.
    answer = translator.translate(answer)
    text = translator.translate(text)

    st.text("Πιο συγκεκριμένα σχετίζεται με " + output2)

    history = st.session_state.history
    prompt = "Based on this info only:" + answer +" ,answer this question, by reasoning step by step:" + text
    formatted_prompt = format_prompt(prompt, history)

    # Stream the completion and concatenate the token texts. The previous
    # revision had `yield`/`return` here at module level, which is a
    # SyntaxError outside a function and stopped the script from running.
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    generated = "".join(response.token.text for response in stream)

    # Show the model's answer translated back to Greek.
    translated_text2 = translator2.translate(generated)
    st.text(translated_text2)