from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch
import pickle
import streamlit as st
from huggingface_hub import InferenceClient
from translate import Translator

# Remote endpoint used for answer generation
client = InferenceClient(
    "mistralai/Mistral-7B-Instruct-v0.1"
)

# Select the GPU when one is available; passed to the local pipelines below
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
def init_session_state():
    # Keep conversation history across Streamlit reruns; format_prompt below
    # expects a sequence of (user_prompt, bot_response) pairs
    if 'history' not in st.session_state:
        st.session_state.history = []
# Sampling parameters for the remote generation call
temperature = 0.9
max_new_tokens = 256
top_p = 0.95
repetition_penalty = 1.0

generate_kwargs = dict(
    temperature=temperature,
    max_new_tokens=max_new_tokens,
    top_p=top_p,
    repetition_penalty=repetition_penalty,
    do_sample=True,
    seed=42,
)
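# These kwargs are unpacked into client.text_generation further below, i.e.
# client.text_generation(prompt, temperature=0.9, max_new_tokens=256, ...)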
def format_prompt(message, history):
    # Build a Mistral-instruct prompt: each past turn is wrapped in
    # [INST] ... [/INST] followed by the bot's reply, then the new message
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt
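# For example, format_prompt("Next?", [("Hi", "Hello!")]) returns
# "<s>[INST] Hi [/INST] Hello!</s> [INST] Next? [/INST]"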
# Initialize session state
init_session_state()

# Local generation pipeline; only the commented-out local path further below
# uses it, the live path calls the remote InferenceClient instead
# pipe = pipeline("text2text-generation", model="google/flan-t5-base")
pipe = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True, device=device)
# pipe = pipeline("text-generation", model="GeneZC/MiniChat-1.5-3B")
# pipe = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")

# Zero-shot classifier that routes the question to a legal topic
# (equivalent to loading the tokenizer and model explicitly):
# model_name = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForSequenceClassification.from_pretrained(model_name)
classifier = pipeline("zero-shot-classification", model="MoritzLaurer/mDeBERTa-v3-base-mnli-xnli", device=device)
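# A zero-shot call returns the candidate labels sorted by score, e.g.
# (scores illustrative):
# classifier("...", ["a", "b"], multi_label=False)
#     -> {"sequence": "...", "labels": ["b", "a"], "scores": [0.91, 0.09]}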
# Earlier experiments with fixed Greek label sets:
# with open('chapter_titles.pkl', 'rb') as file:
#     titles_astiko = pickle.load(file)
# labels1 = ["κληρονομικό", "ακίνητα", "διαζύγιο"]  # inheritance, real estate, divorce
# labels2 = ["αποδοχή κληρονομιάς", "αποποίηση", "διαθήκη"]  # acceptance of inheritance, renunciation, will
# labels3 = ["μίσθωση", "κυριότητα", "έξωση", "απλήρωτα νοίκια"]  # lease, ownership, eviction, unpaid rent
# titles_astiko = ["γάμος", "αλλοδαπός", "φορολογία", "κληρονομικά", "στέγη", "οικογενειακό", "εμπορικό", "κλοπή", "απάτη"]
# (marriage, foreigner, taxation, inheritance, housing, family, commercial, theft, fraud)

# Load the topic dictionary: Greek topic -> subtopic -> reference answer text
with open('my_dict.pickle', 'rb') as file:
    dictionary = pickle.load(file)
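# Assumed shape of my_dict.pickle, inferred from its usage below
# (keys illustrative, not read from the file):
# dictionary = {
#     "κληρονομικό": {"διαθήκη": "reference answer text ...", ...},
#     ...
# }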
def classify(text, labels):
    # Single-label zero-shot classification over the given candidate labels
    output = classifier(text, labels, multi_label=False)
    return output
text = st.text_input('Enter some text:')  # Input field for the user's question

if text:
    # Stage 1: pick the best-matching top-level legal topic
    labels = list(dictionary)
    output = classify(text, labels)
    output = output["labels"][0]

    # Stage 2: pick the best-matching subtopic within that topic
    labels = list(dictionary[output])
    output2 = classify(text, labels)
    output2 = output2["labels"][0]

    # Stored reference text for the selected topic/subtopic
    answer = dictionary[output][output2]
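    # Example routing (labels illustrative): a question about wills would match
    # the topic "κληρονομικό" (inheritance) and then the subtopic "διαθήκη"
    # (will), so `answer` becomes that subtopic's stored reference text.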
    # Translators between Greek and English
    translator = Translator(from_lang='el', to_lang='en')
    translator2 = Translator(from_lang='en', to_lang='el')

    # "Your question relates to <topic> law"
    st.text("H ερώτηση σας σχετίζεται με " + output + " δίκαιο")

    # Translate the reference text and the question from Greek to English
    answer = translator.translate(answer)
    text = translator.translate(text)

    # "More specifically, it relates to <subtopic>"
    st.text("Πιο συγκεκριμένα σχετίζεται με " + output2)

    # text_to_translate2 = text[499:999]
    # translated_text2 = translator.translate(text_to_translate2)

    # Local-generation alternative, kept for reference:
    # st.session_state.history += "Based on this info only:" + answer + " ,answer this question, by reasoning step by step:" + text
    # out = pipe(st.session_state.history, max_new_tokens=256)

    history = st.session_state.history
    prompt = "Based on this info only:" + answer + " ,answer this question, by reasoning step by step:" + text
    formatted_prompt = format_prompt(prompt, history)
    # Stream tokens from the remote endpoint and accumulate the full answer
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
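    # With details=True, each streamed chunk should be a huggingface_hub
    # TextGenerationStreamResponse whose .token.text is the new token's text
    # (the exact class name depends on the huggingface_hub version).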
    # st.text(st.session_state.history)

    # Translate the generated answer back to Greek and display it
    # (note: the translate package may truncate long texts per request)
    # translated_text2 = translator2.translate(out[0]['generated_text'])
    translated_text2 = translator2.translate(output)
    st.text(translated_text2)

    # with st.expander("View Full Output", expanded=False):
    #     st.write(translated_text2)
    # st.text(translated_text2)
    # st.text("History: " + st.session_state.history)
    # st.text(output)
    # st.text(output2)
    # st.text(answer)