"""Streamlit app that answers a question using a two-level topic lookup.

Flow for each submitted question:

1. Zero-shot classify the question against the top-level keys of the pickled
   ``dictionary``, then against the keys nested under the winning key.
2. Look up the entry stored under that (category, sub-category) pair.
3. Translate the entry and the question from Greek ('el') to English ('en').
4. Ask the hosted Mistral instruct model to answer using only that entry.
5. Translate the generated answer back to Greek and display it.
"""

import pickle

import streamlit as st
import torch
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from translate import Translator

# Remote text-generation endpoint used to produce the final answer.
client = InferenceClient(
    "mistralai/Mistral-7B-Instruct-v0.1"
)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


def init_session_state():
    """Create ``st.session_state.history`` on the first run of a session."""
    if 'history' not in st.session_state:
        # NOTE(review): history is stored as a string, while format_prompt()
        # iterates it as (user, bot) pairs. The empty string yields no turns,
        # so this works only as long as nothing ever appends to it.
        st.session_state.history = ""


# Sampling parameters forwarded to ``client.text_generation``.
temperature = 0.9
max_new_tokens = 256
top_p = 0.95
repetition_penalty = 1.0

generate_kwargs = dict(
    temperature=temperature,
    max_new_tokens=max_new_tokens,
    top_p=top_p,
    repetition_penalty=repetition_penalty,
    do_sample=True,
    seed=42,
)


def format_prompt(message, history):
    """Build an ``[INST] ... [/INST]`` prompt from past turns plus *message*.

    Args:
        message: The new user message, placed in the final ``[INST]`` block.
        history: Iterable of ``(user_prompt, bot_response)`` pairs, oldest
            first. An empty iterable produces a prompt with *message* only.

    Returns:
        The concatenated prompt string.
    """
    turns = [
        f"[INST] {user_prompt} [/INST] {bot_response} "
        for user_prompt, bot_response in history
    ]
    turns.append(f"[INST] {message} [/INST]")
    return "".join(turns)


# Initialize session state
init_session_state()

# Alternative generation models that were tried previously:
#   google/flan-t5-base (text2text-generation), GeneZC/MiniChat-1.5-3B,
#   mistralai/Mistral-7B-Instruct-v0.2
# NOTE(review): `pipe` is not referenced by any live code in this file, yet it
# loads a local model on every script run. It is kept only to preserve the
# module's existing behavior - consider removing it.
pipe = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)

classifier = pipeline("zero-shot-classification", model="MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")

# Load dictionary from the file using pickle.
# NOTE: unpickling can execute arbitrary code; only load a file you trust.
with open('my_dict.pickle', 'rb') as file:
    dictionary = pickle.load(file)


def classify(text, labels):
    """Run single-label zero-shot classification of *text* over *labels*.

    Args:
        text: The text to classify.
        labels: Candidate labels passed to the classifier.

    Returns:
        The raw classifier output; callers read ``output["labels"][0]`` as
        the selected label.
    """
    return classifier(text, labels, multi_label=False)


def _generate(formatted_prompt):
    """Return the full completion for *formatted_prompt* as a single string."""
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    # The original code used `yield`/`return` here at module level, which is a
    # SyntaxError outside a function; collect the streamed tokens instead.
    return "".join(response.token.text for response in stream)


text = st.text_input('Enter some text:')  # Input field for new text

if text:
    # First pick the top-level category, then a sub-category within it.
    category = classify(text, list(dictionary))["labels"][0]
    subcategory = classify(text, list(dictionary[category]))["labels"][0]
    answer = dictionary[category][subcategory]

    # Create translator objects with specified source and target languages
    translator = Translator(from_lang='el', to_lang='en')
    translator2 = Translator(from_lang='en', to_lang='el')

    st.text("H ερώτηση σας σχετίζεται με " + category + " δίκαιο")

    # Translate the stored entry and the question from Greek to English
    answer = translator.translate(answer)
    text = translator.translate(text)

    st.text("Πιο συγκεκριμένα σχετίζεται με " + subcategory)

    history = st.session_state.history
    prompt = "Based on this info only:" + answer + " ,answer this question, by reasoning step by step:" + text
    formatted_prompt = format_prompt(prompt, history)
    output = _generate(formatted_prompt)

    # Translate the generated answer back to Greek for display
    translated_text2 = translator2.translate(output)
    st.text(translated_text2)