import pickle

import streamlit as st
import torch
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from translate import Translator

client = InferenceClient(
    "mistralai/Mistral-7B-Instruct-v0.1"
)
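# The remote Mistral-7B-Instruct endpoint (served through the Hugging Face Inference API)
# handles the final answer generation; the local pipelines below only handle classification.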


# Select the GPU if one is available (note: `device` is not currently passed to the pipelines below)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def init_session_state():
    # Keep the conversation history as a list of (user_prompt, bot_response) pairs,
    # which is the structure format_prompt() expects.
    if 'history' not in st.session_state:
        st.session_state.history = []


# Generation parameters for the remote endpoint
temperature = 0.9
max_new_tokens = 256
top_p = 0.95
repetition_penalty = 1.0


generate_kwargs = dict(
    temperature=temperature,
    max_new_tokens=max_new_tokens,
    top_p=top_p,
    repetition_penalty=repetition_penalty,
    do_sample=True,
    seed=42,
)
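# These kwargs are forwarded verbatim to client.text_generation() below;
# the fixed seed keeps the sampled output reproducible across reruns.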

def format_prompt(message, history):
    # Wrap the conversation in Mistral's [INST] ... [/INST] instruction format
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt
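# For example, assuming one prior exchange in history:
#   format_prompt("Next question", [("Hello", "Hi there")])
#   -> "<s>[INST] Hello [/INST] Hi there</s> [INST] Next question [/INST]"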


# Initialize session state
init_session_state()

# pipe = pipeline("text2text-generation", model="google/flan-t5-base")
# NOTE: this local pipeline is only needed by the commented-out generation path further down;
# the streaming path uses the remote client, so loading it here can be skipped.
# pipe = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
# pipe = pipeline("text-generation", model="GeneZC/MiniChat-1.5-3B")
# pipe = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")
# model_name = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForSequenceClassification.from_pretrained(model_name)

classifier = pipeline("zero-shot-classification", model="MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
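# The zero-shot pipeline returns {"sequence": ..., "labels": [...], "scores": [...]}
# with labels sorted by score, so output["labels"][0] below is the best-matching label.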

# with open('chapter_titles.pkl', 'rb') as file:
#     titles_astiko = pickle.load(file)
# labels1 = ["κληρονομικό", "ακίνητα", "διαζύγιο"]  # inheritance, real estate, divorce
# # labels2 = ["αποδοχή κληρονομιάς", "αποποίηση", "διαθήκη"]  # acceptance of inheritance, renunciation, will
# # labels3 = ["μίσθωση", "κυριότητα", "έξωση", "απλήρωτα νοίκια"]  # lease, ownership, eviction, unpaid rent


# titles_astiko = ["γάμος", "αλλοδαπός", "φορολογία", "κληρονομικά", "στέγη", "οικογενειακό", "εμπορικό", "κλοπή", "απάτη"]  # marriage, foreign national, taxation, inheritance, housing, family, commercial, theft, fraud
# Load dictionary from the file using pickle
with open('my_dict.pickle', 'rb') as file:
    dictionary = pickle.load(file)
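# Assumed structure of my_dict.pickle, inferred from the lookups below:
#   {broad_area: {specific_topic: reference_text, ...}, ...}
# where the keys are Greek legal-topic labels and the values are the passages used as context.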

def classify(text, labels):
    # Zero-shot classification of `text` against the candidate `labels`
    output = classifier(text, labels, multi_label=False)
    return output


text = st.text_input('Enter some text:')  # Input field for new text

if text:

    # First pass: pick the broad legal area from the top-level dictionary keys
    labels = list(dictionary)
    output = classify(text, labels)
    output = output["labels"][0]

    # Second pass: pick the specific topic within that area
    labels = list(dictionary[output])
    output2 = classify(text, labels)
    output2 = output2["labels"][0]

    # Look up the reference passage for the (area, topic) pair
    answer = dictionary[output][output2]

    # Translators for Greek -> English (model input) and English -> Greek (display)
    translator = Translator(from_lang='el', to_lang='en')
    translator2 = Translator(from_lang='en', to_lang='el')

    # "Your question relates to <area> law"
    st.text("H ερώτηση σας σχετίζεται με " + output + " δίκαιο")

    # Translate the reference passage and the question from Greek to English
    # before handing them to the English-language instruct model
    answer = translator.translate(answer)
    text = translator.translate(text)

    # "More specifically, it relates to <topic>"
    st.text("Πιο συγκεκριμένα σχετίζεται με " + output2)


# NOTE: the default backend of the `translate` package may reject long inputs (roughly 500 characters),
# hence the earlier experiment with translating the text in slices:
# text_to_translate2 = text[499:999]
# translated_text2 = translator.translate(text_to_translate2)

    

    # Earlier, non-streaming variant using the local pipeline:
    # st.session_state.history += "Based on this info only:" + answer + " ,answer this question, by reasoning step by step:" + text
    # out = pipe(st.session_state.history, max_new_tokens=256)

    history = st.session_state.history
    prompt = "Based on this info only: " + answer + ", answer this question by reasoning step by step: " + text
    formatted_prompt = format_prompt(prompt, history)

    # Stream the generation from the remote endpoint and accumulate the tokens.
    # (yield/return are not valid at module level, so the tokens are simply collected here.)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text

    # Remember this exchange so follow-up questions keep the conversational context
    st.session_state.history = history + [(prompt, output)]


    # st.text(st.session_state.history)
    
    # translated_text2 = translator2.translate(out[0]['generated_text'])

    # Translate the model's English answer back to Greek for display
    translated_text2 = translator2.translate(output)

    st.text(translated_text2)

    # with st.expander("View Full Output", expanded=False):
    #     st.write(translated_text2, allow_output_mutation=True)

    # st.text(translated_text2)
    # st.text("History: " + st.session_state.history)

    # st.text(output)
    # st.text(output2)

    # st.text(answer)