File size: 8,428 Bytes
0fdb130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import os
import warnings

from langchain.prompts import PromptTemplate


# The three recommendation classes that the prediction prompts below ask for.
labels = ['buy', 'sell', 'hold']

# Authorization headers for Hugging Face requests.
# The token is read with .get() so that a missing 'HG_api_key' variable no
# longer raises KeyError at import time and takes the whole module (prompts,
# model registries) down with it. Without a token the headers are left empty
# and a warning is emitted so the missing credential is still visible.
_hf_api_token = os.environ.get('HG_api_key')
if _hf_api_token:
    headers_inference_api = {"Authorization": f"Bearer {_hf_api_token}"}
else:
    warnings.warn(
        "Environment variable 'HG_api_key' is not set; Hugging Face requests "
        "will be sent without an Authorization header.",
        RuntimeWarning,
    )
    headers_inference_api = {}
# headers_inference_endpoint = {
#     "Authorization": f"Bearer {os.environ['HG_api_key_personal']}",
#     "Content-Type": "application/json"
# }

# System message for the summarizer: asks for a 10-sentence summary of
# Item 7 of a Form 10-K, aimed at a buy/sell/hold decision.
# Written as adjacent literals (one chunk per line) that concatenate into a
# single line of text with no embedded newlines.
summarization_system_msg = (
    "You are the best financial advisor and expert broker. "
    "You are reading Item 7 from Form 10-K of some company and you want to "
    "summarize it into 10 sentences the best as possible, so that then the "
    "human will analyze your summary and take on serious decisions, whether "
    "to buy, sell or hold the holdings of that company. "
    "There is no need to copy messages from the original text. "
    "Don't write general things, which aren't important to the investor. "
    "Include the most important parts, which describes the business growth, "
    "predictions for next years etc."
)

# User message for the summarizer; `{company_description}` is its only placeholder.
summarization_user_msg = "Company's description: {company_description}"

# Prompt object built from the summarizer's user message.
summarization_user_prompt = PromptTemplate.from_template(template=summarization_user_msg)

# summarization_template = """<system> You are the best financial advisor and expert broker. You are \
# reading Item 7 from Form 10-K of some company and you want to summarize it into 2-3 sentences the best as \
# possible, so that then the human will analyze your summary and take on serious decisions, whether to \
# buy, sell or hold the holdings of that company. There is no need to copy messages from the original text</system>

# Company's description: {company_description}"""

# summarization_prompt_template = PromptTemplate.from_template(
#      template=summarization_template 
# )

# System message for the predictor: requests a single-word answer,
# one of 'sell', 'buy' or 'hold'. Concatenates to one line of text.
prediction_system_msg = (
    "You are the best financial advisor and expert broker. "
    "I am an investor, who seek for your help. "
    "Below is the description of one big company. "
    "You need to reply to me with a single word, either 'sell', 'buy' or 'hold'. "
    "This word should best describe your recommendation on what is the best "
    "action for me with the company's holdings."
)

# User message for the predictor: the description, a blank line, then the question.
# `{company_description}` is its only placeholder.
prediction_user_msg = (
    "Company's description: {company_description}\n"
    "\n"
    "So what do you think? Sell, buy or hold?"
)
# Prompt object for the user message alone (system message supplied separately).
prediction_user_prompt = PromptTemplate.from_template(template=prediction_user_msg)

# Single-string variant: the system message wrapped in <system> tags,
# a blank line, then the user message.
prediction_template = f"<system> {prediction_system_msg} </system>\n\n{prediction_user_msg}"
prediction_prompt = PromptTemplate.from_template(template=prediction_template)





# Single-string prediction prompt: instructions inside <system> tags, then the
# company description and the closing question.
# NOTE(review): the text appears to match `prediction_template`; it is kept
# here as its own literal.
template = (
    "<system> You are the best financial advisor and expert broker. "
    "I am an investor, who seek for your help. "
    "Below is the description of one big company. "
    "You need to reply to me with a single word, either 'sell', 'buy' or 'hold'. "
    "This word should best describe your recommendation on what is the best "
    "action for me with the company's holdings. </system>\n"
    "\n"
    "Company's description: {company_description}\n"
    "\n"
    "So what do you think? Sell, buy or hold?"
)
# Prompt object built from the <system>-tagged `template` string.
prompt_template = PromptTemplate.from_template(template=template)


# Instruction/response scaffold with a single `{instruction}` placeholder.
# The text starts with a newline and ends right after the "### Response:" line.
chat_structure = (
    "\n"
    "### Instruction:\n"
    "{instruction}\n"
    "\n"
    "### Response:\n"
)
# Prompt object built from the instruction/response scaffold.
chat_prompt = PromptTemplate.from_template(template=chat_structure)


# Prediction prompt without <system> tags: instructions, the company
# description, then the closing question.
# Presumably meant to fill the `{instruction}` slot of `chat_structure`
# (see the commented-out `text_gen_prompt` below) -- confirm against callers.
instruction = (
    "You are the best financial advisor and expert broker. "
    "I am an investor, who seek for your help. "
    "Below is the description of one big company. "
    "You need to reply to me with a single word, either 'sell', 'buy' or 'hold'. "
    "This word should best describe your recommendation on what is the best "
    "action for me with the company's holdings.\n"
    "\n"
    "Company's description: {company_description}\n"
    "\n"
    "So what do you think? Sell, buy or hold?"
)
# text_gen_prompt = PromptTemplate.from_template(
#      template=chat_prompt.format(instruction=instruction_prompt.format(company_description=text.page_content))
# )
# Prompt object built from the tag-free `instruction` text.
instruction_prompt = PromptTemplate.from_template(template=instruction)





# predictor_system_message = """You are the preeminent financial advisor and expert broker, 
# renowned for your unparalleled market acumen. As you meticulously analyze the summary of Item 7 from 
# Form 10-K of some company, your task is to distill your profound insights into a single decisive word, 
# choosing from the options: 'sell', 'buy', or 'hold'. This word reflects your beliefs about the company's 
# future. Your selection should be astutely founded on a 
# comprehensive understanding of all economic facets and nuanced considerations. Remember, your 
# recommendation carries significant weight, influencing critical decisions on whether to divest, invest, 
# or maintain positions in that company. If you predict "buy" it means that the company is a good investment 
# option and is likely to grow in the next year. If you predict "sell" it means that you think that the 
# company won't perform well during the upcoming year. Approach this task with the sagacity and expertise that 
# has earned you your esteemed reputation. Please, don't include any warnings that it is difficult to make 
# a definitive recommendation, based on the information provided. Please, don't include any additional text 
# before your answer, don't write 'based on the information provided, I recommend ...'."""





# Registry of summarization back-ends, keyed by a short display name.
# Each entry carries the model identifier, the URL it is served from (empty
# for 'gpt-3.5-turbo'), a chunk size and a size label.
# NOTE(review): the unit of 'chunk_size' (characters vs. tokens) is not
# visible in this file -- confirm against the code that splits the text.
# Entries kept as comments are alternatives that are currently disabled.
summarizers = {
    # 'financial-summarization-pegasus': {
    #     'model_name': 'human-centered-summarization/financial-summarization-pegasus',
    #     'api_url': 'https://api-inference.huggingface.co/models/human-centered-summarization/financial-summarization-pegasus',
    #     'chunk_size': 1_400,
    #     'size': 'large',
    # },
    'bart-finance-pegasus': {
        'model_name': 'amitesh11/bart-finance-pegasus',
        'api_url': 'https://api-inference.huggingface.co/models/amitesh11/bart-finance-pegasus',
        'chunk_size': 2_600,
        'size': 'medium',
    },
    # 'financial-summary': {
    #     'model_name': 'Spacetimetravel/autotrain-financial-conversation_financial-summary-90517144315',
    #     'api_url': 'https://api-inference.huggingface.co/models/Spacetimetravel/autotrain-financial-conversation_financial-summary-90517144315',
    #     'chunk_size': 1_800,
    #     'size': 'small',
    # },
    'gpt-3.5-turbo': {
        'model_name': 'gpt-3.5-turbo',
        'api_url': '',
        'chunk_size': 6_000,
        'size': '',
    },
}



# Base URL of the hosted Hugging Face Inference API; a model id is appended to it.
_HF_INFERENCE_API_ROOT = 'https://api-inference.huggingface.co/models/'


def _inference_api_predictor(model_name: str, model_task: str) -> dict:
    """Return the predictor entry for a model served by the hosted Inference API.

    The entry's ``api_url`` is the Inference API root followed by *model_name*,
    and its ``inference_type`` is always ``'Inference API'``.
    """
    return {
        'model_name': model_name,
        'api_url': _HF_INFERENCE_API_ROOT + model_name,
        'inference_type': 'Inference API',
        'model_task': model_task,
    }


# There are 3 inference_types: chatGPT, Inference API and Inference Endpoint
# Add captions to display inference_type
predictors = {
    # OpenAI chat model; this entry has no 'api_url'.
    'gpt-3.5-turbo': {
        'model_name': 'OpenAI-gpt-3.5-turbo',
        'inference_type': 'chatGPT',
        'model_task': 'text-generation',
    },

    # Models with the 'conversational' task.
    'blenderbot-3B': _inference_api_predictor(
        'facebook/blenderbot-3B', 'conversational'
    ),
    'TinyLlama-1.1B': _inference_api_predictor(
        'tog/TinyLlama-1.1B-alpaca-chat-v1.0', 'conversational'
    ),
    # Served from a dedicated Inference Endpoint, so the URL is spelled out.
    'open-llama-7b-v2': {
        'model_name': 'VMware/open-llama-7b-v2-open-instruct',
        'api_url': 'https://audqis4a3tk9s0li.us-east-1.aws.endpoints.huggingface.cloud',
        'inference_type': 'Inference Endpoint',
        'model_task': 'conversational',
    },

    # Models with the 'text-generation' task.
    'gpt2-xl': _inference_api_predictor('gpt2-xl', 'text-generation'),
    'distilgpt2-finance': _inference_api_predictor(
        'lxyuan/distilgpt2-finetuned-finance', 'text-generation'
    ),

    # Embedding-based classifiers. These entries use a different key set
    # (dataset, embedding model, embedding dimension, MLP weights file) and
    # carry no 'model_name' / 'inference_type' / 'model_task'.
    'embedding_mlp_classifier': {
        'dataset_name': 'CabraVC/vector_dataset_2023-12-02_00-32',
        'embedding_model': 'all-distilroberta-v1',
        'embedding_dim': 768,
        'mlp_model': 'embedding_mlp.safetensors',
    },
    # NOTE(review): same dataset and MLP weights file as the entry above,
    # only the embedding model differs -- confirm this is intended.
    'embedding_mlp_classifier_gtr-t5-xxl': {
        'dataset_name': 'CabraVC/vector_dataset_2023-12-02_00-32',
        'embedding_model': 'gtr-t5-xxl',
        'embedding_dim': 768,
        'mlp_model': 'embedding_mlp.safetensors',
    },
}



# Empty score layout: one list per ROUGE metric key, each its own list object.
# NOTE: the lists are mutable module-level state; take a deep copy
# (copy.deepcopy) before appending so this template itself stays empty.
summary_scores_template = {
    metric: [] for metric in ('rouge-1', 'rouge-2', 'rouge-l')
}