nickmuchi committed
Commit 2b8b510
1 Parent(s): e5a33f5

Update variables.py

Files changed (1)
variables.py +73 -0
variables.py CHANGED
@@ -1,6 +1,79 @@
 ##Variables
 
 import os
+import streamlit as st
+import pathlib
+
+from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain.chat_models.openai import ChatOpenAI
+from langchain import VectorDBQA
+import pandas as pd
+
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts.chat import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    AIMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+)
+from langchain.schema import (
+    AIMessage,
+    HumanMessage,
+    SystemMessage
+)
+
+@st.experimental_singleton(suppress_st_warning=True)
+def get_latest_file():
+    '''Get the latest file from the output folder'''
+
+    # set the directory path
+    directory_path = "output/"
+
+    # list all text files in the directory, sorted by modification time (oldest first)
+    text_files = sorted(pathlib.Path(directory_path).glob("*.txt"), key=lambda f: f.stat().st_mtime)
+
+    # get the most recently modified file
+    latest_file = text_files[-1]
+
+    # open the file and read its contents
+    with open(latest_file, "r") as f:
+        file_contents = f.read()
+
+    return file_contents
+
+@st.experimental_singleton(suppress_st_warning=True)
+def process_tweets(file, embed_model, query):
+    '''Process file with latest tweets'''
+
+    # split tweets into chunks
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+    texts = text_splitter.split_text(file)
+
+    model = bi_enc_dict[embed_model]
+
+    if model == "hkunlp/instructor-large":
+        emb = HuggingFaceInstructEmbeddings(model_name=model,
+                                            query_instruction='Represent the Financial question for retrieving supporting documents: ',
+                                            embed_instruction='Represent the Financial document for retrieval: ')
+
+    elif model == "sentence-transformers/all-mpnet-base-v2":
+        emb = HuggingFaceEmbeddings(model_name=model)
+
+    # embed the chunks and index them with FAISS
+    docsearch = FAISS.from_texts(texts, emb)
+
+    # retrieval QA chain that "stuffs" the retrieved chunks into the prompt
+    chain_type_kwargs = {"prompt": prompt}
+    chain = VectorDBQA.from_chain_type(
+        ChatOpenAI(temperature=0),
+        chain_type="stuff",
+        vectorstore=docsearch,
+        chain_type_kwargs=chain_type_kwargs
+    )
+
+    result = chain({"query": query}, return_only_outputs=True)
+
+    return result
 
 CONFIG = {
     "bearer_token": os.environ.get("bearer_token")