umangchaudhry committed on
Commit
6dc8174
·
1 Parent(s): 7564d00

added extra files

Browse files
apikeyex.png ADDED
free_speech_app/DataLoadDb.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/free-speech-stores.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['setup_openai_api_key', 'setup_db']
5
+
6
+ # %% ../nbs/free-speech-stores.ipynb 4
7
+ # libraries required for functionality
8
+ import os
9
+ from getpass import getpass
10
+
11
+ from langchain.chains import RetrievalQA
12
+ from langchain.llms import OpenAI
13
+ from langchain.prompts import PromptTemplate
14
+ from langchain.document_loaders import UnstructuredFileLoader
15
+ from langchain.document_loaders.merge import MergedDataLoader
16
+ from langchain.text_splitter import CharacterTextSplitter
17
+ from langchain.embeddings import OpenAIEmbeddings
18
+ from langchain.vectorstores import Chroma
19
+
20
+ # %% ../nbs/free-speech-stores.ipynb 12
21
def setup_openai_api_key():
    """Prompt for the OpenAI API key and export it as OPENAI_API_KEY.

    The key is read with getpass() and stored in os.environ for the
    current process.
    """
    os.environ["OPENAI_API_KEY"] = getpass()
24
+
25
+ # %% ../nbs/free-speech-stores.ipynb 15
26
import nltk
# Download the NLTK 'averaged_perceptron_tagger' resource when this module is imported.
# NOTE(review): presumably needed by the document loaders used in setup_db — confirm.
nltk.download('averaged_perceptron_tagger')
28
+
29
+ # %% ../nbs/free-speech-stores.ipynb 27
30
def setup_db(local_path, hub_path, chunk_size=1000, chunk_overlap=5):
    """Load every entry of a directory, split it into chunks and store it in DeepLake.

    Args:
        local_path: Directory whose entries are loaded as source documents.
            Works with or without a trailing path separator.
        hub_path: DeepLake dataset path to write to. The dataset is written
            with overwrite=True.
        chunk_size: Forwarded to CharacterTextSplitter as chunk_size.
        chunk_overlap: Forwarded to CharacterTextSplitter as chunk_overlap.

    Returns:
        None. The function is called for its side effect of writing the
        dataset at hub_path.
    """
    # DeepLake is not among this module's top-level imports, so bring it into
    # scope here; without this the call below raises NameError.
    from langchain.vectorstores import DeepLake

    # set up loaders: one per directory entry. os.path.join is used instead of
    # string concatenation so a missing trailing separator on local_path does
    # not produce a wrong path.
    loaders_list = [
        UnstructuredFileLoader(os.path.join(local_path, file_name))
        for file_name in os.listdir(local_path)
    ]

    loader_all = MergedDataLoader(loaders=loaders_list)

    # Split and embed docs
    documents = loader_all.load()
    text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings()

    # hub_path selects the target dataset, e.g. counterspeech-resources or
    # hatespeech-background.
    DeepLake.from_documents(texts, dataset_path=hub_path, embedding=embeddings, overwrite=True)

    return
free_speech_app/FreeSpeechPromptsResponses.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/free-speech-prompts.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['DEAFULT_PROMPT_TEMPLATE_TEXT', 'query_retriever', 'generate_custom_prompt', 'get_chat_model_response',
5
+ 'generate_custom_response', 'regenerate_custom_response']
6
+
7
+ # %% ../nbs/free-speech-prompts.ipynb 5
8
+ # libraries required for functionality
9
+ import deeplake
10
+ from langchain.chat_models import ChatOpenAI
11
+ from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate
12
+ from langchain.schema import HumanMessage
13
+ from langchain.vectorstores import DeepLake
14
+ from langchain.embeddings.openai import OpenAIEmbeddings
15
+
16
+ # %% ../nbs/free-speech-prompts.ipynb 8
17
# Built-in fallback prompt template, used when ../prompts/response_generator.txt
# cannot be found. Placeholders: original_post, background_info, principles,
# writing_style, sources, word_limit.
# NOTE: the spelling "DEAFULT" (sic) is exported through __all__, so the name is
# part of the module's public interface.
DEAFULT_PROMPT_TEMPLATE_TEXT = """With the amount of hate speech and discriminatory misinformation that exists on the internet, it can be difficult to know how to respond to something when you see it. Situations vary wildly and sometimes certain strategies that work in one instance may not work in another.
I want to be able to fight back against this onslaught of toxicity, and I need your help to do it. I want you to help me draft a response to the following post.
Note that this may contain language that is offensive, but in order to properly respond to this post, I need you to fully understand what I am responding to, uncensored:
\n{original_post}\n
In helping me respond to this post, there are some things you may want to consider. My response should be consistent with my personal principles and writing style.
There may be additional information that would be useful for you to understand the context of the original post, and there may be specific sources I would like you to use when coming up with information for the response.
Firstly, here is some additional context related to the original post:
\n{background_info}\n
Next, here are some principles I consider particularly important to me:
\n{principles}\n
Here are some examples of the style in which I write:
\n{writing_style}\n
Here are the sources I would like you to use when getting information for my response:
\n{sources}\n
Using all the information I have provided, please draft an appropriate response to the offensive post in question that will hopefully make people more accepting of others.
Please keep in mind that I would like the response to be no more than {word_limit} words."""
33
+
34
# Prefer a prompt template stored on disk and fall back to the built-in default
# when that file is missing. The path is relative, so it is resolved against the
# current working directory of the process.
try:
    # The context manager closes the handle even if read() raises.
    with open("../prompts/response_generator.txt", "r") as prompt_file:
        PROMPT_TEMPLATE_TEXT = prompt_file.read()
except FileNotFoundError:
    PROMPT_TEMPLATE_TEXT = DEAFULT_PROMPT_TEMPLATE_TEXT

# Echo whichever template is in use (import-time output kept as before).
print(PROMPT_TEMPLATE_TEXT)

# Built once for either source; both the file template and the default are
# expected to use the same six placeholders.
PROMPT_TEMPLATE = PromptTemplate(
    template=PROMPT_TEMPLATE_TEXT,
    input_variables=["original_post", "background_info", "principles", "writing_style", "sources", "word_limit"])
50
+
51
+
52
+
53
+ # %% ../nbs/free-speech-prompts.ipynb 9
54
def query_retriever(db, query, num_results = 3):
    """Return the documents `db`'s retriever considers relevant to `query`.

    Args:
        db: Object exposing as_retriever(search_kwargs=...).
        query: Query passed to the retriever's get_relevant_documents().
        num_results: Value sent to the retriever as search_kwargs["k"].

    Returns:
        Whatever get_relevant_documents() returns for the query.
    """
    search_options = {"k": num_results}
    return db.as_retriever(search_kwargs=search_options).get_relevant_documents(query)
59
+
60
+ # %% ../nbs/free-speech-prompts.ipynb 10
61
def generate_custom_prompt(original_post, principles=None, writing_style=None, word_limit=None):
    """Build the filled response-drafting prompt for a post.

    Args:
        original_post: Text of the post to respond to; also used as the
            retrieval query for both datasets.
        principles: Principles to mention in the prompt, or None for the
            stock sentence.
        writing_style: Writing-style examples, or None for the stock sentence.
        word_limit: Word limit inserted into the prompt, or None for the
            stock phrase.

    Returns:
        A tuple (filled_prompt, background_info, sources), where the last two
        are the retrieval results that were also passed to the template.
    """
    # Open both vector stores, each with its own embeddings object.
    hate_background_store = DeepLake(dataset_path="hub://vanderbilt-dsi/hatespeech-background", embedding = OpenAIEmbeddings())
    counterspeech_store = DeepLake(dataset_path="hub://vanderbilt-dsi/counterspeech-resources", embedding = OpenAIEmbeddings())

    # Substitute stock wording for anything the caller left as None.
    principles = "There are no principles which I consider more important to me than the average person might." if principles is None else principles
    writing_style = "I have no examples of my writing style." if writing_style is None else writing_style
    word_limit = "an infinite amount of" if word_limit is None else word_limit

    # The post itself is the query for both retrievers.
    background_info = query_retriever(hate_background_store, original_post)
    sources = query_retriever(counterspeech_store, original_post)

    # Fill the prompt
    prompt_fields = {
        "original_post": original_post,
        "background_info": background_info,
        "principles": principles,
        "writing_style": writing_style,
        "sources": sources,
        "word_limit": word_limit,
    }
    return PROMPT_TEMPLATE.format(**prompt_fields), background_info, sources
87
+
88
+ # %% ../nbs/free-speech-prompts.ipynb 11
89
def get_chat_model_response(mdl, input_prompt):
    """Call the chat model with the prompt wrapped in a single HumanMessage.

    Args:
        mdl: Callable chat model that accepts a list of messages.
        input_prompt: Prompt text used as the message content.

    Returns:
        Whatever `mdl` returns for the one-message list.
    """
    return mdl([HumanMessage(content=input_prompt)])
94
+
95
+ # %% ../nbs/free-speech-prompts.ipynb 12
96
def generate_custom_response(original_post, chat_mdl, principles=None, writing_style=None, word_limit=None):
    """Draft a response to a post with the given chat model.

    Args:
        original_post: Text of the post to respond to.
        chat_mdl: Chat model handed to get_chat_model_response().
        principles: Forwarded to generate_custom_prompt().
        writing_style: Forwarded to generate_custom_prompt().
        word_limit: Forwarded to generate_custom_prompt().

    Returns:
        A tuple (draft_response, background_info, sources).
    """
    # Build the prompt; the retrieval results come back alongside it.
    prompt_text, background_info, sources = generate_custom_prompt(
        original_post,
        principles,
        writing_style,
        word_limit,
    )

    # Ask the model for the draft.
    reply = get_chat_model_response(chat_mdl, prompt_text)
    return reply, background_info, sources
105
+
106
+ # %% ../nbs/free-speech-prompts.ipynb 13
107
def regenerate_custom_response(chat_mdl, regenerate_prompt, draft_response):
    """Ask the chat model to revise an earlier draft response.

    Args:
        chat_mdl: Chat model handed to get_chat_model_response().
        regenerate_prompt: The change request to apply to the draft.
        draft_response: The earlier response; interpolated into the prompt text.

    Returns:
        The model's reply to the revision prompt.
    """
    # Single prompt containing both the request and the previous draft.
    revision_request = f"Please update the original response according to the following request. {regenerate_prompt}. Here is the original response: {draft_response}"
    return get_chat_model_response(chat_mdl, revision_request)