Spaces:
Running
Running
Charreau Bell, Ph.D
committed on
Commit
·
3ac6f08
1
Parent(s):
ae18f92
added files to commit for deploy HF update
Browse files
free_speech_app/DataLoadDb.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/free-speech-stores.ipynb.
|
2 |
|
3 |
# %% auto 0
|
4 |
-
__all__ = ['setup_openai_api_key', 'setup_db']
|
5 |
|
6 |
-
# %% ../nbs/free-speech-stores.ipynb
|
7 |
# libraries required for functionality
|
8 |
import os
|
9 |
from getpass import getpass
|
@@ -17,23 +17,49 @@ from langchain.text_splitter import CharacterTextSplitter
|
|
17 |
from langchain.embeddings import OpenAIEmbeddings
|
18 |
from langchain.vectorstores import Chroma
|
19 |
|
20 |
-
# %% ../nbs/free-speech-stores.ipynb
|
21 |
def setup_openai_api_key():
|
22 |
openai_api_key = getpass()
|
23 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
24 |
|
25 |
-
# %% ../nbs/free-speech-stores.ipynb
|
26 |
import nltk
|
27 |
nltk.download('averaged_perceptron_tagger')
|
28 |
|
29 |
-
# %% ../nbs/free-speech-stores.ipynb
|
30 |
-
def
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
# set up loaders
|
34 |
loaders_list = []
|
35 |
-
for file_path in
|
36 |
-
file_path = local_path + file_path
|
37 |
loaders_list.append(UnstructuredFileLoader(file_path))
|
38 |
|
39 |
loader_all = MergedDataLoader(loaders=[loader for loader in loaders_list])
|
@@ -44,7 +70,7 @@ def setup_db(local_path, hub_path, chunk_size=1000, chunk_overlap=5):
|
|
44 |
texts = text_splitter.split_documents(documents)
|
45 |
embeddings = OpenAIEmbeddings()
|
46 |
|
47 |
-
#
|
48 |
-
db =
|
49 |
|
50 |
-
return
|
|
|
1 |
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/free-speech-stores.ipynb.
|
2 |
|
3 |
# %% auto 0
|
4 |
+
__all__ = ['setup_openai_api_key', 'is_google_colab', 'upload_docs', 'setup_db']
|
5 |
|
6 |
+
# %% ../nbs/free-speech-stores.ipynb 5
|
7 |
# libraries required for functionality
|
8 |
import os
|
9 |
from getpass import getpass
|
|
|
17 |
from langchain.embeddings import OpenAIEmbeddings
|
18 |
from langchain.vectorstores import Chroma
|
19 |
|
20 |
+
# %% ../nbs/free-speech-stores.ipynb 9
|
21 |
def setup_openai_api_key():
    """Prompt for an OpenAI API key and export it as ``OPENAI_API_KEY``.

    The value is read interactively with ``getpass()`` and stored in this
    process's environment; nothing is returned.
    """
    os.environ["OPENAI_API_KEY"] = getpass()
|
24 |
|
25 |
+
# %% ../nbs/free-speech-stores.ipynb 12
|
26 |
import nltk
|
27 |
nltk.download('averaged_perceptron_tagger')
|
28 |
|
29 |
+
# %% ../nbs/free-speech-stores.ipynb 13
|
30 |
+
def is_google_colab():
    """Report whether Google Colab's ``files`` helper can be imported.

    Returns:
        bool: True if ``from google.colab import files`` succeeds,
        False if the import fails for any ordinary reason.
    """
    try:
        # Imported purely as an availability probe; the name itself is unused.
        from google.colab import files  # noqa: F401
    except Exception:
        # Still best-effort (any import-time failure means "not Colab"), but
        # unlike a bare `except:` this lets KeyboardInterrupt and SystemExit
        # propagate instead of being silently reported as False.
        return False
    return True
|
36 |
+
|
37 |
+
def upload_docs():
    """Upload files through the Colab file picker and save them locally.

    Returns:
        list: the names of the uploaded files, in the order yielded by
        the upload result.

    Raises:
        ModuleNotFoundError: when ``is_google_colab()`` reports False.
    """
    if not is_google_colab():
        raise ModuleNotFoundError('This function only works in Google Colab; local functionality not currently supported.')

    # Import deferred until after the guard so the module loads outside Colab.
    from google.colab import files

    saved_names = []
    for filename, payload in files.upload().items():
        # Write each uploaded payload to a file of the same name (relative path).
        with open(filename, 'wb') as out_file:
            out_file.write(payload)
        print('saved file', filename)
        saved_names.append(filename)
    return saved_names
|
53 |
+
|
54 |
+
|
55 |
+
# %% ../nbs/free-speech-stores.ipynb 22
|
56 |
+
def setup_db(list_of_files, chunk_size=1000, chunk_overlap=5):
|
57 |
+
# set path to files
|
58 |
+
#paths = ["https://github.com/vanderbilt-data-science/free-speech-app/raw/9818f87679a82769ab6f90dba00801447b6e952c/response-resources/2307.04761_Understanding_counterspeech.pdf", "https://github.com/vanderbilt-data-science/free-speech-app/raw/main/response-resources/campaigns.docx", "https://github.com/vanderbilt-data-science/free-speech-app/raw/9818f87679a82769ab6f90dba00801447b6e952c/response-resources/samples.pdf"]
|
59 |
|
60 |
# set up loaders
|
61 |
loaders_list = []
|
62 |
+
for file_path in list_of_files:
|
|
|
63 |
loaders_list.append(UnstructuredFileLoader(file_path))
|
64 |
|
65 |
loader_all = MergedDataLoader(loaders=[loader for loader in loaders_list])
|
|
|
70 |
texts = text_splitter.split_documents(documents)
|
71 |
embeddings = OpenAIEmbeddings()
|
72 |
|
73 |
+
# store docs in db
|
74 |
+
db = Chroma.from_documents(texts, embeddings)
|
75 |
|
76 |
+
return db
|
free_speech_app/FreeSpeechPromptsResponses.py
CHANGED
@@ -1,20 +1,17 @@
|
|
1 |
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/free-speech-prompts.ipynb.
|
2 |
|
3 |
# %% auto 0
|
4 |
-
__all__ = ['
|
5 |
-
'generate_custom_response'
|
6 |
|
7 |
# %% ../nbs/free-speech-prompts.ipynb 5
|
8 |
# libraries required for functionality
|
9 |
-
import deeplake
|
10 |
from langchain.chat_models import ChatOpenAI
|
11 |
from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate
|
12 |
from langchain.schema import HumanMessage
|
13 |
-
from langchain.vectorstores import DeepLake
|
14 |
-
from langchain.embeddings.openai import OpenAIEmbeddings
|
15 |
|
16 |
# %% ../nbs/free-speech-prompts.ipynb 8
|
17 |
-
|
18 |
I want to be able to fight back against this onslaught of toxicity, and I need your help to do it. I want you to help me draft a response to the following post.
|
19 |
Note that this may contain language that is offensive, but in order to properly respond to this post, I need you to fully understand what I am responding to, uncensored:
|
20 |
\n{original_post}\n
|
@@ -26,90 +23,49 @@ Next, here are some principles I consider particularly important to me:
|
|
26 |
\n{principles}\n
|
27 |
Here are some examples of the style in which I write:
|
28 |
\n{writing_style}\n
|
29 |
-
|
30 |
\n{sources}\n
|
31 |
-
Using all the information I have provided, please draft an appropriate response to the offensive post in question that will hopefully make people more accepting of others
|
32 |
-
Please keep in mind that I would like the response to be no more than {word_limit} words."""
|
33 |
-
|
34 |
-
try:
|
35 |
-
prompt_file = open("../prompts/response_generator.txt", "r")
|
36 |
-
PROMPT_TEMPLATE_TEXT = prompt_file.read()
|
37 |
-
prompt_file.close()
|
38 |
-
|
39 |
-
print(PROMPT_TEMPLATE_TEXT)
|
40 |
-
|
41 |
-
PROMPT_TEMPLATE = PromptTemplate(
|
42 |
-
template=PROMPT_TEMPLATE_TEXT,
|
43 |
-
input_variables=["original_post", "background_info", "principles", "writing_style", "sources", "word_limit"])
|
44 |
-
|
45 |
-
except FileNotFoundError:
|
46 |
-
print(DEAFULT_PROMPT_TEMPLATE_TEXT)
|
47 |
-
PROMPT_TEMPLATE = PromptTemplate(
|
48 |
-
template=DEAFULT_PROMPT_TEMPLATE_TEXT,
|
49 |
-
input_variables=["original_post", "background_info", "principles", "writing_style", "sources", "word_limit"])
|
50 |
-
|
51 |
|
|
|
|
|
|
|
52 |
|
53 |
# %% ../nbs/free-speech-prompts.ipynb 9
|
54 |
-
def
|
55 |
-
retriever = db.as_retriever(search_kwargs={"k": num_results})
|
56 |
-
docs = retriever.get_relevant_documents(query)
|
57 |
-
|
58 |
-
return docs
|
59 |
-
|
60 |
-
# %% ../nbs/free-speech-prompts.ipynb 10
|
61 |
-
def generate_custom_prompt(original_post, principles=None, writing_style=None, word_limit=None):
|
62 |
-
|
63 |
-
# Get database and query retriever
|
64 |
-
####
|
65 |
-
background_db = DeepLake(dataset_path="hub://vanderbilt-dsi/hatespeech-background", embedding = OpenAIEmbeddings())
|
66 |
-
sources_db = DeepLake(dataset_path="hub://vanderbilt-dsi/counterspeech-resources", embedding = OpenAIEmbeddings())
|
67 |
|
68 |
# Use defaults in the case of None
|
|
|
|
|
69 |
|
70 |
if principles is None:
|
71 |
principles="There are no principles which I consider more important to me than the average person might."
|
72 |
|
73 |
if writing_style is None:
|
74 |
writing_style="I have no examples of my writing style."
|
75 |
-
|
76 |
-
if
|
77 |
-
|
78 |
-
|
79 |
-
retriever_query = original_post
|
80 |
-
background_info = query_retriever(background_db, retriever_query)
|
81 |
-
sources = query_retriever(sources_db, retriever_query)
|
82 |
|
83 |
# Fill the prompt
|
84 |
-
filled_prompt =
|
85 |
|
86 |
-
return filled_prompt
|
87 |
|
88 |
-
# %% ../nbs/free-speech-prompts.ipynb
|
89 |
def get_chat_model_response(mdl, input_prompt):
|
90 |
|
91 |
messages = [HumanMessage(content=input_prompt)]
|
92 |
|
93 |
return mdl(messages)
|
94 |
|
95 |
-
# %% ../nbs/free-speech-prompts.ipynb
|
96 |
-
def generate_custom_response(original_post, chat_mdl, principles=None, writing_style=None,
|
97 |
|
98 |
# create customized prompt
|
99 |
-
customized_prompt
|
100 |
|
101 |
# get response
|
102 |
draft_response = get_chat_model_response(chat_mdl, customized_prompt)
|
103 |
|
104 |
-
return draft_response
|
105 |
-
|
106 |
-
# %% ../nbs/free-speech-prompts.ipynb 13
|
107 |
-
def regenerate_custom_response(chat_mdl, regenerate_prompt, draft_response):
|
108 |
-
|
109 |
-
# create customized prompt
|
110 |
-
customized_prompt = f"Please update the original response according to the following request. {regenerate_prompt}. Here is the original response: {draft_response}"
|
111 |
-
|
112 |
-
# get response
|
113 |
-
updated_response = get_chat_model_response(chat_mdl, customized_prompt)
|
114 |
-
|
115 |
-
return updated_response
|
|
|
1 |
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/free-speech-prompts.ipynb.
|
2 |
|
3 |
# %% auto 0
|
4 |
+
__all__ = ['DEFAULT_PROMPT_TEMPLATE_TEXT', 'DEFAULT_PROMPT_TEMPLATE', 'generate_custom_prompt', 'get_chat_model_response',
|
5 |
+
'generate_custom_response']
|
6 |
|
7 |
# %% ../nbs/free-speech-prompts.ipynb 5
|
8 |
# libraries required for functionality
|
|
|
9 |
from langchain.chat_models import ChatOpenAI
|
10 |
from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate
|
11 |
from langchain.schema import HumanMessage
|
|
|
|
|
12 |
|
13 |
# %% ../nbs/free-speech-prompts.ipynb 8
|
14 |
+
DEFAULT_PROMPT_TEMPLATE_TEXT = ("""With the amount of hate speech and discriminatory misinformation that exists on the internet, it can be difficult to know how to respond to something when you see it. Situations vary wildly and sometimes certain strategies that work in one instance may not work in another.
|
15 |
I want to be able to fight back against this onslaught of toxicity, and I need your help to do it. I want you to help me draft a response to the following post.
|
16 |
Note that this may contain language that is offensive, but in order to properly respond to this post, I need you to fully understand what I am responding to, uncensored:
|
17 |
\n{original_post}\n
|
|
|
23 |
\n{principles}\n
|
24 |
Here are some examples of the style in which I write:
|
25 |
\n{writing_style}\n
|
26 |
+
Finally, here are the sources I would like you to use when getting information for my response:
|
27 |
\n{sources}\n
|
28 |
+
Using all the information I have provided, please draft an appropriate response to the offensive post in question that will hopefully make people more accepting of others""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
+
# Template object wrapping DEFAULT_PROMPT_TEMPLATE_TEXT; the names below are
# the input variables declared for it.
DEFAULT_PROMPT_TEMPLATE = PromptTemplate(
    input_variables=[
        "original_post",
        "background_info",
        "principles",
        "writing_style",
        "sources",
    ],
    template=DEFAULT_PROMPT_TEMPLATE_TEXT,
)
|
33 |
|
34 |
# %% ../nbs/free-speech-prompts.ipynb 9
|
35 |
+
def generate_custom_prompt(original_post, background_info=None, principles=None, writing_style=None, sources=None):
    """Fill ``DEFAULT_PROMPT_TEMPLATE`` for a post.

    Any optional argument left as ``None`` is replaced by a stock sentence
    before the template is formatted.

    Args:
        original_post: text of the post being responded to.
        background_info: optional extra context.
        principles: optional principles to emphasise.
        writing_style: optional writing-style examples.
        sources: optional sources to draw on.

    Returns:
        The result of ``DEFAULT_PROMPT_TEMPLATE.format(...)``.
    """
    return DEFAULT_PROMPT_TEMPLATE.format(
        original_post=original_post,
        background_info=(
            "There is no necessary additional context."
            if background_info is None
            else background_info
        ),
        principles=(
            "There are no principles which I consider more important to me than the average person might."
            if principles is None
            else principles
        ),
        writing_style=(
            "I have no examples of my writing style."
            if writing_style is None
            else writing_style
        ),
        sources=(
            "There are no particular sources that I would like you to use."
            if sources is None
            else sources
        ),
    )
|
54 |
|
55 |
+
# %% ../nbs/free-speech-prompts.ipynb 10
|
56 |
def get_chat_model_response(mdl, input_prompt):
    """Call ``mdl`` with ``input_prompt`` wrapped as a single ``HumanMessage``.

    Args:
        mdl: callable chat model that accepts a list of messages.
        input_prompt: prompt text used as the message content.

    Returns:
        Whatever ``mdl`` returns for the one-message list.
    """
    return mdl([HumanMessage(content=input_prompt)])
|
61 |
|
62 |
+
# %% ../nbs/free-speech-prompts.ipynb 11
|
63 |
+
def generate_custom_response(original_post, chat_mdl, background_info=None, principles=None, writing_style=None, sources=None):
    """Build the customised prompt for a post and return the chat model's reply.

    Args:
        original_post: text of the post being responded to.
        chat_mdl: chat model passed through to ``get_chat_model_response``.
        background_info, principles, writing_style, sources: optional
            values forwarded unchanged to ``generate_custom_prompt``.

    Returns:
        The value returned by ``get_chat_model_response`` for the filled prompt.
    """
    prompt_text = generate_custom_prompt(
        original_post,
        background_info=background_info,
        principles=principles,
        writing_style=writing_style,
        sources=sources,
    )
    return get_chat_model_response(chat_mdl, prompt_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|