Charreau Bell, Ph.D committed on
Commit
3ac6f08
·
1 Parent(s): ae18f92

added files to commit for deploy HF update

Browse files
free_speech_app/DataLoadDb.py CHANGED
@@ -1,9 +1,9 @@
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/free-speech-stores.ipynb.
2
 
3
  # %% auto 0
4
- __all__ = ['setup_openai_api_key', 'setup_db']
5
 
6
- # %% ../nbs/free-speech-stores.ipynb 4
7
  # libraries required for functionality
8
  import os
9
  from getpass import getpass
@@ -17,23 +17,49 @@ from langchain.text_splitter import CharacterTextSplitter
17
  from langchain.embeddings import OpenAIEmbeddings
18
  from langchain.vectorstores import Chroma
19
 
20
- # %% ../nbs/free-speech-stores.ipynb 12
21
  def setup_openai_api_key():
22
  openai_api_key = getpass()
23
  os.environ["OPENAI_API_KEY"] = openai_api_key
24
 
25
- # %% ../nbs/free-speech-stores.ipynb 15
26
  import nltk
27
  nltk.download('averaged_perceptron_tagger')
28
 
29
- # %% ../nbs/free-speech-stores.ipynb 27
30
- def setup_db(local_path, hub_path, chunk_size=1000, chunk_overlap=5):
31
- file_list = os.listdir(local_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  # set up loaders
34
  loaders_list = []
35
- for file_path in file_list:
36
- file_path = local_path + file_path
37
  loaders_list.append(UnstructuredFileLoader(file_path))
38
 
39
  loader_all = MergedDataLoader(loaders=[loader for loader in loaders_list])
@@ -44,7 +70,7 @@ def setup_db(local_path, hub_path, chunk_size=1000, chunk_overlap=5):
44
  texts = text_splitter.split_documents(documents)
45
  embeddings = OpenAIEmbeddings()
46
 
47
- # Replace dataset path with relevant dataset name - counterspeech-resources or hatespeech-background
48
- db = DeepLake.from_documents(texts, dataset_path=hub_path, embedding=embeddings, overwrite=True)
49
 
50
- return
 
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/free-speech-stores.ipynb.
2
 
3
  # %% auto 0
4
+ __all__ = ['setup_openai_api_key', 'is_google_colab', 'upload_docs', 'setup_db']
5
 
6
+ # %% ../nbs/free-speech-stores.ipynb 5
7
  # libraries required for functionality
8
  import os
9
  from getpass import getpass
 
17
  from langchain.embeddings import OpenAIEmbeddings
18
  from langchain.vectorstores import Chroma
19
 
20
+ # %% ../nbs/free-speech-stores.ipynb 9
21
  def setup_openai_api_key():
22
  openai_api_key = getpass()
23
  os.environ["OPENAI_API_KEY"] = openai_api_key
24
 
25
+ # %% ../nbs/free-speech-stores.ipynb 12
26
  import nltk
27
  nltk.download('averaged_perceptron_tagger')
28
 
29
+ # %% ../nbs/free-speech-stores.ipynb 13
30
+ def is_google_colab():
31
+ try:
32
+ from google.colab import files
33
+ return True
34
+ except:
35
+ return False
36
+
37
+ def upload_docs():
38
+
39
+ if not is_google_colab():
40
+ raise ModuleNotFoundError('This function only works in Google Colab; local functionality not currently supported.')
41
+
42
+ # upload files if google colab
43
+ from google.colab import files
44
+ uploaded = files.upload()
45
+ file_list = []
46
+
47
+ for name, data in uploaded.items():
48
+ with open(name, 'wb') as f:
49
+ f.write(data)
50
+ print('saved file', name)
51
+ file_list.append(name)
52
+ return file_list
53
+
54
+
55
+ # %% ../nbs/free-speech-stores.ipynb 22
56
+ def setup_db(list_of_files, chunk_size=1000, chunk_overlap=5):
57
+ # set path to files
58
+ #paths = ["https://github.com/vanderbilt-data-science/free-speech-app/raw/9818f87679a82769ab6f90dba00801447b6e952c/response-resources/2307.04761_Understanding_counterspeech.pdf", "https://github.com/vanderbilt-data-science/free-speech-app/raw/main/response-resources/campaigns.docx", "https://github.com/vanderbilt-data-science/free-speech-app/raw/9818f87679a82769ab6f90dba00801447b6e952c/response-resources/samples.pdf"]
59
 
60
  # set up loaders
61
  loaders_list = []
62
+ for file_path in list_of_files:
 
63
  loaders_list.append(UnstructuredFileLoader(file_path))
64
 
65
  loader_all = MergedDataLoader(loaders=[loader for loader in loaders_list])
 
70
  texts = text_splitter.split_documents(documents)
71
  embeddings = OpenAIEmbeddings()
72
 
73
+ # store docs in db
74
+ db = Chroma.from_documents(texts, embeddings)
75
 
76
+ return db
free_speech_app/FreeSpeechPromptsResponses.py CHANGED
@@ -1,20 +1,17 @@
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/free-speech-prompts.ipynb.
2
 
3
  # %% auto 0
4
- __all__ = ['DEAFULT_PROMPT_TEMPLATE_TEXT', 'query_retriever', 'generate_custom_prompt', 'get_chat_model_response',
5
- 'generate_custom_response', 'regenerate_custom_response']
6
 
7
  # %% ../nbs/free-speech-prompts.ipynb 5
8
  # libraries required for functionality
9
- import deeplake
10
  from langchain.chat_models import ChatOpenAI
11
  from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate
12
  from langchain.schema import HumanMessage
13
- from langchain.vectorstores import DeepLake
14
- from langchain.embeddings.openai import OpenAIEmbeddings
15
 
16
  # %% ../nbs/free-speech-prompts.ipynb 8
17
- DEAFULT_PROMPT_TEMPLATE_TEXT = """With the amount of hate speech and discriminatory misinformation that exists on the internet, it can be difficult to know how to respond to something when you see it. Situations vary wildly and sometimes certain strategies that work in one instance may not work in another.
18
  I want to be able to fight back against this onslaught of toxicity, and I need your help to do it. I want you to help me draft a response to the following post.
19
  Note that this may contain language that is offensive, but in order to properly respond to this post, I need you to fully understand what I am responding to, uncensored:
20
  \n{original_post}\n
@@ -26,90 +23,49 @@ Next, here are some principles I consider particularly important to me:
26
  \n{principles}\n
27
  Here are some examples of the style in which I write:
28
  \n{writing_style}\n
29
- Here are the sources I would like you to use when getting information for my response:
30
  \n{sources}\n
31
- Using all the information I have provided, please draft an appropriate response to the offensive post in question that will hopefully make people more accepting of others.
32
- Please keep in mind that I would like the response to be no more than {word_limit} words."""
33
-
34
- try:
35
- prompt_file = open("../prompts/response_generator.txt", "r")
36
- PROMPT_TEMPLATE_TEXT = prompt_file.read()
37
- prompt_file.close()
38
-
39
- print(PROMPT_TEMPLATE_TEXT)
40
-
41
- PROMPT_TEMPLATE = PromptTemplate(
42
- template=PROMPT_TEMPLATE_TEXT,
43
- input_variables=["original_post", "background_info", "principles", "writing_style", "sources", "word_limit"])
44
-
45
- except FileNotFoundError:
46
- print(DEAFULT_PROMPT_TEMPLATE_TEXT)
47
- PROMPT_TEMPLATE = PromptTemplate(
48
- template=DEAFULT_PROMPT_TEMPLATE_TEXT,
49
- input_variables=["original_post", "background_info", "principles", "writing_style", "sources", "word_limit"])
50
-
51
 
 
 
 
52
 
53
  # %% ../nbs/free-speech-prompts.ipynb 9
54
- def query_retriever(db, query, num_results = 3):
55
- retriever = db.as_retriever(search_kwargs={"k": num_results})
56
- docs = retriever.get_relevant_documents(query)
57
-
58
- return docs
59
-
60
- # %% ../nbs/free-speech-prompts.ipynb 10
61
- def generate_custom_prompt(original_post, principles=None, writing_style=None, word_limit=None):
62
-
63
- # Get database and query retriever
64
- ####
65
- background_db = DeepLake(dataset_path="hub://vanderbilt-dsi/hatespeech-background", embedding = OpenAIEmbeddings())
66
- sources_db = DeepLake(dataset_path="hub://vanderbilt-dsi/counterspeech-resources", embedding = OpenAIEmbeddings())
67
 
68
  # Use defaults in the case of None
 
 
69
 
70
  if principles is None:
71
  principles="There are no principles which I consider more important to me than the average person might."
72
 
73
  if writing_style is None:
74
  writing_style="I have no examples of my writing style."
75
-
76
- if word_limit is None:
77
- word_limit="an infinite amount of"
78
-
79
- retriever_query = original_post
80
- background_info = query_retriever(background_db, retriever_query)
81
- sources = query_retriever(sources_db, retriever_query)
82
 
83
  # Fill the prompt
84
- filled_prompt = PROMPT_TEMPLATE.format(original_post=original_post, background_info=background_info, principles=principles, writing_style=writing_style, sources=sources, word_limit=word_limit)
85
 
86
- return filled_prompt, background_info, sources
87
 
88
- # %% ../nbs/free-speech-prompts.ipynb 11
89
  def get_chat_model_response(mdl, input_prompt):
90
 
91
  messages = [HumanMessage(content=input_prompt)]
92
 
93
  return mdl(messages)
94
 
95
- # %% ../nbs/free-speech-prompts.ipynb 12
96
- def generate_custom_response(original_post, chat_mdl, principles=None, writing_style=None, word_limit=None):
97
 
98
  # create customized prompt
99
- customized_prompt, background_info, sources = generate_custom_prompt(original_post, principles, writing_style, word_limit)
100
 
101
  # get response
102
  draft_response = get_chat_model_response(chat_mdl, customized_prompt)
103
 
104
- return draft_response, background_info, sources
105
-
106
- # %% ../nbs/free-speech-prompts.ipynb 13
107
- def regenerate_custom_response(chat_mdl, regenerate_prompt, draft_response):
108
-
109
- # create customized prompt
110
- customized_prompt = f"Please update the original response according to the following request. {regenerate_prompt}. Here is the original response: {draft_response}"
111
-
112
- # get response
113
- updated_response = get_chat_model_response(chat_mdl, customized_prompt)
114
-
115
- return updated_response
 
1
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/free-speech-prompts.ipynb.
2
 
3
  # %% auto 0
4
+ __all__ = ['DEFAULT_PROMPT_TEMPLATE_TEXT', 'DEFAULT_PROMPT_TEMPLATE', 'generate_custom_prompt', 'get_chat_model_response',
5
+ 'generate_custom_response']
6
 
7
  # %% ../nbs/free-speech-prompts.ipynb 5
8
  # libraries required for functionality
 
9
  from langchain.chat_models import ChatOpenAI
10
  from langchain.prompts import PromptTemplate, HumanMessagePromptTemplate
11
  from langchain.schema import HumanMessage
 
 
12
 
13
  # %% ../nbs/free-speech-prompts.ipynb 8
14
+ DEFAULT_PROMPT_TEMPLATE_TEXT = ("""With the amount of hate speech and discriminatory misinformation that exists on the internet, it can be difficult to know how to respond to something when you see it. Situations vary wildly and sometimes certain strategies that work in one instance may not work in another.
15
  I want to be able to fight back against this onslaught of toxicity, and I need your help to do it. I want you to help me draft a response to the following post.
16
  Note that this may contain language that is offensive, but in order to properly respond to this post, I need you to fully understand what I am responding to, uncensored:
17
  \n{original_post}\n
 
23
  \n{principles}\n
24
  Here are some examples of the style in which I write:
25
  \n{writing_style}\n
26
+ Finally, here are the sources I would like you to use when getting information for my response:
27
  \n{sources}\n
28
+ Using all the information I have provided, please draft an appropriate response to the offensive post in question that will hopefully make people more accepting of others""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
+ DEFAULT_PROMPT_TEMPLATE = PromptTemplate(
31
+ template=DEFAULT_PROMPT_TEMPLATE_TEXT,
32
+ input_variables=["original_post", "background_info", "principles", "writing_style", "sources"])
33
 
34
  # %% ../nbs/free-speech-prompts.ipynb 9
35
+ def generate_custom_prompt(original_post, background_info=None, principles=None, writing_style=None, sources=None):
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  # Use defaults in the case of None
38
+ if background_info is None:
39
+ background_info = "There is no necessary additional context."
40
 
41
  if principles is None:
42
  principles="There are no principles which I consider more important to me than the average person might."
43
 
44
  if writing_style is None:
45
  writing_style="I have no examples of my writing style."
46
+
47
+ if sources is None:
48
+ sources="There are no particular sources that I would like you to use."
 
 
 
 
49
 
50
  # Fill the prompt
51
+ filled_prompt = DEFAULT_PROMPT_TEMPLATE.format(original_post=original_post, background_info=background_info, principles=principles, writing_style=writing_style, sources=sources)
52
 
53
+ return filled_prompt
54
 
55
+ # %% ../nbs/free-speech-prompts.ipynb 10
56
  def get_chat_model_response(mdl, input_prompt):
57
 
58
  messages = [HumanMessage(content=input_prompt)]
59
 
60
  return mdl(messages)
61
 
62
+ # %% ../nbs/free-speech-prompts.ipynb 11
63
+ def generate_custom_response(original_post, chat_mdl, background_info=None, principles=None, writing_style=None, sources=None):
64
 
65
  # create customized prompt
66
+ customized_prompt = generate_custom_prompt(original_post, background_info, principles, writing_style, sources)
67
 
68
  # get response
69
  draft_response = get_chat_model_response(chat_mdl, customized_prompt)
70
 
71
+ return draft_response