awacke1 committed on
Commit
fc9c564
1 Parent(s): c93920f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -82
app.py CHANGED
@@ -19,66 +19,61 @@ import httpx
19
  import pandas as pd
20
  import datasets as ds
21
 
22
- # -------------------------------------------- For Memory - you will need to set up a dataset and HF_TOKEN ---------
23
- #UseMemory=False
24
  UseMemory=True
25
-
26
-
27
- DATASET_REPO_URL="https://huggingface.co/datasets/awacke1/ChatbotMemory.csv"
28
- DATASET_REPO_ID="awacke1/ChatbotMemory.csv"
29
- DATA_FILENAME="ChatbotMemory.csv"
30
- DATA_FILE=os.path.join("data", DATA_FILENAME)
31
  HF_TOKEN=os.environ.get("HF_TOKEN")
32
 
33
- if UseMemory:
34
- try:
35
- hf_hub_download(
36
- repo_id=DATASET_REPO_ID,
37
- filename=DATA_FILENAME,
38
- cache_dir=DATA_DIRNAME,
39
- force_filename=DATA_FILENAME
40
- )
41
- except:
42
- print("file not found")
43
- repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
44
- #repo.git_pull(rebase=True)
45
- #repo.git_pull()
 
 
 
46
 
47
- def get_df(name: str):
48
- dataset = load_dataset(str, split="train")
49
- return dataset
50
 
51
- #def store_message(name: str, message: str) -> str:
52
- def store_message(name: str, message: str):
53
- if name and message:
54
- #repo.git_pull() # test repull to avoid out of sync rrepo error due to others commits
55
- #repo = repo.git_pull(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN) # test repull to avoid out of sync rrepo error due to others commits
56
- #repo = Repository(local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN)
57
- #
58
- with open(DATA_FILE, "a") as csvfile:
59
- writer = csv.DictWriter(csvfile, fieldnames=[ "time", "message", "name", ])
60
- writer.writerow(
61
- {"time": str(datetime.now()), "message": message.strip(), "name": name.strip() }
62
- )
63
- #repo.git_pull(rebase=True)
64
- commit_url = repo.push_to_hub()
65
-
66
- # test api retrieval of any dataset that is saved, then return it...
67
- # app = FastAPI()
68
- # see: https://gradio.app/sharing_your_app/#api-page
69
-
70
- # f=get_df(DATASET_REPO_ID)
71
- # print(f)
72
- #return commit_url
73
- return ""
74
- # ----------------------------------------------- For Memory
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  mname = "facebook/blenderbot-400M-distill"
77
  model = BlenderbotForConditionalGeneration.from_pretrained(mname)
78
  tokenizer = BlenderbotTokenizer.from_pretrained(mname)
79
 
80
  def take_last_tokens(inputs, note_history, history):
81
- """Filter the last 128 tokens"""
82
  if inputs['input_ids'].shape[1] > 128:
83
  inputs['input_ids'] = torch.tensor([inputs['input_ids'][0][-128:].tolist()])
84
  inputs['attention_mask'] = torch.tensor([inputs['attention_mask'][0][-128:].tolist()])
@@ -87,7 +82,6 @@ def take_last_tokens(inputs, note_history, history):
87
  return inputs, note_history, history
88
 
89
  def add_note_to_history(note, note_history):# good example of non async since we wait around til we know it went okay.
90
- """Add a note to the historical information"""
91
  note_history.append(note)
92
  note_history = '</s> <s>'.join(note_history)
93
  return [note_history]
@@ -96,12 +90,21 @@ title = "💬ChatBack🧠💾"
96
  description = """Chatbot With persistent memory dataset allowing multiagent system AI to access a shared dataset as memory pool with stored interactions.
97
  Current Best SOTA Chatbot: https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+ChatBack%21+Are+you+ready+to+rock%3F """
98
 
 
 
 
 
 
 
 
 
99
  def chat(message, history):
100
  history = history or []
101
  if history:
102
  history_useful = ['</s> <s>'.join([str(a[0])+'</s> <s>'+str(a[1]) for a in history])]
103
  else:
104
  history_useful = []
 
105
  history_useful = add_note_to_history(message, history_useful)
106
  inputs = tokenizer(history_useful, return_tensors="pt")
107
  inputs, history_useful, history = take_last_tokens(inputs, history_useful, history)
@@ -114,46 +117,28 @@ def chat(message, history):
114
  df=pd.DataFrame()
115
 
116
  if UseMemory:
117
- store_message(message, response) # Save to dataset -- uncomment with code above, create a dataset to store and add your HF_TOKEN from profile to this repo to use.
118
- df = ds.load_dataset("awacke1/ChatbotMemory.csv")
119
- df = df["train"].to_pandas()
120
- df = df.sort_values(by="time",ascending=False)
121
- #df.sort_index(axis=1, ascending=False)
122
- return history, df
123
- #return df
124
- #return history, df
125
 
126
 
127
- #gr.Interface(
128
- # fn=chat,
129
- # theme="huggingface",
130
- # css=".footer {display:none !important}",
131
- # inputs=["text", "state"],
132
- # #outputs=["chatbot", "state", "text"],
133
- # outputs=["chatbot", "state", "dataframe"],
134
- # title=title,
135
- # allow_flagging="never",
136
- # description=f"Gradio chatbot backed by memory in a dataset repository.",
137
- # article=f"The memory dataset for saves is [{DATASET_REPO_URL}]({DATASET_REPO_URL}) And here: https://huggingface.co/spaces/awacke1/DatasetAnalyzer Code and datasets on chat are here hf tk: https://paperswithcode.com/datasets?q=chat&v=lst&o=newest"
138
- # ).launch(debug=True)
139
-
140
-
141
-
142
  with gr.Blocks() as demo:
143
- gr.Markdown("<h1><center>🍰Gradio chatbot backed by memory in a dataset repository.🎨</center></h1>")
144
- #gr.Markdown("The memory dataset for saves is [{DATASET_REPO_URL}]({DATASET_REPO_URL}) And here: https://huggingface.co/spaces/awacke1/DatasetAnalyzer Code and datasets on chat are here hf tk: https://paperswithcode.com/datasets?q=chat&v=lst&o=newest")
145
 
146
  with gr.Row():
147
  t1 = gr.Textbox(lines=1, default="", label="Chat Text:")
148
- b1 = gr.Button("Send Message")
149
 
150
  with gr.Row(): # inputs and buttons
151
  s1 = gr.State([])
152
- s2 = gr.Markdown()
153
- with gr.Row():
154
  df1 = gr.Dataframe(wrap=True, max_rows=1000, overflow_row_behaviour= "paginate")
155
- #chatoutput = gr.Dataframe(wrap=True, max_rows=1000, overflow_row_behaviour= "paginate", datatype = ["markdown", "markdown"], headers=['url', 'prompt'])
156
-
157
- b1.click(fn=chat, inputs=[t1, s1], outputs=[s1, df1])
158
 
159
- demo.launch(debug=True, show_error=True)
 
 
 
19
  import pandas as pd
20
  import datasets as ds
21
 
 
 
22
  UseMemory=True
 
 
 
 
 
 
23
  HF_TOKEN=os.environ.get("HF_TOKEN")
24
 
25
def SaveResult(text, outputfileName):
    """Append ``text`` as a single line to ``outputfileName``.

    Newlines inside ``text`` are replaced with spaces so that every call adds
    exactly one line. If the file does not exist yet it is created and the
    header line ``time, message, text`` is written before the first entry.

    Args:
        text: The string to store.
        outputfileName: Path of the file to append to.

    Returns:
        None.
    """
    print("Saving: " + text + " to " + outputfileName)
    needs_header = not os.path.exists(outputfileName)
    # Append mode creates a missing file, so one code path covers both the
    # "first write" and the "subsequent write" cases.
    with open(outputfileName, "a", encoding="utf-8") as f:
        if needs_header:
            # One time only: column headers for the CSV file.
            f.write("time, message, text\n")
        f.write(text.replace("\n", " "))
        f.write("\n")
41
 
 
 
 
42
 
43
def store_message(name: str, message: str, outputfileName: str):
    """Append one chat record to a CSV file and return the full log.

    If ``outputfileName`` does not exist it is created with the header line
    ``time, message, text``. When both ``name`` and ``message`` are non-empty,
    a row ``(current time, message, name)`` is appended, with both strings
    stripped of surrounding whitespace. Note that the header labels the third
    column ``text`` while the row stores ``name`` there.

    Args:
        name: Third-column value of the record; nothing is stored if empty.
        message: Second-column value of the record; nothing is stored if empty.
        outputfileName: Path of the CSV file used as storage.

    Returns:
        A pandas DataFrame with the whole file content, sorted by its first
        column (the timestamp) in descending order. A frame is returned even
        when no row was written.
    """
    is_new_file = not os.path.exists(outputfileName)
    has_row = bool(name and message)

    if is_new_file or has_row:
        # newline="" is what the csv module requires so the writer controls
        # line endings itself; append mode also creates a missing file.
        with open(outputfileName, "a", newline="", encoding="utf-8") as csvfile:
            if is_new_file:
                # One time only: column headers for the CSV file.
                csvfile.write("time, message, text\n")
            if has_row:
                writer = csv.DictWriter(csvfile, fieldnames=["time", "message", "name"])
                writer.writerow(
                    {
                        "time": str(datetime.now()),
                        "message": message.strip(),
                        "name": name.strip(),
                    }
                )

    # The file handle is closed at this point, so everything written above is
    # flushed to disk before it is read back.
    df = pd.read_csv(outputfileName)
    return df.sort_values(df.columns[0], ascending=False)
71
+
72
  mname = "facebook/blenderbot-400M-distill"
73
  model = BlenderbotForConditionalGeneration.from_pretrained(mname)
74
  tokenizer = BlenderbotTokenizer.from_pretrained(mname)
75
 
76
  def take_last_tokens(inputs, note_history, history):
 
77
  if inputs['input_ids'].shape[1] > 128:
78
  inputs['input_ids'] = torch.tensor([inputs['input_ids'][0][-128:].tolist()])
79
  inputs['attention_mask'] = torch.tensor([inputs['attention_mask'][0][-128:].tolist()])
 
82
  return inputs, note_history, history
83
 
84
  def add_note_to_history(note, note_history):# good example of non async since we wait around til we know it went okay.
 
85
  note_history.append(note)
86
  note_history = '</s> <s>'.join(note_history)
87
  return [note_history]
 
90
  description = """Chatbot With persistent memory dataset allowing multiagent system AI to access a shared dataset as memory pool with stored interactions.
91
  Current Best SOTA Chatbot: https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+ChatBack%21+Are+you+ready+to+rock%3F """
92
 
93
def get_base(filename):
    """Return the path of ``filename`` inside this script's directory.

    The directory is taken from ``__file__``. The parts are combined with
    ``os.path.join`` so a separator is inserted when the directory is
    non-empty, and the bare ``filename`` is returned when it is empty.
    Both the directory and the resulting path are printed.

    Args:
        filename: File name to resolve against the script directory.

    Returns:
        The combined path as a string.
    """
    basedir = os.path.dirname(__file__)
    print(basedir)
    loadPath = os.path.join(basedir, filename)
    print(loadPath)
    return loadPath
100
+
101
  def chat(message, history):
102
  history = history or []
103
  if history:
104
  history_useful = ['</s> <s>'.join([str(a[0])+'</s> <s>'+str(a[1]) for a in history])]
105
  else:
106
  history_useful = []
107
+
108
  history_useful = add_note_to_history(message, history_useful)
109
  inputs = tokenizer(history_useful, return_tensors="pt")
110
  inputs, history_useful, history = take_last_tokens(inputs, history_useful, history)
 
117
  df=pd.DataFrame()
118
 
119
  if UseMemory:
120
+ #outputfileName = 'ChatbotMemory.csv'
121
+ outputfileName = 'ChatbotMemory3.csv' # Test first time file create
122
+ df = store_message(message, response, outputfileName) # Save to dataset
123
+ basedir = get_base(outputfileName)
124
+
125
+ return history, df, basedir
 
 
126
 
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  with gr.Blocks() as demo:
129
+ gr.Markdown("<h1><center>🍰Gradio chatbot backed by dataframe CSV memory🎨</center></h1>")
 
130
 
131
  with gr.Row():
132
  t1 = gr.Textbox(lines=1, default="", label="Chat Text:")
133
+ b1 = gr.Button("Respond and Retrieve Messages")
134
 
135
  with gr.Row(): # inputs and buttons
136
  s1 = gr.State([])
 
 
137
  df1 = gr.Dataframe(wrap=True, max_rows=1000, overflow_row_behaviour= "paginate")
138
+ with gr.Row(): # inputs and buttons
139
+ file = gr.File(label="File")
140
+ s2 = gr.Markdown()
141
 
142
+ b1.click(fn=chat, inputs=[t1, s1], outputs=[s1, df1, file])
143
+
144
+ demo.launch(debug=True, show_error=True)