Spaces:
Runtime error
Runtime error
File size: 4,858 Bytes
c18db37 2ef4006 c18db37 08af166 6266cf4 5455896 ff0ccdb 6266cf4 de6d7ec 68decb1 efe1021 9b06b1e a1b669a d434e57 85064b1 6481f63 9b06b1e f240a0c 9b06b1e 8f99b37 9b06b1e 08af166 5455896 6481f63 85064b1 c18db37 d434e57 c18db37 c60c8cf 48295f3 c60c8cf c18db37 dd5e8e8 f60697c c18db37 d434e57 c18db37 85064b1 6481f63 20415a9 c18db37 117b6a7 c18db37 6481f63 c18db37 6ca51ed 6481f63 5816dc1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
import torch
import gradio as gr
from datasets import load_dataset
# PersistDataset -----
import os
import csv
from gradio import inputs, outputs
import huggingface_hub
from huggingface_hub import Repository, hf_hub_download, upload_file
from datetime import datetime
# FastAPI is where it's at: share your app, share your API
import fastapi
from typing import List, Dict
import httpx
import pandas as pd
# ---------------------------------------------------------------------------
# Persistent "memory": chat exchanges are appended to a CSV file that lives in
# a local clone of a Hugging Face dataset repository.  To use it, set up your
# own dataset repo, point the constants below at it, provide an HF_TOKEN
# environment variable, and flip UseMemory to True.
# ---------------------------------------------------------------------------
UseMemory = False
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/ChatbotMemory.csv"
DATASET_REPO_ID = "awacke1/ChatbotMemory.csv"
DATA_FILENAME = "ChatbotMemory.csv"
# The CSV is read/written inside the "data" clone created below.
DATA_FILE = os.path.join("data", DATA_FILENAME)
HF_TOKEN = os.environ.get("HF_TOKEN")

if UseMemory:
    try:
        # NOTE(review): DATA_DIRNAME is not defined anywhere in the visible
        # file, so this call raises NameError before any download happens and
        # the handler below reports it.  Before defining it, confirm the
        # intended cache directory: pointing it at "data" would leave files in
        # the folder that Repository() clones into just below.
        hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=DATA_FILENAME,
            cache_dir=DATA_DIRNAME,
            force_filename=DATA_FILENAME,
        )
    except Exception as err:
        # Best-effort step: keep going, but do not trap KeyboardInterrupt /
        # SystemExit, and surface the real cause instead of hiding it.
        print("file not found")
        print(f"hf_hub_download failed: {err!r}")
    # Clone the dataset repo; store_message() appends to DATA_FILE inside this
    # clone and pushes it back through `repo`.
    repo = Repository(
        local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
    )
def get_df(name: str):
    """Load the ``train`` split of the dataset called *name*.

    Args:
        name: Dataset identifier or path, forwarded to ``load_dataset``.

    Returns:
        Whatever ``load_dataset(name, split="train")`` returns.
    """
    # The original passed the builtin ``str`` type here instead of ``name``,
    # so the requested dataset was never the one that got loaded.
    return load_dataset(name, split="train")
def store_message(name: str, message: str) -> str:
    """Append one row to the memory CSV and push the clone to the Hub.

    A row is written only when both arguments are non-empty; otherwise the
    call is a no-op.  chat() invokes this as ``store_message(message,
    response)``, so ``name`` carries the user's text and ``message`` the
    bot's reply.

    Relies on the module-level ``DATA_FILE`` and ``repo``; ``repo`` is only
    created when ``UseMemory`` is true.

    Args:
        name: Value stored (stripped) in the ``name`` column.
        message: Value stored (stripped) in the ``message`` column.

    Returns:
        Always the empty string.
    """
    if name and message:
        # newline="" lets the csv module control line endings itself, and an
        # explicit encoding keeps non-ASCII chat text (emoji, accents) from
        # depending on the platform's default locale.
        with open(DATA_FILE, "a", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["time", "message", "name"])
            writer.writerow(
                {
                    "time": str(datetime.now()),
                    "message": message.strip(),
                    "name": name.strip(),
                }
            )
        # Push after the ``with`` block so the row is flushed and the file is
        # closed before it is committed.
        repo.push_to_hub()
    return ""
# ----------------------------------------------- For Memory (end of section)
# Checkpoint name shared by the model and the tokenizer loaded below.
mname = "facebook/blenderbot-400M-distill"
# Both objects are created once at import time and reused by chat() on every call.
model = BlenderbotForConditionalGeneration.from_pretrained(mname)
tokenizer = BlenderbotTokenizer.from_pretrained(mname)
def take_last_tokens(inputs, note_history, history, max_tokens=128):
    """Trim an over-long prompt down to its most recent ``max_tokens`` tokens.

    When ``inputs['input_ids']`` has more than ``max_tokens`` columns, both
    ``input_ids`` and ``attention_mask`` are cut to their last ``max_tokens``
    columns, the first two ``'</s> <s>'``-separated entries are dropped from
    ``note_history[0]``, and the first element is dropped from ``history``.
    Otherwise everything is returned unchanged.

    Args:
        inputs: Mapping holding 2-D ``input_ids`` and ``attention_mask``
            tensors indexed as ``[row, token]``.  The mapping is updated in
            place when truncation happens.
        note_history: One-element list whose string holds the conversation
            joined with ``'</s> <s>'``.  Not modified; a new list is returned.
        history: Sequence of past turns.  Not modified; a shortened copy is
            returned when truncation happens.
        max_tokens: Maximum number of trailing tokens to keep (default 128,
            the value that used to be hard-coded).

    Returns:
        Tuple ``(inputs, note_history, history)``.

    Raises:
        ValueError: If ``max_tokens`` is smaller than 1.
    """
    if max_tokens < 1:
        # A slice of [-0:] would silently keep everything, so reject it.
        raise ValueError("max_tokens must be at least 1")

    if inputs['input_ids'].shape[1] > max_tokens:
        # Slice along the token axis directly instead of converting to a
        # Python list and rebuilding the tensor; this keeps every row.
        for key in ('input_ids', 'attention_mask'):
            inputs[key] = inputs[key][:, -max_tokens:]
        separator = '</s> <s>'
        # Drop the oldest two entries (one exchange) from the joined text and
        # the matching first element of the structured history.
        note_history = [separator.join(note_history[0].split(separator)[2:])]
        history = history[1:]
    return inputs, note_history, history
def add_note_to_history(note, note_history):
    """Return the history with ``note`` added, collapsed into one string.

    The existing entries of ``note_history`` followed by ``note`` are joined
    with the ``'</s> <s>'`` separator and wrapped in a one-element list.

    Args:
        note: Text to add at the end of the history.
        note_history: List of strings (empty, or the one-element list returned
            by a previous call).  It is not modified.

    Returns:
        A new one-element list containing the joined string.
    """
    # Build a fresh list rather than appending to the argument, so the
    # caller's list is never changed behind its back.
    return ['</s> <s>'.join([*note_history, note])]
# Page title handed to gr.Interface further down.
title = "💬ChatBack🧠💾"
# NOTE(review): `description` is not referenced by the gr.Interface call in this
# file, which passes its own description literal - confirm which text is intended.
description = """Chatbot With persistent memory dataset allowing multiagent system AI to access a shared dataset as memory pool with stored interactions.
Current Best SOTA Chatbot: https://huggingface.co/facebook/blenderbot-400M-distill?text=Hey+my+name+is+ChatBack%21+Are+you+ready+to+rock%3F """
def chat(message, history):
    """Produce the bot's reply to ``message`` and extend the conversation.

    Args:
        message: The user's new message.
        history: Sequence of previous (user, bot) pairs, or a falsy value
            when the conversation is just starting.

    Returns:
        The updated history twice: once for the chatbot display and once for
        the state that is fed back in on the next call.
    """
    separator = '</s> <s>'

    if not history:
        history = []
        context = []
    else:
        # Flatten every stored pair into one separator-joined prompt string.
        context = [
            separator.join(str(part) for pair in history for part in (pair[0], pair[1]))
        ]

    context = add_note_to_history(message, context)
    encoded = tokenizer(context, return_tensors="pt")
    # Keep the prompt within the size the helper allows; it may also drop the
    # oldest exchange from both the prompt text and the history.
    encoded, context, history = take_last_tokens(encoded, context, history)

    generated = model.generate(**encoded)
    response = tokenizer.batch_decode(generated, skip_special_tokens=True)[0]

    context = add_note_to_history(response, context)
    turns = context[0].split(separator)
    # The last two entries are the message just sent and the reply to it.
    history.append((turns[-2], turns[-1]))

    if UseMemory:
        # Save to the dataset; needs the memory setup above and an HF_TOKEN.
        store_message(message, response)
    return history, history
# Assemble the UI: a text box plus hidden state in, a chatbot view plus the
# updated state out, then start serving it.
demo = gr.Interface(
    fn=chat,
    inputs=["text", "state"],
    outputs=["chatbot", "state"],
    title=title,
    description=f"Gradio chatbot backed by memory in a dataset repository.",
    article=f"The memory dataset for saves is [{DATASET_REPO_URL}]({DATASET_REPO_URL}) And here: https://huggingface.co/spaces/awacke1/DatasetAnalyzer Code and datasets on chat are here hf tk: https://paperswithcode.com/datasets?q=chat&v=lst&o=newest",
    theme="huggingface",
    css=".footer {display:none !important}",
    allow_flagging="never",
)
demo.launch(debug=True)
|