CogwiseAI's picture
Upload app.py
28e56c1
raw
history blame
7.64 kB
import streamlit as st
import uuid
import sys
import requests
from peft import *
import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
LoraConfig,
PeftConfig,
get_peft_model,
prepare_model_for_kbit_training,
)
from transformers import (
AutoConfig,
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
)
USER_ICON = "images/user-icon.png"
AI_ICON = "images/ai-icon.png"
MAX_HISTORY_LENGTH = 5
if 'user_id' in st.session_state:
user_id = st.session_state['user_id']
else:
user_id = str(uuid.uuid4())
st.session_state['user_id'] = user_id
if 'chat_history' not in st.session_state:
st.session_state['chat_history'] = []
if "chats" not in st.session_state:
st.session_state.chats = [
{
'id': 0,
'question': '',
'answer': ''
}
]
if "questions" not in st.session_state:
st.session_state.questions = []
if "answers" not in st.session_state:
st.session_state.answers = []
if "input" not in st.session_state:
st.session_state.input = ""
st.markdown("""
<style>
.block-container {
padding-top: 32px;
padding-bottom: 32px;
padding-left: 0;
padding-right: 0;
}
.element-container img {
background-color: #000000;
}
.main-header {
font-size: 24px;
}
</style>
""", unsafe_allow_html=True)
def write_top_bar():
col1, col2, col3 = st.columns([1,10,2])
with col1:
st.image(AI_ICON, use_column_width='always')
with col2:
header = "Cogwise Intelligent Assistant"
st.write(f"<h3 class='main-header'>{header}</h3>", unsafe_allow_html=True)
with col3:
clear = st.button("Clear Chat")
return clear
clear = write_top_bar()
if clear:
st.session_state.questions = []
st.session_state.answers = []
st.session_state.input = ""
st.session_state["chat_history"] = []
def handle_input():
input = st.session_state.input
question_with_id = {
'question': input,
'id': len(st.session_state.questions)
}
st.session_state.questions.append(question_with_id)
chat_history = st.session_state["chat_history"]
if len(chat_history) == MAX_HISTORY_LENGTH:
chat_history = chat_history[:-1]
# api_url = "https://9pl792yjf9.execute-api.us-east-1.amazonaws.com/beta/chatcogwise"
# api_request_data = {"question": input, "session": user_id}
# api_response = requests.post(api_url, json=api_request_data)
# result = api_response.json()
# answer = result['answer']
# !pip install -Uqqq pip --progress-bar off
# !pip install -qqq bitsandbytes == 0.39.0
# !pip install -qqqtorch --2.0.1 --progress-bar off
# !pip install -qqq -U git + https://github.com/huggingface/transformers.git@e03a9cc --progress-bar off
# !pip install -qqq -U git + https://github.com/huggingface/peft.git@42a184f --progress-bar off
# !pip install -qqq -U git + https://github.com/huggingface/accelerate.git@c9fbb71 --progress-bar off
# !pip install -qqq datasets == 2.12.0 --progress-bar off
# !pip install -qqq loralib == 0.1.1 --progress-bar off
# !pip install einops
import os
# from pprint import pprint
# import json
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# notebook_login()
# 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
# """### Load dataset"""
from datasets import load_dataset
dataset_name = "nisaar/Lawyer_GPT_India"
# dataset_name = "patrick11434/TEST_LLM_DATASET"
dataset = load_dataset(dataset_name, split="train")
# """## Load adapters from the Hub
# You can also directly load adapters from the Hub using the commands below:
# """
# change peft_model_id
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
load_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16,
)
peft_model_id = "nisaar/falcon7b-Indian_Law_150Prompts"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(
config.base_model_name_or_path,
return_dict=True,
quantization_config=bnb_config,
device_map="auto",
trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token
model = PeftModel.from_pretrained(model, peft_model_id)
"""## Inference
You can then directly use the trained model or the model that you have loaded from the 🤗 Hub for inference as you would do it usually in `transformers`.
"""
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config_temperature = 1
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config_eod_token_id = tokenizer.eos_token_id
DEVICE = "cuda:0"
# Commented out IPython magic to ensure Python compatibility.
# %%time
# prompt = f"""
# <human>: Who appoints the Chief Justice of India?
# <assistant>:
# """.strip()
#
# encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
# with torch.inference_mode():
# outputs = model.generate(
# input_ids=encoding.attention_mask,
# generation_config=generation_config,
# )
# print(tokenizer.decode(outputs[0],skip_special_tokens=True))
def generate_response(question: str) -> str:
prompt = f"""
<human>: {question}
<assistant>:
""".strip()
encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
with torch.inference_mode():
outputs = model.generate(
input_ids=encoding.input_ids,
attention_mask=encoding.attention_mask,
generation_config=generation_config,
)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
assistant_start = '<assistant>:'
response_start = response.find(assistant_start)
return response[response_start + len(assistant_start):].strip()
# prompt = "Debate the merits and demerits of introducing simultaneous elections in India?"
prompt=input
answer=print(generate_response(prompt))
# answer='Yes'
chat_history.append((input, answer))
st.session_state.answers.append({
'answer': answer,
'id': len(st.session_state.questions)
})
st.session_state.input = ""
def write_user_message(md):
col1, col2 = st.columns([1,12])
with col1:
st.image(USER_ICON, use_column_width='always')
with col2:
st.warning(md['question'])
def render_answer(answer):
col1, col2 = st.columns([1,12])
with col1:
st.image(AI_ICON, use_column_width='always')
with col2:
st.info(answer)
def write_chat_message(md, q):
chat = st.container()
with chat:
render_answer(md['answer'])
with st.container():
for (q, a) in zip(st.session_state.questions, st.session_state.answers):
write_user_message(q)
write_chat_message(a, q)
st.markdown('---')
input = st.text_input("You are talking to an AI, ask any question.", key="input", on_change=handle_input)