|
import os

import docx
import gradio as gr
import openpyxl
import pandas as pd
import requests
from bs4 import BeautifulSoup
from pptx import Presentation
from PyPDF2 import PdfReader
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import pipeline
|
|
|
|
|
# Checkpoint used to answer questions. It has to be an encoder-decoder
# (seq2seq) model because it is loaded through AutoModelForSeq2SeqLM below;
# a decoder-only checkpoint such as LLaMA cannot be loaded through that class.
model_name = "google/flan-t5-base"

# Loaded once at import time and reused by every call to answer_questions().
rag_tokenizer = AutoTokenizer.from_pretrained(model_name)
rag_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
|
|
|
|
def read_text_from_document(file):
    """Extract the plain text of a document.

    Args:
        file: Either a filesystem path (``str`` or ``os.PathLike``) or a
            file-like object exposing a ``name`` attribute. The document type
            is chosen from the name's extension, compared case-insensitively.
            Supported extensions: .txt, .pdf, .docx, .pptx, .xlsx.

    Returns:
        The extracted text. PDF pages, Word paragraphs and PowerPoint shapes
        are joined with newlines; every non-empty cell of the active Excel
        sheet is followed by a single space.

    Raises:
        ValueError: If the extension is not one of the supported types.
    """
    if isinstance(file, (str, os.PathLike)):
        source = os.fsdecode(file)
        filename = source
        is_path = True
    else:
        source = file
        filename = str(getattr(file, "name", ""))
        is_path = False
    lowered = filename.lower()

    if lowered.endswith(".txt"):
        if is_path:
            with open(source, "rb") as handle:
                raw = handle.read()
        else:
            raw = file.read()
        # A stream opened in text mode already yields str; only bytes need decoding.
        if isinstance(raw, (bytes, bytearray)):
            return raw.decode("utf-8")
        return raw

    if lowered.endswith(".pdf"):
        reader = PdfReader(source)
        # Defensive: treat a missing extraction result as empty text.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    if lowered.endswith(".docx"):
        document = docx.Document(source)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    if lowered.endswith(".pptx"):
        chunks = []
        for slide in Presentation(source).slides:
            # Only some shapes carry text.
            chunks.extend(
                shape.text for shape in slide.shapes if hasattr(shape, "text")
            )
        return "\n".join(chunks)

    if lowered.endswith(".xlsx"):
        sheet = openpyxl.load_workbook(source).active
        # Skip empty cells so the literal string "None" never reaches the text.
        return "".join(
            f"{cell.value} "
            for row in sheet.rows
            for cell in row
            if cell.value is not None
        )

    raise ValueError(
        f"Unsupported document type: {filename!r} "
        "(expected .txt, .pdf, .docx, .pptx or .xlsx)"
    )
|
|
|
|
|
def scrape_url(url, timeout=10):
    """Download a web page and return its text content.

    Args:
        url: Address of the page to fetch. ``None``, empty and
            whitespace-only values are treated as "no URL".
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text of the page, an empty string when no URL was given, or the
        error message when the request fails (so the caller always receives
        a string).
    """
    if not url or not url.strip():
        return ""

    try:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url.strip(), timeout=timeout)
        # Turn 4xx/5xx answers into errors instead of scraping the error page.
        response.raise_for_status()
    except requests.RequestException as exc:
        return str(exc)

    return BeautifulSoup(response.text, "html.parser").get_text()
|
|
|
|
|
def answer_questions(data, question, max_length=100):
    """Answer a question about the supplied text with the module-level model.

    Args:
        data: Context text the answer should be based on.
        question: The question to answer; ``None`` is treated as empty.
        max_length: Value passed to ``generate`` as ``max_length``.

    Returns:
        The decoded answer, or ``"No data provided"`` when ``data`` is empty
        or contains only whitespace.
    """
    if not data or not data.strip():
        return "No data provided"

    prompt = "Question: " + (question or "") + " Context: " + data
    # Truncate so a long document cannot exceed the tokenizer's input limit.
    inputs = rag_tokenizer.encode(prompt, return_tensors="pt", truncation=True)
    outputs = rag_model.generate(inputs, max_length=max_length)
    # generate() returns a batch of sequences; decode() expects a single one.
    return rag_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
|
def _answer_from_inputs(data, url, question):
    """Gradio handler: answer from the uploaded file if any, else from the URL."""
    if data:
        context = read_text_from_document(data)
    elif url and url.strip():
        context = scrape_url(url.strip())
    else:
        # Neither a file nor a URL: let answer_questions report the missing data.
        context = ""
    return answer_questions(context, question)


# Web UI: one optional file upload, one optional URL, and the question.
demo = gr.Interface(
    fn=_answer_from_inputs,
    inputs=[
        gr.File(label="Upload Document (.txt, .pdf, .docx, .pptx, .xlsx)"),
        gr.Textbox(label="Enter URL"),
        gr.Textbox(label="Ask a question"),
    ],
    outputs=gr.Textbox(label="Answer"),
    title="RAG Chat",
    description="Upload a document or enter a URL and ask a question",
)


if __name__ == "__main__":
    # Start the web server only when run as a script, not when imported.
    demo.launch()