# GeminiDoc / app.py
# Hugging Face Space by drmurataltun
# Last commit: "Update app.py" (16a2b39, verified)
import streamlit as st
import google.generativeai as genai
import markdown
from docx import Document
from bs4 import BeautifulSoup
from PyPDF2 import PdfFileReader
import tempfile
import os
# Configure the Gemini client at import time. The API key is read from the
# `gemini_api` environment variable; os.getenv returns None when it is unset.
genai.configure(api_key=os.getenv('gemini_api'))
# Function to convert PDF to text
def pdf_to_text(file):
    """Extract the text of every page of a PDF file.

    Args:
        file: Path of the PDF file on disk.

    Returns:
        The text of all pages concatenated in page order; no separator is
        inserted between pages.
    """
    # PdfFileReader / numPages / getPage are the legacy PyPDF2 names and are
    # no longer usable on PyPDF2 3.x. Prefer the replacement class and only
    # fall back to the legacy one on installs that do not provide it.
    try:
        from PyPDF2 import PdfReader as reader_cls
    except ImportError:
        reader_cls = PdfFileReader
    with open(file, 'rb') as f:
        reader = reader_cls(f)
        # `or ""` keeps the join safe if a page yields no text at all.
        return "".join(page.extract_text() or "" for page in reader.pages)
# Function to upload file to the Generative AI API
def upload_file(file_path):
    """Send a local file to the Generative AI file API.

    Progress is reported in the Streamlit page before and after the upload.

    Args:
        file_path: Path of the file to upload.

    Returns:
        The object returned by ``genai.upload_file``; its ``uri`` attribute is
        shown to the user once the upload completes.
    """
    st.write("Uploading file...")
    remote_file = genai.upload_file(path=file_path)
    st.write(f"Completed upload: {remote_file.uri}")
    return remote_file
# Function to convert text to Markdown
def to_markdown(text):
    """Format model output as a Markdown block quote.

    Bullet characters (``•``) are rewritten to Markdown list markers and every
    line, including blank ones, is prefixed with ``"> "``.

    Args:
        text: Plain text to format.

    Returns:
        The quoted Markdown string; an empty input yields an empty string.
    """
    # Imported here because the module's import block does not provide
    # textwrap; without it this function raised NameError on every call.
    import textwrap

    text = text.replace('•', ' *')
    return textwrap.indent(text, '> ', predicate=lambda _: True)
# Chat session shared by build_model() (which creates it) and chat() (which
# sends follow-up prompts through it).
chat_session = None


# Function to build the model
def build_model(text_file):
    """Create the Gemini chat session and show a one-sentence summary.

    Builds a ``gemini-1.5-flash`` model with a Turkish system instruction
    (answer from the uploaded document, otherwise reply that the document
    contains no answer), stores a fresh chat in the module-level
    ``chat_session``, asks for a one-sentence summary of the document and
    renders the reply in the Streamlit page.

    Args:
        text_file: The uploaded-file object to attach to the first message.
    """
    global chat_session

    system_prompt = (
        "Yüklenen belgedeki bilgilere göre Türkçe cevap ver.\n"
        "Eğer sorunun cevabı belgede bulunmuyorsa 'Belgede Cevap Bulunmuyor' yaz.\n"
    )
    sampling_options = dict(
        temperature=0.2,
        top_p=0.95,
        top_k=64,
        max_output_tokens=8192,
        response_mime_type="text/plain",
    )
    gemini = genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=sampling_options,
        system_instruction=system_prompt,
    )

    chat_session = gemini.start_chat(history=[])
    first_message = ["Yüklenen belgeyi bir cümle ile özetle", text_file]
    summary = chat_session.send_message(first_message)
    st.markdown(to_markdown(summary.text))
# Function to interact with the chat model
def chat(prompt):
    """Send one prompt through the shared chat session and display the reply.

    Args:
        prompt: The question to send to the model.

    Returns:
        The reply text on success, or ``None`` when the request failed or the
        reply had no usable text. In that case diagnostic information is
        written to the Streamlit page instead of raising.
    """
    # Pre-bind so the ValueError handler can tell "the call itself failed"
    # apart from "a response arrived but its text could not be read".
    response = None
    try:
        response = chat_session.send_message(prompt)
        answer = response.text
        st.markdown(to_markdown(answer))
        return answer
    except ValueError as e:
        if response is None:
            # send_message raised before any response existed; previously this
            # path crashed with UnboundLocalError inside the handler.
            st.write("An unexpected error occurred:", e)
        else:
            st.write(response.prompt_feedback)
            # The candidate list may be empty; indexing it blindly would raise
            # IndexError from inside the handler.
            if response.candidates:
                st.write(response.candidates[0].finish_reason)
                st.write(response.candidates[0].safety_ratings)
    except Exception as e:
        st.write("An unexpected error occurred:", e)
    return None
# Function to generate a report based on questions
def generate_report(questions):
    """Ask every question and assemble a Markdown report of the answers.

    Args:
        questions: Iterable of question strings. Entries that are empty or
            whitespace-only are skipped so no empty prompt is sent.

    Returns:
        Markdown text: a title heading followed by one ``##`` heading per
        question with its answer underneath. A question whose answer could
        not be obtained gets an empty answer body.
    """
    sections = ["\n## SORULARINIZ VE CEVAPLARI\n"]
    for question in questions:
        question = question.strip()
        if not question:
            # Blank lines in the input are not questions.
            continue
        answer = chat(question)
        if answer is None:
            # chat() returns None on failure; avoid writing the literal text
            # "None" into the report.
            answer = ""
        sections.append(f"\n## {question}\n")
        sections.append(f"\n{answer}\n")
    return "".join(sections)
# Function to convert Markdown to HTML
def convert_Markdown_to_HTML(report_text):
    """Render Markdown source as HTML.

    Args:
        report_text: Markdown text of the report.

    Returns:
        The HTML string produced by ``markdown.markdown``.
    """
    return markdown.markdown(report_text)
# Function to add HTML to a Word document
def add_html_to_word(html_text, doc):
    """Append the top-level elements of an HTML fragment to a Word document.

    ``h1``-``h6`` become headings of the matching level, each ``li`` found
    under a ``ul``/``ol`` becomes a bulleted/numbered paragraph, and any other
    tag (including ``p``) becomes a plain paragraph holding the tag's text.
    Nodes without a tag name are ignored.

    Args:
        html_text: HTML markup to convert.
        doc: Document object to append to; it is modified in place.
    """
    heading_levels = {f"h{n}": n for n in range(1, 7)}
    list_styles = {'ul': 'List Bullet', 'ol': 'List Number'}

    for node in BeautifulSoup(html_text, 'html.parser'):
        tag = node.name
        if not tag:
            # Bare text between elements carries no tag name; skip it.
            continue
        if tag in heading_levels:
            doc.add_heading(node.get_text(), level=heading_levels[tag])
        elif tag in list_styles:
            for item in node.find_all('li'):
                doc.add_paragraph(item.get_text(), style=list_styles[tag])
        else:
            doc.add_paragraph(node.get_text())
# Streamlit interface
st.title("REPORT GENERATOR: ASK YOUR QUESTIONS TO A PDF FILE by @drmurataltun")
st.write("Upload a PDF to ask questions and get the answers.")
uploaded_file = st.file_uploader("Upload PDF", type="pdf")
questions_input = st.text_area("Enter Questions", placeholder="Type your questions here, one per line.", height=150)

if uploaded_file and questions_input:
    # Keep a ".pdf" suffix on the temporary copy: the original suffix-less
    # name made the later ".pdf" -> ".docx" rename a no-op, and an extension
    # presumably also helps the upload API recognise the file type.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        # getvalue() returns the whole upload regardless of stream position.
        temp_file.write(uploaded_file.getvalue())
        temp_file_path = temp_file.name

    # The report is written next to the temporary PDF rather than into the
    # current working directory.
    doc_path = os.path.splitext(temp_file_path)[0] + ".docx"
    text_file = None
    try:
        # The former local PDF text extraction was removed here: its result
        # was never used, since the model receives the uploaded file itself.
        text_file = upload_file(temp_file_path)
        build_model(text_file)

        # One question per line; blank lines are not questions.
        questions = [line.strip() for line in questions_input.splitlines() if line.strip()]
        report_text = generate_report(questions)
        html_text = convert_Markdown_to_HTML(report_text)

        doc = Document()
        add_html_to_word(html_text, doc)
        doc.save(doc_path)

        st.markdown(report_text)
        st.write("Document generated successfully!")

        # Name the download after the user's own file, with a real extension.
        source_stem = os.path.splitext(os.path.basename(uploaded_file.name))[0]
        doc_name = "Rapor " + source_stem + ".docx"
        with open(doc_path, "rb") as file:
            report_bytes = file.read()
        st.download_button(label="Download Report", data=report_bytes, file_name=doc_name)
    finally:
        # Always clean up local files and the remote upload, even when a step
        # above failed part-way through.
        for leftover in (temp_file_path, doc_path):
            if os.path.exists(leftover):
                os.remove(leftover)
        if text_file is not None:
            genai.delete_file(text_file.name)