# Author: raannakasturi
# Commit 33dc0c0 (verified): "Update tools.py"
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from llama_cpp import Llama
def extract_text_from_pdf(pdf_path, start_marker="ABSTRACT", end_marker="REFERENCES"):
    """Return the text of a PDF between two section markers, plus its length.

    The PDF is loaded page by page, the page texts are joined with single
    spaces, and the span starting at the first ``start_marker`` and ending
    just before the first ``end_marker`` that follows it is kept. The match
    is case-sensitive.

    Args:
        pdf_path: Path of the PDF file handed to ``PyPDFLoader``.
        start_marker: Literal string that opens the wanted span
            (included in the result).
        end_marker: Literal string that closes the wanted span
            (excluded from the result).

    Returns:
        A ``(text, length)`` tuple where ``length == len(text)``. When the
        markers cannot be located in order, ``text`` is the fixed message
        "Unable to locate the specified sections in the document." rather
        than an exception, so callers must be prepared for that value.
    """
    loader = PyPDFLoader(pdf_path)
    # load() yields whole pages; load_and_split() would first run the
    # loader's default overlapping splitter, and joining its chunks would
    # repeat the overlapped text.
    pages = loader.load()
    all_text = " ".join(page.page_content for page in pages)

    start_index = all_text.find(start_marker)
    # Look for the end marker only after the start marker so that an earlier
    # occurrence (e.g. in a table of contents) does not defeat the extraction.
    end_index = all_text.find(end_marker, start_index) if start_index != -1 else -1

    if start_index != -1 and end_index != -1 and start_index < end_index:
        relevant_text = all_text[start_index:end_index]
    else:
        relevant_text = "Unable to locate the specified sections in the document."

    # The chunks are re-joined immediately, so they must not overlap
    # (overlap would duplicate text at every boundary), and they need a
    # separating space because the splitter strips whitespace at chunk edges.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=0)
    text_list = text_splitter.split_text(relevant_text)
    research_paper_text = " ".join(text_list)

    return research_paper_text, len(research_paper_text)
def load_llm_model():
    """Load the Llama 3.2 1B Instruct (Q8_0 GGUF) model and return it.

    The model is obtained through ``Llama.from_pretrained`` using the
    repository id and file name below, with a 50000-token context window,
    a batch size of 16384 and verbose output disabled.

    Returns:
        The ``Llama`` instance produced by ``Llama.from_pretrained``.

    Raises:
        Exception: Any error raised while loading is reported on stdout
            and then re-raised unchanged.
    """
    model_settings = {
        "repo_id": "bartowski/Llama-3.2-1B-Instruct-GGUF",
        "filename": "Llama-3.2-1B-Instruct-Q8_0.gguf",
        "n_ctx": 50000,
        "n_batch": 16384,
        "verbose": False,
    }
    try:
        model = Llama.from_pretrained(**model_settings)
        print("LLM model loaded successfully")
        return model
    except Exception as err:
        # Report the failure, then let the caller decide how to handle it.
        print(f"Error loading LLM model: {err}")
        raise