from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from llama_cpp import Llama

def extract_text_from_pdf(pdf_path):
    # Load the PDF and split it into per-page documents.
    loader = PyPDFLoader(pdf_path)
    pages = loader.load_and_split()
    all_text = " ".join([page.page_content for page in pages])

    # Keep only the body of the paper, from "ABSTRACT" up to "REFERENCES".
    start_index = all_text.find("ABSTRACT")
    end_index = all_text.find("REFERENCES")
    if start_index != -1 and end_index != -1 and start_index < end_index:
        relevant_text = all_text[start_index:end_index]
    else:
        relevant_text = "Unable to locate the specified sections in the document."

    # Chunk the text, then re-join the chunks and record the total length.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=50)
    text_list = text_splitter.split_text(relevant_text)
    research_paper_text = "".join(text_list)
    length_of_research_paper = len(research_paper_text)
    return research_paper_text, length_of_research_paper

def load_llm_model():
    try:
        # Download the GGUF weights from the Hugging Face Hub and load them with llama.cpp.
        llm = Llama.from_pretrained(
            repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
            filename="Llama-3.2-1B-Instruct-Q8_0.gguf",
            n_ctx=50000,
            n_batch=16384,
            verbose=False,
        )
        print("LLM model loaded successfully")
        return llm
    except Exception as e:
        print(f"Error loading LLM model: {e}")
        raise
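
Taken together, the two helpers can be wired into a short summarization script. The sketch below is an illustration and not part of the original file: the input path "paper.pdf", the prompt wording, and the token limit are assumptions, and it uses llama-cpp-python's create_chat_completion interface to ask the loaded model for a summary of the extracted text.

# Illustrative usage sketch (assumed, not from the original file):
# summarize a local "paper.pdf" using the two helpers defined above.
if __name__ == "__main__":
    paper_text, paper_length = extract_text_from_pdf("paper.pdf")
    print(f"Extracted {paper_length} characters from the paper")

    llm = load_llm_model()
    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": "You are a helpful research assistant."},
            {"role": "user", "content": f"Summarize this paper in a few sentences:\n\n{paper_text}"},
        ],
        max_tokens=512,
    )
    print(response["choices"][0]["message"]["content"])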