import os
import shutil
from typing import Optional

from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from loguru import logger
from tqdm import tqdm

from .parser import parse_pdf

# Chinese QA prompt: answer strictly from the retrieved context, refuse when
# the context is insufficient.  (Runtime string — do not translate.)
PROMPT_TEMPLATE = """已知信息:
{context}

根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。问题是:{question}"""


def _get_documents(filepath, chunk_size=500, chunk_overlap=0, two_column=False):
    """Load a single file and split it into chunked ``Document`` objects.

    Args:
        filepath: Path to one file; the loader is picked from its extension
            (.pdf / .docx / .pptx / .epub / .md / anything else).
        chunk_size: Maximum characters per chunk.
        chunk_overlap: Characters of overlap between consecutive chunks.
        two_column: Passed through to ``parse_pdf`` for two-column PDF layouts.

    Returns:
        list[Document]: The split chunks.  On a loader failure an empty
        placeholder document is split instead (best-effort, never raises).
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    file_type = os.path.splitext(filepath)[1]
    logger.info(f"Loading file: {filepath}")
    # Fallback payload so the final split still works if loading fails below.
    texts = Document(page_content="", metadata={"source": filepath})
    try:
        if file_type == ".pdf":
            logger.debug("Loading PDF...")
            try:
                pdftext = parse_pdf(filepath, two_column).text
            except Exception:  # noqa: BLE001 — deliberate best-effort fallback
                # Custom parser failed; fall back to plain PyPDF2 extraction.
                from PyPDF2 import PdfReader

                with open(filepath, "rb") as pdfFileObj:
                    pdfReader = PdfReader(pdfFileObj)
                    # join() avoids the quadratic cost of repeated `+=`.
                    pdftext = "".join(
                        page.extract_text() for page in tqdm(pdfReader.pages)
                    )
            texts = Document(page_content=pdftext, metadata={"source": filepath})
        elif file_type == ".docx":
            from langchain.document_loaders import UnstructuredWordDocumentLoader

            logger.debug("Loading Word...")
            loader = UnstructuredWordDocumentLoader(filepath)
            texts = loader.load()
        elif file_type == ".pptx":
            from langchain.document_loaders import UnstructuredPowerPointLoader

            logger.debug("Loading PowerPoint...")
            loader = UnstructuredPowerPointLoader(filepath)
            texts = loader.load()
        elif file_type == ".epub":
            from langchain.document_loaders import UnstructuredEPubLoader

            logger.debug("Loading EPUB...")
            loader = UnstructuredEPubLoader(filepath)
            texts = loader.load()
        elif file_type == ".md":
            # Markdown elements are returned unsplit, matching prior behavior.
            loader = UnstructuredFileLoader(filepath, mode="elements")
            return loader.load()
        else:
            loader = UnstructuredFileLoader(filepath, mode="elements")
            return loader.load_and_split(text_splitter=text_splitter)
    except Exception:
        import traceback

        logger.error(f"Error loading file: {filepath}")
        traceback.print_exc()
    # FIX: `texts` is a single Document on the PDF/fallback path but a *list*
    # of Documents for docx/pptx/epub; the old `split_documents([texts])`
    # passed a nested list in the latter case.  Normalize before splitting.
    documents = texts if isinstance(texts, list) else [texts]
    return text_splitter.split_documents(documents)


def get_documents(filepath, chunk_size=500, chunk_overlap=0, two_column=False):
    """Load one file or an iterable of files into split ``Document`` chunks.

    Args:
        filepath: Either a single existing file path, or an iterable of paths.
        chunk_size: Maximum characters per chunk.
        chunk_overlap: Characters of overlap between consecutive chunks.
        two_column: Passed through to the PDF parser.

    Returns:
        list[Document]: Chunks from all loaded files, concatenated.
    """
    documents = []
    logger.debug("Loading documents...")
    if os.path.isfile(filepath):
        documents.extend(
            _get_documents(
                filepath,
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                two_column=two_column,
            )
        )
    else:
        # Not a single file: treat `filepath` as an iterable of file paths.
        for file in filepath:
            documents.extend(
                _get_documents(
                    file,
                    chunk_size=chunk_size,
                    chunk_overlap=chunk_overlap,
                    two_column=two_column,
                )
            )
    logger.debug("Documents loaded.")
    return documents


def generate_prompt(related_docs, query: str, prompt_template=PROMPT_TEMPLATE) -> str:
    """Fill the QA template with retrieved context and the user's question.

    Args:
        related_docs: ``(Document, score)`` pairs as returned by
            ``similarity_search_with_score``; only the documents are used.
        query: The user's question.
        prompt_template: Template containing ``{context}`` and ``{question}``.

    Returns:
        The fully substituted prompt string.
    """
    context = "\n".join([doc[0].page_content for doc in related_docs])
    # str.replace (not .format) so braces elsewhere in user text are safe.
    return prompt_template.replace("{question}", query).replace("{context}", context)


class DocQAPromptAdapter:
    """Builds retrieval-augmented QA prompts backed by a FAISS vector store."""

    def __init__(
        self,
        chunk_size: Optional[int] = 500,
        chunk_overlap: Optional[int] = 0,
        api_key: Optional[str] = "xxx",
    ):
        """Configure chunking and the OpenAI embedding client.

        Args:
            chunk_size: Maximum characters per document chunk.
            chunk_overlap: Overlap between consecutive chunks.
            api_key: OpenAI API key used by the default embeddings.
        """
        self.embeddings = OpenAIEmbeddings(openai_api_key=api_key)
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.vector_store = None  # set by create_vector_store / reset_vector_store

    def create_vector_store(self, file_path, vs_path, embeddings=None):
        """Index `file_path` into a new FAISS store and persist it at `vs_path`.

        Args:
            file_path: A file path or iterable of paths (see ``get_documents``).
            vs_path: Directory where the FAISS index is saved.
            embeddings: Optional embeddings override; defaults to the instance's.
        """
        documents = get_documents(
            file_path, chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
        )
        self.vector_store = FAISS.from_documents(
            documents, self.embeddings if not embeddings else embeddings
        )
        self.vector_store.save_local(vs_path)

    def reset_vector_store(self, vs_path, embeddings=None):
        """Replace the active store with the one persisted at `vs_path`."""
        self.vector_store = FAISS.load_local(
            vs_path, self.embeddings if not embeddings else embeddings
        )

    @staticmethod
    def delete_files(files):
        """Best-effort removal of the given files or directories."""
        for file in files:
            if os.path.exists(file):
                if os.path.isfile(file):
                    os.remove(file)
                else:
                    shutil.rmtree(file)

    def __call__(self, query, vs_path=None, topk=6):
        """Retrieve the top-k chunks for `query` and return the filled prompt.

        Args:
            query: The user's question.
            vs_path: Optional path to a persisted store to (re)load first.
                If omitted, a store must already have been created/loaded,
                otherwise ``self.vector_store`` is still None and this raises.
            topk: Number of chunks to retrieve.

        Returns:
            The QA prompt string built from the retrieved context.
        """
        if vs_path is not None and os.path.exists(vs_path):
            self.reset_vector_store(vs_path)
        # NOTE(review): overriding embedding_function with embed_query mirrors
        # the original code; presumably needed for query-time embedding —
        # confirm against the installed langchain version.
        self.vector_store.embedding_function = self.embeddings.embed_query
        related_docs_with_score = self.vector_store.similarity_search_with_score(
            query, k=topk
        )
        return generate_prompt(related_docs_with_score, query)