# ml-playground / app_bu1.py
# Commit 959cfc9 (Sasidhar): Rename app.py to app_bu1.py
import os
import openai
import pinecone
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
import streamlit as st
# Page title, followed by an unlabeled free-text box in which the user enters
# the path of the directory to read documents from (e.g. '/content/data').
st.header("Document Question Answering")
# Strip surrounding whitespace: a multi-line text area readily picks up stray
# spaces or newlines, which would make a blank entry truthy and would end up
# inside the path handed to the loader.
directory = st.text_area("").strip()
def load_docs(directory):
    """Load the documents found under ``directory``.

    Builds a ``DirectoryLoader`` for the given path and returns whatever its
    ``load()`` call produces.
    """
    return DirectoryLoader(directory).load()
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split ``documents`` into chunks with a recursive character splitter.

    ``chunk_size`` and ``chunk_overlap`` are passed straight through to
    ``RecursiveCharacterTextSplitter``; the result of its
    ``split_documents()`` call is returned unchanged.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(documents)
# Nothing below runs until the user has entered something in the text area.
if directory and not os.path.isdir(directory):
    # Report a bad path in the page instead of handing it to the loader.
    st.error(f"Directory not found: {directory!r}")
elif directory:
    # Load the raw documents and show how many were found.
    documents = load_docs(directory)
    st.write(len(documents))

    # Chunk the documents; this count is printed to stdout, not to the page.
    docs = split_docs(documents)
    print(len(docs))

    # Embed a fixed sample query and display the length of the result.
    # NOTE(review): whether `model_name` is an accepted keyword depends on the
    # installed langchain release -- confirm against the pinned version.
    embeddings = OpenAIEmbeddings(model_name="ada")
    query_result = embeddings.embed_query("Hello world")
    st.write(len(query_result))