File size: 1,020 Bytes
2927735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ffd7f9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Change this in gradio
import os
from driveapi.drive import drive_content
from driveapi.service import get_shared_folder_id

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS 

# drive_shared_link = os.environ.get('DRIVE_LINK')
# shared_folder_id = get_shared_folder_id(drive_shared_link)

def create_chroma_db():
    """Build a FAISS vector store from the text of the shared Drive folder.

    Despite the function's name, the index is created with
    ``FAISS.from_texts``, not Chroma.

    The shared-folder link is read from the ``DRIVE_LINK`` environment
    variable on every call. The folder's text is fetched with
    ``drive_content``, split on newlines into chunks of up to 1000
    characters with a 200-character overlap, and embedded with
    ``OpenAIEmbeddings``.

    Returns:
        The FAISS store built from the chunked text, or an empty string
        when ``DRIVE_LINK`` is unset or empty (callers must check for
        this sentinel before using the result as a store).
    """
    drive_shared_link = os.environ.get('DRIVE_LINK')
    # An empty value is as unusable as a missing one, so both take the
    # same "" return instead of handing a blank link to the ID parser.
    if not drive_shared_link:
        return ""

    shared_folder_id = get_shared_folder_id(drive_shared_link)
    raw_text = drive_content(shared_folder_id)
    embedding = OpenAIEmbeddings()

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(raw_text)
    print('Length of text: ' + str(len(raw_text)))
    db = FAISS.from_texts(texts, embedding)

    return db