from langchain.vectorstores.pinecone import * from langchain.vectorstores.pinecone import Pinecone as OriginalPinecone class Pinecone(OriginalPinecone): @classmethod def from_texts( cls, texts: List[str], embedding: Embeddings, metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, batch_size: int = 32, text_key: str = "text", index_name: Optional[str] = None, namespace: Optional[str] = None, **kwargs: Any, ) -> Pinecone: """Construct Pinecone wrapper from raw documents. This is a user friendly interface that: 1. Embeds documents. 2. Adds the documents to a provided Pinecone index This is intended to be a quick way to get started. Example: .. code-block:: python from langchain import Pinecone from langchain.embeddings import OpenAIEmbeddings embeddings = OpenAIEmbeddings() pinecone = Pinecone.from_texts( texts, embeddings, index_name="langchain-demo" ) """ try: import pinecone except ImportError: raise ValueError( "Could not import pinecone python package. " "Please install it with `pip install pinecone-client`." ) _index_name = index_name or str(uuid.uuid4()) indexes = pinecone.list_indexes() # checks if provided index exists if _index_name in indexes: index = pinecone.Index(_index_name) else: index = None for i in range(0, len(texts), batch_size): # set end position of batch i_end = min(i + batch_size, len(texts)) # get batch of texts and ids lines_batch = texts[i:i_end] # create ids if not provided if ids: ids_batch = ids[i:i_end] else: ids_batch = [str(uuid.uuid4()) for n in range(i, i_end)] # create embeddings # embeds = embedding.embed_documents(lines_batch) embeds = [embedding.embed_documents([line_batch])[0] for line_batch in lines_batch] # prep metadata and upsert batch if metadatas: metadata = metadatas[i:i_end] else: metadata = [{} for _ in range(i, i_end)] for j, line in enumerate(lines_batch): metadata[j][text_key] = line to_upsert = zip(ids_batch, embeds, metadata) # Create index if it does not exist if index is None: pinecone.create_index(_index_name, dimension=len(embeds[0])) index = pinecone.Index(_index_name) # upsert to Pinecone index.upsert(vectors=list(to_upsert), namespace=namespace) return cls(index, embedding.embed_query, text_key, namespace)