mitulagr2 committed
Commit 9151071
1 Parent(s): b5808ba

update embedding

Files changed (3):
  1. Dockerfile +4 -4
  2. app/rag.py +16 -4
  3. start_service.sh +8 -8
Dockerfile CHANGED
@@ -16,13 +16,13 @@ COPY ./start_service.sh /code/start_service.sh
 #
 COPY ./app /code/app

-RUN useradd -m docker && echo "docker:docker" | chpasswd && adduser docker sudo
+# RUN useradd -m docker && echo "docker:docker" | chpasswd && adduser docker sudo

-RUN curl -fsSL https://ollama.com/install.sh | sh
+# RUN curl -fsSL https://ollama.com/install.sh | sh

-USER docker
+# USER docker

-RUN nohup ollama serve & sleep 5
+# RUN nohup ollama serve & sleep 5

 #
 # RUN chmod +x /code/start_service.sh
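This hunk drops the Ollama provisioning from the image: the docker user setup, the install script, and the `nohup ollama serve & sleep 5` warm-up are all commented out. A background process started in a `RUN` layer would not survive into the running container anyway, and with the service script below no longer starting or pulling Ollama models, the image appears to have no remaining Ollama dependency.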
app/rag.py CHANGED
@@ -1,3 +1,6 @@
+import os
+import logging
+
 from llama_index.core import (
     SimpleDirectoryReader,
     # VectorStoreIndex,
@@ -16,12 +19,14 @@ from llama_index.core.vector_stores import VectorStoreQuery
 from llama_index.core.indices.vector_store.base import VectorStoreIndex
 from llama_index.vector_stores.qdrant import QdrantVectorStore
 from qdrant_client import QdrantClient
-import logging

 from llama_index.llms.llama_cpp import LlamaCPP
 from llama_index.embeddings.fastembed import FastEmbedEmbedding


+QDRANT_API_URL = os.getenv('QDRANT_API_URL')
+QDRANT_API_KEY = os.getenv('QDRANT_API_KEY')
+
 class ChatPDF:
     logging.basicConfig(level=logging.INFO)
     logger = logging.getLogger(__name__)
@@ -56,11 +61,18 @@ class ChatPDF:

         self.logger.info("initializing the vector store related objects")
         # client = QdrantClient(host="localhost", port=6333)
+        # client = QdrantClient(url=QDRANT_API_URL, api_key=QDRANT_API_KEY)
         client = QdrantClient(":memory:")
-        self.vector_store = QdrantVectorStore(client=client, collection_name="rag_documents", enable_hybrid=True)
+        self.vector_store = QdrantVectorStore(
+            client=client,
+            collection_name="rag_documents",
+            # enable_hybrid=True
+        )

         self.logger.info("initializing the FastEmbedEmbedding")
-        self.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en")
+        self.embed_model = FastEmbedEmbedding(
+            # model_name="BAAI/bge-small-en"
+        )

         llm = LlamaCPP(
             model_url=self.model_url,
@@ -124,7 +136,7 @@ class ChatPDF:
         retriever = VectorIndexRetriever(
             index=index,
             similarity_top_k=6,
-            vector_store_query_mode="hybrid"
+            # vector_store_query_mode="hybrid"
         )

         self.logger.info("configure response synthesizer")
start_service.sh CHANGED
@@ -1,16 +1,16 @@
 #!/bin/sh

-# Start Ollama in the background
-ollama serve &
+# # Start Ollama in the background
+# ollama serve &

-# Wait for Ollama to start
-sleep 5
+# # Wait for Ollama to start
+# sleep 5

-#
-ollama pull mxbai-embed-large
+# #
+# ollama pull mxbai-embed-large

-# Pull and run <YOUR_MODEL_NAME>
-ollama pull qwen:1.8b
+# # Pull and run <YOUR_MODEL_NAME>
+# ollama pull qwen:1.8b

 #
 fastapi run /code/app/main.py --port 7860
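After this change start_service.sh no longer starts Ollama or pulls `mxbai-embed-large` and `qwen:1.8b`; the container boots straight into `fastapi run /code/app/main.py --port 7860`, with embeddings computed in-process by FastEmbed as set up in app/rag.py above.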