FridayMaster committed on
Commit
155ba37
1 Parent(s): 6dc00a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -13
app.py CHANGED
@@ -10,9 +10,9 @@ import nltk
10
  nltk.download('punkt')
11
  nltk.download('punkt_tab')
12
 
13
- # Define paths as variables
14
- manual_path = "ubuntu_manual.txt"
15
  faiss_path = "manual_chunked_faiss_index_500.bin"
 
16
 
17
  # Load the Ubuntu manual from a .txt file
18
  try:
@@ -22,7 +22,7 @@ except FileNotFoundError:
22
  raise FileNotFoundError(f"The file {manual_path} was not found.")
23
 
24
  # Function to chunk the text into smaller pieces
25
- def chunk_text(text, chunk_size=500): # Larger chunks
26
  sentences = sent_tokenize(text)
27
  chunks = []
28
  current_chunk = []
@@ -46,31 +46,31 @@ manual_chunks = chunk_text(full_text, chunk_size=500)
46
  try:
47
  index = faiss.read_index(faiss_path)
48
  except Exception as e:
49
- raise RuntimeError(f"Failed to load FAISS index from {faiss_path}: {e}")
50
 
51
  # Load your embedding model
52
- embedding_model = SentenceTransformer('FridayMaster/fine_tune_embedding')
53
 
54
  # OpenAI API key
55
- openai.api_key = '[REDACTED — plaintext API key removed from history; this key is compromised and must be revoked. Load the key from an environment variable, e.g. os.environ["OPENAI_API_KEY"], instead of committing it.]'
56
 
57
  # Function to create embeddings
58
  def embed_text(text_list):
59
- return np.array(embedding_model.encode(text_list), dtype=np.float32)
 
 
60
 
61
  # Function to retrieve relevant chunks for a user query
62
  def retrieve_chunks(query, k=5):
63
  query_embedding = embed_text([query])
64
 
65
- # Search the FAISS index
66
  try:
67
  distances, indices = index.search(query_embedding, k=k)
68
- print("Indices:", indices)
69
- print("Distances:", distances)
70
  except Exception as e:
71
  raise RuntimeError(f"FAISS search failed: {e}")
72
-
73
- # Check if indices are valid
74
  if len(indices[0]) == 0:
75
  return []
76
 
@@ -129,4 +129,3 @@ if __name__ == "__main__":
129
 
130
 
131
 
132
-
 
10
  nltk.download('punkt')
11
  nltk.download('punkt_tab')
12
 
13
+ # Paths
 
14
  faiss_path = "manual_chunked_faiss_index_500.bin"
15
+ manual_path = "ubuntu_manual.txt"
16
 
17
  # Load the Ubuntu manual from a .txt file
18
  try:
 
22
  raise FileNotFoundError(f"The file {manual_path} was not found.")
23
 
24
  # Function to chunk the text into smaller pieces
25
+ def chunk_text(text, chunk_size=500):
26
  sentences = sent_tokenize(text)
27
  chunks = []
28
  current_chunk = []
 
46
  try:
47
  index = faiss.read_index(faiss_path)
48
  except Exception as e:
49
+ raise RuntimeError(f"Failed to load FAISS index: {e}")
50
 
51
  # Load your embedding model
52
+ embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
53
 
54
  # OpenAI API key
55
+ openai.api_key = '[REDACTED — this commit replaced one leaked key with another plaintext key, which is equally compromised and must be revoked. Load the key from an environment variable, e.g. os.environ["OPENAI_API_KEY"], instead of committing it.]'
56
 
57
  # Function to create embeddings
58
  def embed_text(text_list):
59
+ embeddings = embedding_model.encode(text_list)
60
+ print("Embedding shape:", embeddings.shape) # Debugging: Print shape
61
+ return np.array(embeddings, dtype=np.float32)
62
 
63
  # Function to retrieve relevant chunks for a user query
64
  def retrieve_chunks(query, k=5):
65
  query_embedding = embed_text([query])
66
 
 
67
  try:
68
  distances, indices = index.search(query_embedding, k=k)
69
+ print("Indices:", indices) # Debugging: Print indices
70
+ print("Distances:", distances) # Debugging: Print distances
71
  except Exception as e:
72
  raise RuntimeError(f"FAISS search failed: {e}")
73
+
 
74
  if len(indices[0]) == 0:
75
  return []
76
 
 
129
 
130
 
131