Arafath10 committed on
Commit
aaf0100
·
verified ·
1 Parent(s): eecfda5

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +5 -10
main.py CHANGED
@@ -25,15 +25,10 @@ app.add_middleware(
25
  allow_headers=["*"],
26
  )
27
 
28
- def chunk_text(text, chunk_size=512):
29
- return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
30
 
31
  @app.post("/get_embeding")
32
- async def get_embeding(text):
33
- chunks = chunk_text(text)
34
- all_embeddings = []
35
- for chunk in chunks:
36
-
37
  # Tokenize the input text
38
  inputs = tokenizer(chunk, return_tensors="pt")
39
 
@@ -47,9 +42,9 @@ async def get_embeding(text):
47
  # Optionally, you can average the token embeddings to get a single vector for the sentence
48
  sentence_embedding = torch.mean(embeddings, dim=1)
49
 
50
- print(sentence_embedding)
51
- all_embeddings.append(sentence_embedding.tolist())
52
- return all_embeddings
53
 
54
 
55
 
 
25
  allow_headers=["*"],
26
  )
27
 
 
 
28
 
29
  @app.post("/get_embeding")
30
+ async def get_embeding(chunk):
31
+
 
 
 
32
  # Tokenize the input text
33
  inputs = tokenizer(chunk, return_tensors="pt")
34
 
 
42
  # Optionally, you can average the token embeddings to get a single vector for the sentence
43
  sentence_embedding = torch.mean(embeddings, dim=1)
44
 
45
+ #print(sentence_embedding)
46
+ return sentence_embedding.tolist()
47
+
48
 
49
 
50