embeding_api

Paused

Arafath10 committed on Jul 31, 2024

Commit

aaf0100

verified ·

1 Parent(s): eecfda5

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -25,15 +25,10 @@ app.add_middleware(
     allow_headers=["*"],
 )
-def chunk_text(text, chunk_size=512):
-    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
 @app.post("/get_embeding")
-async def get_embeding(text):
-    chunks = chunk_text(text)
-    all_embeddings = []
-    for chunk in chunks:
         # Tokenize the input text
         inputs = tokenizer(chunk, return_tensors="pt")
@@ -47,9 +42,9 @@ async def get_embeding(text):
         # Optionally, you can average the token embeddings to get a single vector for the sentence
         sentence_embedding = torch.mean(embeddings, dim=1)
-        print(sentence_embedding)
-        all_embeddings.append(sentence_embedding.tolist())
-    return all_embeddings

     allow_headers=["*"],
 )
 @app.post("/get_embeding")
+async def get_embeding(chunk):
         # Tokenize the input text
         inputs = tokenizer(chunk, return_tensors="pt")
         # Optionally, you can average the token embeddings to get a single vector for the sentence
         sentence_embedding = torch.mean(embeddings, dim=1)
+        #print(sentence_embedding)
+        return sentence_embedding.tolist()