Arafath10 committed on
Commit
07fb065
·
verified ·
1 Parent(s): e59b55b

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +36 -4
main.py CHANGED
@@ -2,6 +2,19 @@ from fastapi import FastAPI, HTTPException
2
  from fastapi.responses import JSONResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  app = FastAPI()
7
  app.add_middleware(
@@ -12,11 +25,30 @@ app.add_middleware(
12
  allow_headers=["*"],
13
  )
14
 
 
 
15
 
16
-
17
# Removed by this commit (old side of the diff): a placeholder endpoint that ignored both of its arguments and always returned "done".
- @app.post("/get_n_depth_results")
18
- async def get_n_depth_results(url,input_query):
19
- return "done"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
 
22
 
 
2
  from fastapi.responses import JSONResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
 
5
+ from transformers import AutoModel, AutoTokenizer
6
+ import torch
7
+
8
# All inference in this module runs on the CPU.
device = torch.device("cpu")

# Single source of truth for the checkpoint so the model and tokenizer
# can never drift apart.
MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5"

# Load the model and tokenizer once at import time so every request reuses them.
# NOTE(review): trust_remote_code=True executes Python code shipped in the
# model repository; consider pinning `revision=` to a reviewed commit.
model = AutoModel.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

# Actually apply the declared device and make inference mode explicit
# (disables dropout and similar training-only behaviour).
model.to(device)
model.eval()
17
+
18
 
19
  app = FastAPI()
20
  app.add_middleware(
 
25
  allow_headers=["*"],
26
  )
27
 
28
def chunk_text(text: str, chunk_size: int = 512):
    """Split ``text`` into consecutive pieces of at most ``chunk_size`` characters.

    Chunking is by character count, not by tokens. Every chunk except
    possibly the last has exactly ``chunk_size`` characters; joining the
    chunks in order reproduces ``text``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk. Must be positive.

    Returns:
        A list of chunks in their original order; an empty list when
        ``text`` is empty.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # A negative step would make range() empty and silently drop the whole
    # text; a zero step raises an unrelated-looking range() error. Fail
    # loudly with a clear message instead.
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]
30
 
31
@app.post("/get_embeding")
async def get_embeding(text):
    """Embed ``text`` with the module-level model, one vector per chunk.

    The text is split with ``chunk_text`` (default chunk size). Each chunk
    is tokenized and run through ``model``, and the token embeddings in
    ``last_hidden_state`` are averaged into a single vector for that chunk.

    Args:
        text: The input string to embed.

    Returns:
        A list with one embedding (a list of floats) per chunk, in chunk
        order. Plain lists are returned instead of a ``torch.Tensor`` so the
        result can be sent as JSON.

    Raises:
        HTTPException: Status 400 when ``text`` is empty, because there is
            nothing to embed.
    """
    chunks = chunk_text(text)
    if not chunks:
        # Previously an empty text left the result variable unassigned.
        raise HTTPException(status_code=400, detail="text must not be empty")

    chunk_embeddings = []
    # Inference only: gradients are not needed.
    with torch.no_grad():
        for chunk in chunks:
            # Tokenize the chunk into PyTorch tensors.
            inputs = tokenizer(chunk, return_tensors="pt")
            outputs = model(**inputs)
            # Average over the token axis (dim=1) to get one vector per
            # chunk; presumably shape (1, hidden_size) - TODO confirm.
            chunk_embeddings.append(
                torch.mean(outputs.last_hidden_state, dim=1)
            )

    # Keep every chunk's embedding. The earlier loop overwrote the result on
    # each iteration, so all but one chunk was discarded.
    return torch.cat(chunk_embeddings, dim=0).tolist()
52
 
53
 
54