vasilee committed on
Commit
7a6907a
·
1 Parent(s): b16c32f

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +0 -57
main.py CHANGED
@@ -1,6 +1,5 @@
1
  from torch import Tensor
2
  from transformers import AutoTokenizer, AutoModel
3
- from ctranslate2 import Translator
4
  from typing import Union
5
 
6
  from fastapi import FastAPI
@@ -19,26 +18,11 @@ embeddingTokenizer = AutoTokenizer.from_pretrained(
19
  './multilingual-e5-base')
20
  embeddingModel = AutoModel.from_pretrained('./multilingual-e5-base')
21
 
22
- # chatGpt replacement
23
- inferenceTokenizer = AutoTokenizer.from_pretrained(
24
- "./flan-alpaca-gpt4-xl-ct2")
25
- inferenceTranslator = Translator(
26
- "./flan-alpaca-gpt4-xl-ct2", compute_type="int8", device="cpu")
27
-
28
 
29
  class EmbeddingRequest(BaseModel):
30
  input: Union[str, None] = None
31
 
32
 
33
- class TokensCountRequest(BaseModel):
34
- input: Union[str, None] = None
35
-
36
-
37
- class InferenceRequest(BaseModel):
38
- input: Union[str, None] = None
39
- max_length: Union[int, None] = 0
40
-
41
-
42
  app = FastAPI()
43
 
44
 
@@ -62,44 +46,3 @@ async def text_embedding(request: EmbeddingRequest):
62
  return {
63
  'embedding': embeddings[0].tolist()
64
  }
65
-
66
-
67
- @app.post('/inference')
68
- async def inference(request: InferenceRequest):
69
- input_text = request.input
70
- max_length = 256
71
- try:
72
- max_length = int(request.max_length)
73
- max_length = min(1024, max_length)
74
- except:
75
- pass
76
-
77
- # process request
78
- input_tokens = inferenceTokenizer.convert_ids_to_tokens(
79
- inferenceTokenizer.encode(input_text))
80
-
81
- results = inferenceTranslator.translate_batch(
82
- [input_tokens], beam_size=1, max_input_length=0, max_decoding_length=max_length, num_hypotheses=1, repetition_penalty=1.3, sampling_topk=40, sampling_temperature=0.7, use_vmap=False)
83
-
84
- output_tokens = results[0].hypotheses[0]
85
- output_text = inferenceTokenizer.decode(
86
- inferenceTokenizer.convert_tokens_to_ids(output_tokens), skip_special_tokens=True)
87
-
88
- # create response
89
- return {
90
- 'generated_text': output_text
91
- }
92
-
93
-
94
- @app.post('/tokens-count')
95
- async def tokens_count(request: TokensCountRequest):
96
- input_text = request.input
97
-
98
- tokens = inferenceTokenizer.convert_ids_to_tokens(
99
- inferenceTokenizer.encode(input_text))
100
-
101
- # create response
102
- return {
103
- 'tokens': tokens,
104
- 'total': len(tokens)
105
- }
 
1
  from torch import Tensor
2
  from transformers import AutoTokenizer, AutoModel
 
3
  from typing import Union
4
 
5
  from fastapi import FastAPI
 
18
  './multilingual-e5-base')
19
  embeddingModel = AutoModel.from_pretrained('./multilingual-e5-base')
20
 
 
 
 
 
 
 
21
 
22
  class EmbeddingRequest(BaseModel):
23
  input: Union[str, None] = None
24
 
25
 
 
 
 
 
 
 
 
 
 
26
  app = FastAPI()
27
 
28
 
 
46
  return {
47
  'embedding': embeddings[0].tolist()
48
  }