asv7j committed (verified)
Commit af7530b · 1 Parent(s): 650ebf8

Update app.py

Files changed (1): app.py (+10, -42)
app.py CHANGED
@@ -8,20 +8,19 @@ access_token = os.getenv("read_access")
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cpu"  # the device to load the model onto
 
-#tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
 
-#model1 = AutoModelForCausalLM.from_pretrained(
-#    "Qwen/Qwen2-1.5B-Instruct",
-#    device_map="auto"
-#)
-
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it", token=access_token)
 model = AutoModelForCausalLM.from_pretrained(
-    "google/gemma-2-2b-it",
-    device_map="auto",
-    token=access_token
+    "Qwen/Qwen2-1.5B-Instruct",
+    device_map="auto",
+    torch_dtype="auto"
 )
 
+model1 = AutoModelForCausalLM.from_pretrained(
+    "Qwen/Qwen2-1.5B-Instruct",
+    device_map="auto"
+)
+
 app = FastAPI()
 
 @app.get("/")
@@ -77,7 +76,7 @@ async def read_droot():
 
     generated_ids = model1.generate(
         model_inputs.input_ids,
-        max_new_tokens=64
+        max_new_tokens=128
     )
     generated_ids = [
         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
@@ -91,34 +90,3 @@ async def read_droot():
     return {"Hello": "World!"}
     #return {response: time}
 
-
-@app.get("/tet")
-async def read_droot():
-    starttime = time.time()
-    messages = [
-        {"role": "user", "content": "I'm Alok. Who are you?"},
-        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
-        {"role": "user", "content": "How are you?"}
-    ]
-    text = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-    model_inputs = tokenizer([text], return_tensors="pt").to(device)
-
-    generated_ids = model.generate(
-        model_inputs.input_ids,
-        max_new_tokens=64
-    )
-    generated_ids = [
-        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-    ]
-
-    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    print(response)
-    end_time = time.time()
-    time_taken = end_time - starttime
-    print(time_taken)
-    return {"Hello": "resps"}
-    #return {response: time}
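
For reference, a minimal sketch of how the loading and generation path in app.py could read after this commit. Only the hunks above are actually shown in the diff; the imports, the endpoint wiring, and the prompt construction here are assumptions filled in from the hunk context (model1, read_droot, and the return value are the names the diff itself uses), not a verbatim copy of the file.

# Sketch of app.py after commit af7530b; glue outside the hunks is assumed.
import os
import time

from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer

access_token = os.getenv("read_access")  # still read, though no longer passed to from_pretrained
device = "cpu"  # the device to load the model onto

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-1.5B-Instruct",
    device_map="auto",
    torch_dtype="auto"  # load in the checkpoint's native dtype instead of defaulting to float32
)

model1 = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-1.5B-Instruct",
    device_map="auto"
)

app = FastAPI()

@app.get("/")
async def read_droot():
    starttime = time.time()
    # Hypothetical prompt; the diff only shows the generate/decode portion of this handler.
    messages = [{"role": "user", "content": "How are you?"}]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    generated_ids = model1.generate(
        model_inputs.input_ids,
        max_new_tokens=128  # raised from 64 in this commit
    )
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(response, time.time() - starttime)
    return {"Hello": "World!"}

Run with "uvicorn app:app" and the handler answers GET / after one generation pass.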
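
The list comprehension that appears in both hunks, output_ids[len(input_ids):], is the usual trick for trimming the echoed prompt: generate() returns each sequence with the prompt tokens still at the front, so slicing off the first len(input_ids) tokens leaves only the newly generated ones for batch_decode. A standalone illustration with toy tensors (the values are made up, not real model output):

import torch

# generate() output starts with the prompt tokens, then the new ones.
prompt_ids = torch.tensor([[101, 102, 103]])          # encoded prompt, batch of 1
generated = torch.tensor([[101, 102, 103, 7, 8, 9]])  # prompt + 3 new tokens

trimmed = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(prompt_ids, generated)
]
print(trimmed)  # [tensor([7, 8, 9])]: only the continuation survives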