Abhi-22 committed
Commit 737b074
1 Parent(s): 842041c

Update app.py

Files changed (1):
  1. app.py +34 -31
app.py CHANGED
@@ -1,3 +1,37 @@
+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# Load NVLM-D-72B model and tokenizer
+# model_name = "nvidia/NVLM-D-72B"
+model_name = "nvidia/NVLM-D-7B"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    trust_remote_code=True,
+    device_map="auto"
+)
+
+# Inference function
+def generate_response(prompt, max_tokens=50):
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")  # Adjust to "cpu" if GPU unavailable
+    outputs = model.generate(**inputs, max_new_tokens=max_tokens)
+    return tokenizer.decode(outputs[0])
+
+# Gradio interface
+interface = gr.Interface(
+    fn=generate_response,
+    inputs=[
+        gr.Textbox(lines=2, label="Enter your prompt"),
+        gr.Slider(10, 100, step=10, value=50, label="Max Tokens")
+    ],
+    outputs="text",
+    title="NVIDIA NVLM-D-72B Demo",
+    description="Generate text using NVIDIA's NVLM-D-72B model."
+)
+if __name__ == "__main__":
+    interface.launch()
+
+
 # import gradio as gr
 # from huggingface_hub import InferenceClient
 
@@ -63,34 +97,3 @@
 # if __name__ == "__main__":
 #     demo.launch()
 
-import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-# Load NVLM-D-72B model and tokenizer
-model_name = "nvidia/NVLM-D-72B"
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    trust_remote_code=True,
-    device_map="auto"
-)
-
-# Inference function
-def generate_response(prompt, max_tokens=50):
-    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")  # Adjust to "cpu" if GPU unavailable
-    outputs = model.generate(**inputs, max_new_tokens=max_tokens)
-    return tokenizer.decode(outputs[0])
-
-# Gradio interface
-interface = gr.Interface(
-    fn=generate_response,
-    inputs=[
-        gr.Textbox(lines=2, label="Enter your prompt"),
-        gr.Slider(10, 100, step=10, value=50, label="Max Tokens")
-    ],
-    outputs="text",
-    title="NVIDIA NVLM-D-72B Demo",
-    description="Generate text using NVIDIA's NVLM-D-72B model."
-)
-if __name__ == "__main__":
-    interface.launch()
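
The committed script pins inputs to "cuda" and leaves CPU fallback to an inline comment. Below is a minimal device-agnostic sketch of the same load-and-generate path; it assumes the model id from the commit resolves on the Hub, that its remote code is compatible with AutoModelForCausalLM, and that accelerate is installed (required for device_map="auto"):

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "nvidia/NVLM-D-7B"  # taken from the commit; availability on the Hub is an assumption

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto"  # places weights on available GPUs, falls back to CPU
)

def generate_response(prompt, max_tokens=50):
    # model.device points at the first weight shard, so this works on GPU and CPU alike
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_tokens)
    # skip_special_tokens keeps BOS/EOS markers out of the returned string
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

Decoding with skip_special_tokens=True avoids leaking tokens such as <s> into the Gradio textbox, which the committed tokenizer.decode(outputs[0]) would display.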
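
For completeness, a hedged local smoke test that could stand in for the __main__ block while debugging; generate_response and interface are the module-level names from the diff, and the server_name/server_port values follow the usual Spaces convention rather than anything in this commit:

if __name__ == "__main__":
    # Quick sanity check before serving the UI
    print(generate_response("Hello, NVLM!", max_tokens=20))
    # launch() binds to localhost by default; 0.0.0.0:7860 is what a Spaces container expects
    interface.launch(server_name="0.0.0.0", server_port=7860)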