EdBoy2202 committed
Commit 7bf24af · verified · 1 Parent(s): f250bf0

Update app.py

Files changed (1)
  1. app.py +86 -40
app.py CHANGED
@@ -1,56 +1,102 @@
 import streamlit as st
-from transformers import pipeline
-import torch
-
-# Check if CUDA is available and set the device accordingly
-# device = 0 if torch.cuda.is_available() else -1
-
-# Initialize the Phi model pipeline
+from huggingface_hub import InferenceClient, HfApi
+import time
+import requests
+from requests.exceptions import RequestException
+
+# Set page config at the very beginning
+st.set_page_config(page_title="Phi-3.5 Chatbot", page_icon="🤖")
+
+# Add a text input for the Hugging Face API token
+hf_token = st.text_input("Enter your Hugging Face API token", type="password")
+
 @st.cache_resource
-def load_model():
-    return pipeline(
-        "text-generation",
-        model="microsoft/phi-2",
-        torch_dtype=torch.bfloat16,
-        # device=device,
+def get_client(token):
+    return InferenceClient(
+        "microsoft/Phi-3.5-mini-instruct",
+        token=token
     )
 
-phi_model = load_model()
-
-# Function to generate response
-def generate_response(prompt, max_length=512):
-    response = phi_model(prompt, max_length=max_length, do_sample=True, temperature=0.7)
-    return response[0]['generated_text']
-
-# Streamlit UI
-st.title("Chain of Thought vs Traditional Reasoning - Phi Model")
-
-# User input
-user_question = st.text_input("Enter your question:")
-
-if user_question:
-    # Generate responses
-    with st.spinner("Generating responses..."):
-        traditional_prompt = f"Question: {user_question}\nAnswer:"
-        cot_prompt = f"Question: {user_question}\nLet's approach this step by step:\n1)"
-
-        traditional_response = generate_response(traditional_prompt)
-        cot_response = generate_response(cot_prompt)
-
-    # Display results
-    st.subheader("Traditional Output")
-    st.write(traditional_response)
-
-    st.subheader("Chain of Thought Reasoning")
-    st.write(cot_response)
-
-    # Add explanatory text
-    st.markdown("""
-    ## About this demo
-    This demo showcases the difference between traditional output and chain of thought (CoT) reasoning using the Phi-2 model from Microsoft.
-
-    - **Traditional Output**: Provides a direct answer to the question.
-    - **Chain of Thought Reasoning**: Shows the step-by-step thought process leading to the answer.
-
-    CoT reasoning often results in more detailed and transparent explanations, which can be helpful for complex problems or when understanding the reasoning process is important.
-    """)
+def validate_token(token):
+    try:
+        api = HfApi(token=token)
+        api.whoami()
+        return True
+    except Exception as e:
+        st.error(f"Token validation failed: {str(e)}")
+        return False
+
+def make_request_with_retries(client, prompt, max_new_tokens, temperature, top_p, max_retries=5, initial_delay=1):
+    for attempt in range(max_retries):
+        try:
+            response = client.text_generation(
+                prompt,
+                max_new_tokens=max_new_tokens,
+                temperature=temperature,
+                top_p=top_p,
+            )
+            return response
+        except RequestException as e:
+            if attempt < max_retries - 1:
+                delay = initial_delay * (2 ** attempt)  # Exponential backoff
+                st.warning(f"Request failed. Retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})")
+                time.sleep(delay)
+            else:
+                raise e
+
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    if not hf_token:
+        st.error("Please enter your Hugging Face API token.")
+        return
+
+    if not validate_token(hf_token):
+        return
+
+    client = get_client(hf_token)
+
+    # Construct the prompt
+    prompt = f"{system_message}\n\n"
+    for user_msg, assistant_msg in history:
+        prompt += f"Human: {user_msg}\nAssistant: {assistant_msg}\n\n"
+    prompt += f"Human: {message}\nAssistant:"
+
+    try:
+        response = make_request_with_retries(client, prompt, max_tokens, temperature, top_p)
+        yield response
+    except Exception as e:
+        st.error(f"An error occurred: {str(e)}")
+        yield "I'm sorry, but I encountered an error while processing your request."
+
+st.title("Phi-3.5 Mini Chatbot")
+
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+
+system_message = st.text_input("System message", value="You are a helpful AI assistant.")
+max_tokens = st.slider("Max new tokens", min_value=1, max_value=1024, value=256, step=1)
+temperature = st.slider("Temperature", min_value=0.01, max_value=1.0, value=0.7, step=0.01)
+top_p = st.slider("Top P", min_value=0.0, max_value=1.0, value=0.9, step=0.01)
+
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+
+if prompt := st.chat_input("What is your message?"):
+    st.session_state.messages.append({"role": "user", "content": prompt})
+    with st.chat_message("user"):
+        st.markdown(prompt)
+
+    with st.chat_message("assistant"):
+        message_placeholder = st.empty()
+        full_response = ""
+        for response in respond(prompt,
+                                 [(msg["content"], st.session_state.messages[i+1]["content"])
+                                  for i, msg in enumerate(st.session_state.messages[:-1:2])],
+                                 system_message,
+                                 max_tokens,
+                                 temperature,
+                                 top_p):
+            message_placeholder.markdown(response)
+            full_response = response
+
+    st.session_state.messages.append({"role": "assistant", "content": full_response})
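For reference, a minimal sketch of how the new inference path could be exercised outside Streamlit. The token value and the question below are placeholders, and the retry loop mirrors the exponential backoff used by make_request_with_retries (max_retries=5, initial_delay=1) rather than calling the Streamlit-bound functions directly; it assumes huggingface_hub and requests are installed.

import time
from huggingface_hub import InferenceClient, HfApi
from requests.exceptions import RequestException

HF_TOKEN = "hf_..."  # placeholder; use a token that passes validation

# Same validation the app performs: whoami() raises if the token is invalid
HfApi(token=HF_TOKEN).whoami()

client = InferenceClient("microsoft/Phi-3.5-mini-instruct", token=HF_TOKEN)

# Prompt built in the same "Human:/Assistant:" format as respond()
prompt = "You are a helpful AI assistant.\n\nHuman: What is exponential backoff?\nAssistant:"

# Equivalent of make_request_with_retries: wait 1, 2, 4, then 8 seconds
# between attempts, and re-raise the error on the final attempt
for attempt in range(5):
    try:
        print(client.text_generation(prompt, max_new_tokens=256, temperature=0.7, top_p=0.9))
        break
    except RequestException:
        if attempt == 4:
            raise
        time.sleep(2 ** attempt)

In the app itself, each yielded response replaces the chat placeholder and the last value is stored in full_response, which is what gets appended to st.session_state.messages as the assistant turn.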