EdBoy2202 committed
Commit 7bf24af · verified · 1 Parent(s): f250bf0

Update app.py

Files changed (1)
  1. app.py +86 -40
app.py CHANGED
@@ -1,56 +1,102 @@
 import streamlit as st
-from transformers import pipeline
-import torch
-
-# Check if CUDA is available and set the device accordingly
-# device = 0 if torch.cuda.is_available() else -1
-
-# Initialize the Phi model pipeline
+from huggingface_hub import InferenceClient, HfApi
+import time
+import requests
+from requests.exceptions import RequestException
+
+# Set page config at the very beginning
+st.set_page_config(page_title="Phi-3.5 Chatbot", page_icon="🤖")
+
+# Add a text input for the Hugging Face API token
+hf_token = st.text_input("Enter your Hugging Face API token", type="password")
+
 @st.cache_resource
-def load_model():
-    return pipeline(
-        "text-generation",
-        model="microsoft/phi-2",
-        torch_dtype=torch.bfloat16,
-        # device=device,
+def get_client(token):
+    return InferenceClient(
+        "microsoft/Phi-3.5-mini-instruct",
+        token=token
     )
 
-phi_model = load_model()
-
-# Function to generate response
-def generate_response(prompt, max_length=512):
-    response = phi_model(prompt, max_length=max_length, do_sample=True, temperature=0.7)
-    return response[0]['generated_text']
-
-# Streamlit UI
-st.title("Chain of Thought vs Traditional Reasoning - Phi Model")
-
-# User input
-user_question = st.text_input("Enter your question:")
-
-if user_question:
-    # Generate responses
-    with st.spinner("Generating responses..."):
-        traditional_prompt = f"Question: {user_question}\nAnswer:"
-        cot_prompt = f"Question: {user_question}\nLet's approach this step by step:\n1)"
-
-        traditional_response = generate_response(traditional_prompt)
-        cot_response = generate_response(cot_prompt)
-
-    # Display results
-    st.subheader("Traditional Output")
-    st.write(traditional_response)
-
-    st.subheader("Chain of Thought Reasoning")
-    st.write(cot_response)
-
-    # Add explanatory text
-    st.markdown("""
-    ## About this demo
-    This demo showcases the difference between traditional output and chain of thought (CoT) reasoning using the Phi-2 model from Microsoft.
-
-    - **Traditional Output**: Provides a direct answer to the question.
-    - **Chain of Thought Reasoning**: Shows the step-by-step thought process leading to the answer.
-
-    CoT reasoning often results in more detailed and transparent explanations, which can be helpful for complex problems or when understanding the reasoning process is important.
-    """)
+def validate_token(token):
+    try:
+        api = HfApi(token=token)
+        api.whoami()
+        return True
+    except Exception as e:
+        st.error(f"Token validation failed: {str(e)}")
+        return False
+
+def make_request_with_retries(client, prompt, max_new_tokens, temperature, top_p, max_retries=5, initial_delay=1):
+    for attempt in range(max_retries):
+        try:
+            response = client.text_generation(
+                prompt,
+                max_new_tokens=max_new_tokens,
+                temperature=temperature,
+                top_p=top_p,
+            )
+            return response
+        except RequestException as e:
+            if attempt < max_retries - 1:
+                delay = initial_delay * (2 ** attempt)  # Exponential backoff
+                st.warning(f"Request failed. Retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})")
+                time.sleep(delay)
+            else:
+                raise e
+
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    if not hf_token:
+        st.error("Please enter your Hugging Face API token.")
+        return
+
+    if not validate_token(hf_token):
+        return
+
+    client = get_client(hf_token)
+
+    # Construct the prompt
+    prompt = f"{system_message}\n\n"
+    for user_msg, assistant_msg in history:
+        prompt += f"Human: {user_msg}\nAssistant: {assistant_msg}\n\n"
+    prompt += f"Human: {message}\nAssistant:"
+
+    try:
+        response = make_request_with_retries(client, prompt, max_tokens, temperature, top_p)
+        yield response
+    except Exception as e:
+        st.error(f"An error occurred: {str(e)}")
+        yield "I'm sorry, but I encountered an error while processing your request."
+
+st.title("Phi-3.5 Mini Chatbot")
+
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+
+system_message = st.text_input("System message", value="You are a helpful AI assistant.")
+max_tokens = st.slider("Max new tokens", min_value=1, max_value=1024, value=256, step=1)
+temperature = st.slider("Temperature", min_value=0.01, max_value=1.0, value=0.7, step=0.01)
+top_p = st.slider("Top P", min_value=0.0, max_value=1.0, value=0.9, step=0.01)
+
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+
+if prompt := st.chat_input("What is your message?"):
+    st.session_state.messages.append({"role": "user", "content": prompt})
+    with st.chat_message("user"):
+        st.markdown(prompt)
+
+    with st.chat_message("assistant"):
+        message_placeholder = st.empty()
+        full_response = ""
+        for response in respond(prompt,
+                                 [(msg["content"], st.session_state.messages[i+1]["content"])
+                                  for i, msg in enumerate(st.session_state.messages[:-1:2])],
+                                 system_message,
+                                 max_tokens,
+                                 temperature,
+                                 top_p):
+            message_placeholder.markdown(response)
+            full_response = response
+
+    st.session_state.messages.append({"role": "assistant", "content": full_response})
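For reference, a minimal sketch of how the new inference path could be exercised outside Streamlit. The token value and the question below are placeholders, and the retry loop mirrors the exponential backoff used by make_request_with_retries (max_retries=5, initial_delay=1) rather than calling the Streamlit-bound functions directly; it assumes huggingface_hub and requests are installed.

import time
from huggingface_hub import InferenceClient, HfApi
from requests.exceptions import RequestException

HF_TOKEN = "hf_..."  # placeholder; use a token that passes validation

# Same validation the app performs: whoami() raises if the token is invalid
HfApi(token=HF_TOKEN).whoami()

client = InferenceClient("microsoft/Phi-3.5-mini-instruct", token=HF_TOKEN)

# Prompt built in the same "Human:/Assistant:" format as respond()
prompt = "You are a helpful AI assistant.\n\nHuman: What is exponential backoff?\nAssistant:"

# Equivalent of make_request_with_retries: wait 1, 2, 4, then 8 seconds
# between attempts, and re-raise the error on the final attempt
for attempt in range(5):
    try:
        print(client.text_generation(prompt, max_new_tokens=256, temperature=0.7, top_p=0.9))
        break
    except RequestException:
        if attempt == 4:
            raise
        time.sleep(2 ** attempt)

In the app itself, each yielded response replaces the chat placeholder and the last value is stored in full_response, which is what gets appended to st.session_state.messages as the assistant turn.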