DexterSptizu committed on
Commit
4abd716
·
verified ·
1 Parent(s): f1585e7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: scrape a web page, summarize it with OpenAI, and embed it.

The script loads the page at a user-supplied URL, shows its raw text,
optionally asks an OpenAI chat model for a write-up of that text, and
computes a sentence-transformer embedding of the text with a few summary
statistics.
"""

import numpy as np
import streamlit as st
from langchain_community.document_loaders import WebBaseLoader
from openai import OpenAI
from sentence_transformers import SentenceTransformer

EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
SUMMARY_MODEL_NAME = "gpt-3.5-turbo"
# Rough character budget for the text sent to the chat model, so very long
# pages do not overflow the model's context window. NOTE(review): this is a
# chars-per-token heuristic, not an exact token count -- tune as needed.
MAX_SUMMARY_INPUT_CHARS = 40_000

# Page configuration is issued before any other Streamlit call.
st.set_page_config(
    page_title="🦜 LangChain Document Explorer",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Session state survives Streamlit reruns; seed every key the script reads.
if "openai_summary" not in st.session_state:
    st.session_state.openai_summary = None
if "show_summary" not in st.session_state:
    st.session_state.show_summary = False
if "summary_url" not in st.session_state:
    # URL the stored summary belongs to; used to drop stale summaries.
    st.session_state.summary_url = None


def toggle_summary():
    """Flip the ``show_summary`` flag kept in session state.

    Nothing in the visible script calls this or reads ``show_summary``; it is
    kept so that a widget callback can be attached to it later.
    """
    st.session_state.show_summary = not st.session_state.show_summary


@st.cache_data(ttl=3600, show_spinner=False)
def load_page_texts(page_url):
    """Return the ``page_content`` of every document loaded from *page_url*.

    Cached per URL so that unrelated widget interactions (typing the API key,
    pressing a button) do not re-download the page on every rerun. Exceptions
    raised by the loader propagate to the caller and are not cached.
    """
    loader = WebBaseLoader(web_paths=[page_url])
    return [doc.page_content for doc in loader.load()]


@st.cache_resource(show_spinner=False)
def get_embedding_model(model_name):
    """Return a ``SentenceTransformer`` for *model_name*, built once per process."""
    return SentenceTransformer(model_name)


# Custom CSS for better styling
st.markdown(
    """
<style>
.main {
    padding: 2rem;
}
.stButton>button {
    width: 100%;
    margin-top: 1rem;
}
.css-1d391kg {
    padding: 1rem;
}
</style>
""",
    unsafe_allow_html=True,
)

# Main title with emoji
st.title("🦜 Webscrapping and Summarizing using OpenAI")
st.markdown(
    """
Explore web content with AI-powered analysis and processing.
Upload a URL to get started!
"""
)

# Sidebar configuration
with st.sidebar:
    st.header("⚙️ Configuration")
    openai_api_key = st.text_input("OpenAI API Key:", type="password")

    st.markdown("---")
    st.markdown(
        "### 📖 Quick Guide\n"
        "1. Enter your OpenAI API key\n"
        "2. Input a webpage URL\n"
        "3. Explore different analyses in the tabs\n"
    )

    st.markdown("---")
    st.markdown("Made with ❤️ using LangChain 0.3 & Streamlit 1.41.0")

# Main content area
url = st.text_input("🔗 Enter webpage URL:", "https://python.langchain.com/docs/")

# A summary generated for a previous URL must not be shown for a new one.
if st.session_state.summary_url != url:
    st.session_state.openai_summary = None
    st.session_state.summary_url = url

# Document loading
page_texts = None
if url:
    # Broad catch on purpose: this is the UI boundary, and any loader failure
    # (bad URL, network error, parse error) should surface as a message.
    try:
        with st.spinner("Loading webpage..."):
            page_texts = load_page_texts(url)
        st.success("✅ Webpage loaded successfully!")
    except Exception as e:
        st.error(f"❌ Error loading webpage: {e}")

# Process and display content in tabs
if page_texts:
    full_text = " ".join(page_texts)
    original_tab, analysis_tab, embeddings_tab = st.tabs(
        ["📄 Original Content", "🤖 AI Analysis", "📊 Embeddings"]
    )

    # Original Content Tab
    with original_tab:
        st.markdown("### Original Web Content")
        st.markdown(full_text)

    # AI Analysis Tab
    with analysis_tab:
        if openai_api_key:
            st.markdown("### AI Content Analysis")

            summary_input = full_text[:MAX_SUMMARY_INPUT_CHARS]
            if len(full_text) > MAX_SUMMARY_INPUT_CHARS:
                st.caption(
                    f"Only the first {MAX_SUMMARY_INPUT_CHARS:,} characters "
                    "of the page are sent to the model."
                )

            if st.button("Generate AI Summary", key="generate_summary"):
                try:
                    with st.spinner("Generating AI summary..."):
                        client = OpenAI(api_key=openai_api_key)
                        response = client.chat.completions.create(
                            model=SUMMARY_MODEL_NAME,
                            messages=[
                                {
                                    "role": "system",
                                    "content": (
                                        "Create a detailed writeup with key points "
                                        "and insights from the following text. "
                                        "Be grounded in the given text"
                                    ),
                                },
                                {"role": "user", "content": summary_input},
                            ],
                            max_tokens=500,
                        )
                        # Stored in session state so it survives later reruns.
                        st.session_state.openai_summary = (
                            response.choices[0].message.content
                        )
                except Exception as e:
                    st.error(f"❌ Error generating summary: {e}")

            # Display OpenAI summary if available
            if st.session_state.openai_summary:
                st.markdown("#### 📝 AI-Generated Summary")
                st.markdown(st.session_state.openai_summary)
        else:
            st.warning(
                "⚠️ Please enter your OpenAI API key in the sidebar to use AI analysis."
            )

    # Embeddings Tab
    with embeddings_tab:
        st.markdown("### Document Embeddings")
        try:
            with st.spinner("Generating embeddings..."):
                model = get_embedding_model(EMBEDDING_MODEL_NAME)
                # The whole page is encoded as one string; the code below
                # treats the result as a single 1-D vector.
                embeddings = model.encode(full_text)

            st.markdown(f"**Embeddings Shape**: {embeddings.shape}")
            st.markdown("#### Embedding Vector Preview")
            st.write(embeddings[:10])  # Show first 10 dimensions

            # Visualize embedding statistics
            st.markdown("#### Embedding Statistics")
            mean_col, std_col, dims_col = st.columns(3)
            with mean_col:
                st.metric("Mean", f"{np.mean(embeddings):.4f}")
            with std_col:
                st.metric("Std Dev", f"{np.std(embeddings):.4f}")
            with dims_col:
                st.metric("Dimensions", embeddings.shape[0])

        except Exception as e:
            st.error(f"❌ Error generating embeddings: {e}")
else:
    st.info("👆 Please enter a URL above to get started!")