Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -32,7 +32,7 @@ def create_embeddings(text):
|
|
32 |
print("Embeddings created successfully.")
|
33 |
return embeddings, sentences
|
34 |
|
35 |
-
def
|
36 |
logging.info("Generating plot.")
|
37 |
# Generate embeddings for the query
|
38 |
query_embedding = model.encode([query])[0]
|
@@ -79,36 +79,55 @@ def generate_plot(query, pdf_file):
|
|
79 |
save(p)
|
80 |
logging.info("Plot saved to file.")
|
81 |
return temp_file.name
|
|
|
|
|
82 |
|
83 |
-
def
|
84 |
-
logging.info("
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
|
|
|
|
|
|
93 |
|
94 |
-
#
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
-
|
99 |
-
|
100 |
-
logging.info("Returning iframe HTML content.")
|
101 |
-
return iframe_html
|
102 |
|
103 |
-
|
|
|
|
|
|
|
|
|
104 |
iface = gr.Interface(
|
105 |
fn=gradio_interface,
|
106 |
inputs=[gr.File(label="Upload PDF"), gr.Textbox(label="Query")],
|
107 |
-
outputs=gr.
|
108 |
title="PDF Content Visualizer",
|
109 |
description="Upload a PDF and enter a query to visualize the content."
|
110 |
)
|
111 |
|
112 |
-
|
113 |
if __name__ == "__main__":
|
114 |
iface.launch()
|
|
|
32 |
print("Embeddings created successfully.")
|
33 |
return embeddings, sentences
|
34 |
|
35 |
+
def generate_plot_bokeh(query, pdf_file):
|
36 |
logging.info("Generating plot.")
|
37 |
# Generate embeddings for the query
|
38 |
query_embedding = model.encode([query])[0]
|
|
|
79 |
save(p)
|
80 |
logging.info("Plot saved to file.")
|
81 |
return temp_file.name
|
82 |
+
import plotly.express as px
|
83 |
+
import plotly.graph_objects as go
|
84 |
|
85 |
+
def generate_plotly_figure(query, pdf_file, top_k=5):
    """Build a Plotly scatter plot of a PDF's sentences projected to 2-D with UMAP.

    The sentences most similar to ``query`` (by cosine similarity of their
    embeddings) are coloured red; all other sentences are blue.

    Args:
        query: Query text; embedded with the module-level ``model``.
        pdf_file: Uploaded file object; its ``.name`` attribute is passed to
            ``process_pdf`` (presumably a filesystem path -- confirm against
            the Gradio version in use).
        top_k: How many of the most similar sentences to highlight.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        A ``plotly.graph_objects.Figure`` with one marker per sentence and the
        sentence text attached to each marker.

    Raises:
        ValueError: If ``top_k`` is negative or no sentences were extracted
            from the PDF.
    """
    if top_k < 0:
        raise ValueError("top_k must be non-negative.")

    logging.info("Generating plot with Plotly.")
    # Generate embeddings for the query
    query_embedding = model.encode([query])[0]

    # Process the PDF and create embeddings
    text = process_pdf(pdf_file.name)
    embeddings, sentences = create_embeddings(text)
    if len(sentences) == 0:
        # Fail with a clear message instead of an opaque stacking/UMAP error.
        raise ValueError("No sentences could be extracted from the PDF.")

    logging.info("Data prepared for UMAP.")
    # The query is appended as the LAST row so that it takes part in the
    # UMAP fit; that row is dropped again when plotting below.
    all_embeddings = np.vstack([embeddings, query_embedding])

    # UMAP transformation
    umap_transform = umap.UMAP(n_neighbors=15, min_dist=0.0, n_components=2, random_state=42)
    umap_embeddings = umap_transform.fit_transform(all_embeddings)

    logging.info("UMAP transformation completed.")
    # Find the closest sentences to the query. These are similarities (higher
    # means closer), so take the tail of the ascending argsort, best first.
    similarities = cosine_similarity([query_embedding], embeddings)[0]
    closest_indices = similarities.argsort()[::-1][:top_k]

    # Prepare data for plotting. A set gives O(1) membership tests instead of
    # scanning the index array once per sentence.
    highlighted = set(closest_indices.tolist())
    colors = ['red' if i in highlighted else 'blue' for i in range(len(sentences))]

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=umap_embeddings[:-1, 0],
            y=umap_embeddings[:-1, 1],
            mode='markers',
            marker=dict(color=colors),
            text=list(sentences),
        )
    )

    fig.update_layout(title="UMAP Projection of Sentences", xaxis_title="UMAP 1", yaxis_title="UMAP 2")

    logging.info("Plotly figure created successfully.")
    return fig
|
|
|
|
|
118 |
|
119 |
+
def gradio_interface(pdf_file, query):
    """Gradio callback: turn an uploaded PDF and a query into a Plotly figure.

    Note the argument order: Gradio passes the inputs as (file, text), while
    ``generate_plotly_figure`` takes (query, pdf_file).
    """
    logging.info("Gradio interface called.")
    figure = generate_plotly_figure(query, pdf_file)
    logging.info("Returning Plotly figure.")
    return figure
|
124 |
# Gradio UI: a PDF upload plus a free-text query in, a Plotly figure out.
iface = gr.Interface(
    title="PDF Content Visualizer",
    description="Upload a PDF and enter a query to visualize the content.",
    fn=gradio_interface,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Textbox(label="Query"),
    ],
    outputs=gr.Plot(),  # gr.Plot() renders the Plotly figure returned by fn
)


# Start the app only when this file is executed directly.
if __name__ == "__main__":
    iface.launch()
|