Spaces:
Runtime error
Runtime error
alonsosilva
committed on
Commit
•
cf23c39
1
Parent(s):
b3f4f85
Add app
Browse files- Dockerfile +23 -0
- README.md +1 -0
- app.py +53 -0
- requirements.txt +8 -0
Dockerfile
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.11

# Set up a new user named "user" with user ID 1000 for permission
RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Use a dedicated application directory instead of the image root ("/"),
# so the relative COPY targets and the entrypoint's "app.py" resolve there.
WORKDIR $HOME/app

# Upgrade pip
RUN pip install --no-cache-dir --upgrade pip

# Copy only the requirements first so the dependency layer is cached
# independently of changes to app.py.
COPY --chown=user requirements.txt .

# Install requirements
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user app.py app.py

# Serve the Solara app on all interfaces on port 7860.
ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
@@ -6,6 +6,7 @@ colorTo: green
|
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
license: mit
|
|
|
9 |
---
|
10 |
|
11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
license: mit
|
9 |
+
app_port: 7860
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import solara
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
import pandas as pd
|
5 |
+
from sentence_transformers import SentenceTransformer
|
6 |
+
from huggingface_hub import snapshot_download
|
7 |
+
from umap import UMAP
|
8 |
+
from annoy import AnnoyIndex
|
9 |
+
from cluestar import plot_text
|
10 |
+
|
11 |
+
# --- Corpus -----------------------------------------------------------------
# Load the news titles and keep only real strings (missing titles are read as
# NaN, whose string form is 'nan').
news = pd.read_csv('https://raw.githubusercontent.com/alonsosilvaallende/fake-and-real-news-titles/main/example.csv')
texts = [str(title) for title in news["title"].values if str(title) != 'nan']

# NOTE(review): not referenced anywhere in the visible code; kept in case
# another part of the file relies on it.
sentences = ["This is an example sentence", "Each sentence is converted"]

# --- Embedding model --------------------------------------------------------
# Download only the config/tokenizer JSON files and the PyTorch weights.
model_path = snapshot_download(
    repo_id="TaylorAI/gte-tiny", allow_patterns=["*.json", "pytorch_model.bin"]
)

embedder2 = SentenceTransformer(model_path)
# Embed at most the first 500 titles. Slicing (rather than indexing with
# range(500)) avoids an IndexError when fewer than 500 titles are available,
# and a single batched encode call replaces one model call per title.
embeddings2 = embedder2.encode(texts[:500])

# --- 2-D projection used for plotting ---------------------------------------
reducer = UMAP()
X2 = reducer.fit_transform(embeddings2)

# --- Nearest-neighbour index over the embedded titles -----------------------
f = len(embeddings2[0])  # embedding dimensionality
t = AnnoyIndex(f, 'angular')
for i, embedded_text in enumerate(embeddings2):
    t.add_item(i, embedded_text)
t.build(1000)

# Reactive state holding the current search query shown in the UI.
query = solara.reactive("What did Nancy Pelosi said about Obamacare?")
|
33 |
+
@solara.component
def Page():
    """Render the embedding explorer.

    Shows a text input bound to the module-level reactive ``query``. When the
    query is non-empty, it is embedded, its 10 nearest titles are looked up in
    the Annoy index ``t``, and the projected corpus is plotted with the
    neighbours and the query highlighted, followed by a table of the
    neighbours and their distances. When the query is empty, only the corpus
    is plotted.
    """
    # Derive the number of plotted titles from the projected corpus instead of
    # repeating a hard-coded size, so labels, colours and points always have
    # matching lengths.
    n_points = len(X2)
    corpus = texts[:n_points]

    with solara.Column(margin=10):
        solara.Markdown("#Embeddings")
        solara.InputText("Enter some query:", query, continuous_update=True)
        if query.value != "":
            embedded_query = embedder2.encode(query.value)
            idx, distances = t.get_nns_by_vector(embedded_query, 10, include_distances=True)
            df_neighbors = pd.DataFrame(
                {
                    "neighbors": [texts[i] for i in idx],
                    "distances": distances,
                }
            )
            # Project the query with the already-fitted reducer so it can be
            # drawn alongside the corpus points.
            x = reducer.transform([embedded_query])
            neighbor_ids = set(idx)
            color_array = [
                "neighbors" if i in neighbor_ids else "texts"
                for i in range(n_points)
            ] + ["query"]
            chart = plot_text(
                np.vstack((X2, x)),
                corpus + [query.value],
                color_array=color_array,
            ).configure_range(
                category=['#0000ff', '#ff0000', '#a0aab4']
            )
            solara.AltairChart(chart)
            solara.DataFrame(df_neighbors, items_per_page=10)
            solara.Markdown("Dataset: 'Fake and real news' from [kaggle](https://www.kaggle.com/datasets/clmentbisaillon/fake-and-real-news-dataset)")
        else:
            color_array = ["texts"] * n_points
            solara.AltairChart(plot_text(X2, corpus, color_array=color_array))
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
solara
numpy
pandas
sentence-transformers
# Imported directly by app.py (snapshot_download), so declared explicitly.
huggingface_hub
annoy
cluestar
# The UMAP implementation is distributed on PyPI as "umap-learn". The separate
# PyPI package named "umap" is unrelated and installs a conflicting top-level
# "umap" module, so it must not be listed here.
umap-learn
|