Spaces:
Running
Running
AdityaKhalkar
commited on
Commit
•
fc4e82c
1
Parent(s):
d1d080a
added files
Browse files- README.md +5 -12
- datasets.csv +0 -0
- requirements.txt +5 -0
- streamlit_app.py +61 -0
README.md
CHANGED
@@ -1,13 +1,6 @@
|
|
1 |
-
|
2 |
-
title: Dataset Finder
|
3 |
-
emoji: 💻
|
4 |
-
colorFrom: pink
|
5 |
-
colorTo: indigo
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.33.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: mit
|
11 |
-
---
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
1 |
+
# Welcome to Streamlit!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
Edit `/streamlit_app.py` to customize this app to your heart's desire. :heart:
|
4 |
+
|
5 |
+
If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
|
6 |
+
forums](https://discuss.streamlit.io).
|
datasets.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
altair
|
2 |
+
pandas
|
3 |
+
streamlit
|
4 |
+
tensorflow
|
5 |
+
transformers
|
streamlit_app.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path

import pandas as pd
import streamlit as st
from transformers import pipeline
|
4 |
+
|
5 |
+
# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the expensive resources below are wrapped in Streamlit's
# caches instead of being rebuilt on each rerun.


@st.cache_resource
def _load_classifier():
    """Build the zero-shot classification pipeline once and reuse it across reruns."""
    return pipeline("zero-shot-classification",
                    model="facebook/bart-large-mnli")


@st.cache_data
def _load_datasets():
    """Read datasets.csv from the directory that contains this script.

    The rest of the file reads the 'Keyword' and 'Datasets' columns of the
    returned frame.  Resolving the path relative to this file (rather than a
    hard-coded absolute path) keeps the app working wherever it is deployed.
    """
    return pd.read_csv(Path(__file__).resolve().parent / 'datasets.csv')


# Load the zero-shot classification model
classifier = _load_classifier()

# Keyword -> dataset lookup table used by tag_finder() and main()
df = _load_datasets()
|
11 |
+
|
12 |
+
def tag_finder(user_input):
    """Return the keyword tags that best match *user_input* and their datasets.

    The text is scored against every distinct value of ``df['Keyword']`` with
    the module-level zero-shot ``classifier``.  A score cutoff is derived from
    the ranked scores (see the loop below) and every label scoring at or above
    it is kept.

    Args:
        user_input: Free-form text typed by the user.

    Returns:
        A ``(useful_tags, relevant_datasets)`` tuple: the selected keyword
        labels, and the ``df['Datasets']`` values of every row whose keyword
        is one of those labels (grouped in tag order).  Both lists are empty
        when there is nothing to classify.
    """
    # The zero-shot pipeline raises ValueError for an empty sequence, so
    # blank input is answered directly instead of crashing the app.
    if not user_input or not user_input.strip():
        return [], []

    # Pass a plain list of real labels: a NaN keyword can never match a row
    # in the lookup below and would only add a meaningless candidate.
    keywords = df['Keyword'].dropna().unique().tolist()
    if not keywords:
        return [], []

    result = classifier(user_input, keywords)
    labels = result['labels']
    scores = result['scores']

    # Walk the scores in the order returned, starting from the first one.
    # While the next score is at least 10% (of the current cutoff) lower,
    # the cutoff is lowered to that score; the walk stops at the first score
    # that is closer than that.
    # NOTE(review): this extends the cutoff on large drops and stops on small
    # ones -- confirm that this is the intended selection rule.
    threshold = scores[0]
    for score in scores:
        if score == threshold:
            continue
        if (threshold - score) >= threshold / 10:
            threshold = score
        else:
            break

    useful_tags = [label for label, score in zip(labels, scores) if score >= threshold]

    relevant_datasets = []
    for tag in useful_tags:
        relevant_datasets.extend(df.loc[df['Keyword'] == tag, 'Datasets'].tolist())
    return useful_tags, relevant_datasets
|
28 |
+
|
29 |
+
# Define the Streamlit app
|
30 |
+
def _dataset_card_html(dataset, tag):
    """Build the HTML card shown for one dataset and its keyword tag.

    The dataset name and tag are HTML-escaped before being placed in the
    markup (it is rendered with ``unsafe_allow_html=True``), and the dataset
    name is URL-encoded before being placed in the search link's query.
    The markup is emitted without leading indentation so the Markdown
    renderer cannot mistake it for an indented code block.
    """
    from html import escape
    from urllib.parse import quote_plus

    safe_name = escape(str(dataset))
    safe_tag = escape(str(tag))
    query = quote_plus(str(dataset))
    return (
        '<div style="border: 2px solid #555; border-radius: 10px; padding: 10px; margin-bottom: 10px; background-color: #333; color: white; display: flex; justify-content: space-between; align-items: center;">'
        f'<div>{safe_name}</div>'
        '<div style="padding: 5px 10px; border: #fff 2px solid; border-radius: 5px;transition: background-color 0.3s;">'
        f'<a href="https://datasetsearch.research.google.com/search?search&src=0&query={query}" style = "text-decoration: none; color: white;">link</a>'
        '</div>'
        '<div style="border: 1px solid #666; padding: 5px; background-color: #444; border-radius: 12px;">'
        '<img width="20" height="20" style="margin: 5px;" src="https://img.icons8.com/ios/50/ffffff/price-tag--v2.png" alt="price-tag--v2"/>'
        f'{safe_tag}'
        '</div>'
        '</div>'
    )


def main():
    """Render the page: a text box, a Submit button, and the matching datasets."""
    # Set title and description
    st.title("Dataset Tagging System")
    st.write("Enter your text below and get relevant tags for your dataset.")

    # Get user input
    user_input = st.text_input("Enter your text:")

    if not st.button("Submit"):
        return

    # Nothing to classify: tell the user instead of calling the model.
    if not user_input or not user_input.strip():
        st.warning("Please enter some text first.")
        return

    # Find relevant tags and datasets
    relevant_tags, relevant_datasets = tag_finder(user_input)
    if not relevant_tags:
        st.warning("No relevant tags found.")
        return

    st.subheader("Datasets:")
    # Only rows whose keyword was actually selected are considered, so the
    # tag shown on a card is always one of the matched tags even when the
    # same dataset is also listed under other keywords.
    matched_rows = df[df['Keyword'].isin(relevant_tags)]
    for dataset in relevant_datasets:
        tag = matched_rows.loc[matched_rows['Datasets'] == dataset, 'Keyword'].iloc[0]
        st.markdown(_dataset_card_html(dataset, tag), unsafe_allow_html=True)


# Run the app
if __name__ == "__main__":
    main()
|