Jan Mühlnikel
committed on
Commit
·
1de85ab
1
Parent(s):
1693bac
test
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +0 -36
- .streamlit/config.toml +0 -14
- README.md +0 -13
- __pycache__/app.cpython-310.pyc +0 -0
- __pycache__/crs.cpython-310.pyc +0 -0
- __pycache__/home.cpython-310.pyc +0 -0
- __pycache__/sector.cpython-310.pyc +0 -0
- __pycache__/similarity.cpython-310.pyc +0 -0
- __pycache__/similarity_page.cpython-310.pyc +0 -0
- app.py +0 -1
- functions/__pycache__/calc_matches.cpython-310.pyc +0 -0
- functions/__pycache__/filter_projects.cpython-310.pyc +0 -0
- functions/__pycache__/semantic_search.cpython-310.pyc +0 -0
- functions/__pycache__/single_similar.cpython-310.pyc +0 -0
- functions/calc_matches.py +0 -37
- functions/different_orga_filter.py +0 -12
- functions/filter_projects.py +0 -54
- functions/filter_single.py +0 -22
- functions/same_country_filter.py +0 -16
- functions/semantic_search.py +0 -27
- functions/single_similar.py +0 -25
- modules/__pycache__/crs_table.cpython-310.pyc +0 -0
- modules/__pycache__/filter_modules.cpython-310.pyc +0 -0
- modules/__pycache__/filter_projects.cpython-310.pyc +0 -0
- modules/__pycache__/navbar.cpython-310.pyc +0 -0
- modules/__pycache__/result_table.cpython-310.pyc +0 -0
- modules/__pycache__/sdg_table.cpython-310.pyc +0 -0
- modules/__pycache__/semantic_search.cpython-310.pyc +0 -0
- modules/__pycache__/similarity_table.cpython-310.pyc +0 -0
- modules/multimatch_result_table.py +0 -134
- modules/navbar.py +0 -39
- requirements.txt +0 -10
- similarity_page.py +4 -10
- src/codelists/country_codes_ISO3166-1alpha-2.csv +0 -3
- src/codelists/crs3_codes.csv +0 -3
- src/codelists/crs5_codes.csv +0 -3
- src/codelists/flags/AC.png +0 -0
- src/codelists/flags/AD.png +0 -0
- src/codelists/flags/AE.png +0 -0
- src/codelists/flags/AF.png +0 -0
- src/codelists/flags/AG.png +0 -0
- src/codelists/flags/AI-alt.png +0 -0
- src/codelists/flags/AI.png +0 -0
- src/codelists/flags/AL.png +0 -0
- src/codelists/flags/AM.png +0 -0
- src/codelists/flags/AO.png +0 -0
- src/codelists/flags/AQ.png +0 -0
- src/codelists/flags/AR.png +0 -0
- src/codelists/flags/AS.png +0 -0
- src/codelists/flags/AT.png +0 -0
.gitattributes
DELETED
@@ -1,36 +0,0 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
-
*.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.streamlit/config.toml
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
[global]
|
2 |
-
|
3 |
-
[server]
|
4 |
-
headless = true
|
5 |
-
|
6 |
-
[client]
|
7 |
-
initialSidebarState = "expanded"
|
8 |
-
|
9 |
-
[theme]
|
10 |
-
primaryColor="#c30f08"
|
11 |
-
backgroundColor="#ffffff"
|
12 |
-
secondaryBackgroundColor="#eceded"
|
13 |
-
textColor="#000000"
|
14 |
-
font="sans serif"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Development Banks Collaboration Analyzer
|
3 |
-
emoji: 🐢
|
4 |
-
colorFrom: pink
|
5 |
-
colorTo: red
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.32.2
|
8 |
-
app_file: app.py
|
9 |
-
pinned: true
|
10 |
-
license: mit
|
11 |
-
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__pycache__/app.cpython-310.pyc
DELETED
Binary file (664 Bytes)
|
|
__pycache__/crs.cpython-310.pyc
DELETED
Binary file (3.71 kB)
|
|
__pycache__/home.cpython-310.pyc
DELETED
Binary file (447 Bytes)
|
|
__pycache__/sector.cpython-310.pyc
DELETED
Binary file (6.06 kB)
|
|
__pycache__/similarity.cpython-310.pyc
DELETED
Binary file (3.66 kB)
|
|
__pycache__/similarity_page.cpython-310.pyc
DELETED
Binary file (9.48 kB)
|
|
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import streamlit as st
|
2 |
-
|
3 |
# PAGE CONFIG
|
4 |
st.set_page_config(
|
5 |
page_title='Development Banks Collaboration Analyzer',
|
|
|
1 |
import streamlit as st
|
|
|
2 |
# PAGE CONFIG
|
3 |
st.set_page_config(
|
4 |
page_title='Development Banks Collaboration Analyzer',
|
functions/__pycache__/calc_matches.cpython-310.pyc
DELETED
Binary file (922 Bytes)
|
|
functions/__pycache__/filter_projects.cpython-310.pyc
DELETED
Binary file (1.81 kB)
|
|
functions/__pycache__/semantic_search.cpython-310.pyc
DELETED
Binary file (1.07 kB)
|
|
functions/__pycache__/single_similar.cpython-310.pyc
DELETED
Binary file (672 Bytes)
|
|
functions/calc_matches.py
DELETED
@@ -1,37 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import numpy as np
|
3 |
-
|
4 |
-
def calc_matches(filtered_df, project_df, similarity_matrix, top_x):
    """Return the ``top_x`` most similar (project 1, project 2) pairs.

    Rows of ``similarity_matrix`` stand for "project 1" and columns for
    "project 2".  Project 1 candidates come from ``filtered_df`` and
    project 2 candidates from ``project_df``; the index labels of both
    frames are used as positions into ``similarity_matrix``.

    Args:
        filtered_df: Projects allowed on the "project 1" side.
        project_df: Projects allowed on the "project 2" side.
        similarity_matrix: 2-D NumPy array of pairwise similarities.  Its
            diagonal is set to 0 **in place** (unchanged behaviour) so a
            project is never its own best match; the caller's array is
            therefore modified.
        top_x: Maximum number of pairs to return.  Values below 1 yield
            empty results; values above the number of available pairs are
            clamped.

    Returns:
        ``(p1_df, p2_df)``: two new DataFrames of equal length whose rows
        pair up positionally, each carrying an extra ``"similarity"``
        column.  Pairs are ordered by ascending similarity (best last).
    """
    row_positions = filtered_df.index
    col_positions = project_df.index

    np.fill_diagonal(similarity_matrix, 0)
    match_matrix = similarity_matrix[row_positions, :][:, col_positions]

    # Flat ranking of every (row, column) cell, ascending by similarity.
    ranked = np.argsort(match_matrix, axis=None)

    top_x = max(0, min(int(top_x), len(ranked)))
    # ``ranked[-0:]`` would select *every* pair, so slice from an explicit start.
    best_flat = ranked[len(ranked) - top_x:]

    # Translate flat positions back to (project 1, project 2) coordinates.
    top_rows, top_cols = np.unravel_index(best_flat, match_matrix.shape)
    top_values = match_matrix[top_rows, top_cols]

    # Copy before adding a column so the input frames are never touched and
    # pandas does not warn about writing to a slice.
    p1_df = filtered_df.iloc[top_rows].copy()
    p1_df["similarity"] = top_values
    p2_df = project_df.iloc[top_cols].copy()
    p2_df["similarity"] = top_values

    return p1_df, p2_df
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
functions/different_orga_filter.py
DELETED
@@ -1,12 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
|
3 |
-
def different_orga_filter(df, orga):
    """Keep only the projects that do NOT belong to ``orga``.

    The previous body was a copy of the country filter: it looped over an
    undefined ``country_code_list`` (a ``NameError`` on every call) and
    never used ``orga``.

    NOTE(review): the intended behaviour is inferred from the function name
    and from the ``orga_abbreviation`` column used by the other filters -
    confirm against the caller before relying on it.

    Args:
        df: Project table with an ``orga_abbreviation`` column.
        orga: One organisation abbreviation, or a list/tuple/set of them.

    Returns:
        The rows of ``df`` whose ``orga_abbreviation`` is not (one of)
        ``orga``, keeping their original index labels.
    """
    if isinstance(orga, (list, tuple, set, frozenset)):
        excluded = list(orga)
    else:
        excluded = [orga]

    return df[~df["orga_abbreviation"].isin(excluded)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
functions/filter_projects.py
DELETED
@@ -1,54 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
from functions.semantic_search import search
|
3 |
-
|
4 |
-
def contains_code(crs_codes, code_list):
    """Tell whether any ';'-separated entry of ``crs_codes`` is in ``code_list``.

    ``crs_codes`` is converted with ``str()`` first, so non-string values
    are compared through their string form.
    """
    for candidate in str(crs_codes).split(';'):
        if candidate in code_list:
            return True
    return False
|
7 |
-
|
8 |
-
def filter_projects(df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list,
                    model,
                    TOP_X_PROJECTS=30):
    """Apply the selected CRS, SDG, country and organisation filters to ``df``.

    Args:
        df: Project table.  Columns read here: ``crs_3_code``,
            ``crs_5_code``, ``sgd_pred_code``, ``country`` and
            ``orga_abbreviation``.
        crs3_list: Selected CRS-3 codes (used only when no CRS-5 code is
            selected).
        crs5_list: Selected CRS-5 codes; take precedence over ``crs3_list``.
        sdg_str: Selected SDG number as a string, or ``""`` for none.
        country_code_list: Selected country codes; a project is kept when
            its ``country`` value contains at least one of them.
        orga_code_list: Selected organisation abbreviations.
        model: Unused while the semantic query filter is disabled; kept so
            existing callers keep working.
        TOP_X_PROJECTS: Unused for the same reason.

    Returns:
        The filtered DataFrame (original row order and index labels, each
        project at most once), or ``None`` when neither a CRS code nor an
        SDG is selected.
    """
    # NOTE(review): the whole pipeline is assumed to sit behind this guard,
    # which matches the caller testing the result with ``isinstance(...,
    # pd.DataFrame)`` - confirm against the original indentation.
    if crs3_list == [] and crs5_list == [] and sdg_str == "":
        return None

    # FILTER CRS: CRS-5 selections win over CRS-3 whenever both are given.
    if crs5_list:
        df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
    elif crs3_list:
        df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]

    # FILTER SDG
    if sdg_str != "":
        df = df[df["sgd_pred_code"] == int(sdg_str)]

    # FILTER COUNTRY: OR the per-country matches into one mask.  Concatenating
    # one slice per country (the previous approach) repeated every project
    # that matched more than one selected country.
    if country_code_list:
        country_mask = pd.Series(False, index=df.index)
        for code in country_code_list:
            country_mask |= df["country"].str.contains(code, na=False, regex=False)
        df = df[country_mask]

    # FILTER ORGANIZATION
    if orga_code_list:
        df = df[df['orga_abbreviation'].isin(orga_code_list)]

    return df
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
functions/filter_single.py
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
from functions.semantic_search import search
|
3 |
-
|
4 |
-
def contains_code(crs_codes, code_list):
    """Tell whether any ';'-separated entry of ``crs_codes`` is in ``code_list``.

    ``crs_codes`` is converted with ``str()`` first, so non-string values
    are compared through their string form.
    """
    for candidate in str(crs_codes).split(';'):
        if candidate in code_list:
            return True
    return False
|
7 |
-
|
8 |
-
def filter_single(df, country_code_list, orga_code_list):
    """Filter ``df`` by country codes and organisation abbreviations.

    An empty list disables the corresponding filter.

    Args:
        df: Project table with ``country`` and ``orga_abbreviation`` columns.
        country_code_list: Country codes; a project is kept when its
            ``country`` value contains at least one of them.
        orga_code_list: Organisation abbreviations to keep.

    Returns:
        The matching rows in their original order, each project at most
        once, with their original index labels.
    """
    # FILTER COUNTRY: OR the per-country matches into one mask.  Concatenating
    # one slice per country (the previous approach) repeated every project
    # that matched more than one selected country.
    if country_code_list:
        country_mask = pd.Series(False, index=df.index)
        for code in country_code_list:
            country_mask |= df["country"].str.contains(code, na=False, regex=False)
        df = df[country_mask]

    # FILTER ORGANIZATION
    if orga_code_list:
        df = df[df['orga_abbreviation'].isin(orga_code_list)]

    return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
functions/same_country_filter.py
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
from functions.semantic_search import search
|
3 |
-
|
4 |
-
def same_country_filter(df, country_code_list):
    """Keep the projects whose ``country`` value contains a given code.

    Args:
        df: Project table with a ``country`` column.
        country_code_list: Country codes to look for.  When empty, ``df``
            is returned unchanged.

    Returns:
        The matching rows in their original order, each project at most
        once, with their original index labels.
    """
    if not country_code_list:
        return df

    # OR the per-country matches into one mask.  Concatenating one slice per
    # country (the previous approach) repeated every project that matched
    # more than one of the codes.
    country_mask = pd.Series(False, index=df.index)
    for code in country_code_list:
        country_mask |= df["country"].str.contains(code, na=False, regex=False)

    return df[country_mask]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
functions/semantic_search.py
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
import pickle
|
2 |
-
import faiss
|
3 |
-
import streamlit as st
|
4 |
-
from sentence_transformers import SentenceTransformer
|
5 |
-
import pandas as pd
|
6 |
-
|
7 |
-
def search(query, model, embeddings, filtered_df, top_x=20):
    """Return the rows of ``filtered_df`` closest to ``query`` (L2 distance).

    Args:
        query: Free-text search string.
        model: Object whose ``encode([query])`` returns the query embedding.
        embeddings: Array of project embeddings, indexed with the index
            labels of ``filtered_df``.
        filtered_df: Candidate projects.
        top_x: Maximum number of rows to return.

    Returns:
        Up to ``top_x`` rows of ``filtered_df`` in the order FAISS returns
        them; an empty frame with the same columns when there are no
        candidates or ``top_x`` is below 1.
    """
    candidate_count = len(filtered_df)
    if candidate_count == 0 or top_x <= 0:
        return filtered_df.iloc[0:0]

    row_positions = filtered_df.index
    filtered_embeddings = embeddings[row_positions]

    # Build a throw-away exact L2 index over the candidates only.
    dimension = filtered_embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(filtered_embeddings)

    # Convert query to embedding
    query_embedding = model.encode([query])[0].reshape(1, -1)

    # Asking for more neighbours than there are vectors makes FAISS pad the
    # labels with -1, which ``iloc`` would silently read as "last row".
    k = min(top_x, candidate_count)
    D, I = faiss_index.search(query_embedding, k=k)

    top_positions = [int(position) for position in I[0] if position >= 0]

    return filtered_df.iloc[top_positions]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
functions/single_similar.py
DELETED
@@ -1,25 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import numpy as np
|
3 |
-
|
4 |
-
def find_similar(p_index, similarity_matrix, filtered_df, top_x):
    """Return the ``top_x`` projects of ``filtered_df`` most similar to one project.

    Args:
        p_index: Row position of the selected project in
            ``similarity_matrix``.
        similarity_matrix: 2-D NumPy array of pairwise similarities.
        filtered_df: Candidate projects; their index labels are used as
            column positions into ``similarity_matrix``.
        top_x: Maximum number of rows to return.  Previously ignored in
            favour of a hard-coded 10; values below 1 yield an empty
            result and values above the candidate count are clamped.

    Returns:
        A new DataFrame with the best candidates, most similar first, and
        an added ``"similarity"`` column.  ``filtered_df`` is not modified.
    """
    candidate_positions = filtered_df.index.tolist()

    # Similarities of the selected project to the candidates only; entry j
    # belongs to the j-th row of ``filtered_df``.
    project_row = similarity_matrix[p_index][candidate_positions]

    ascending = np.argsort(project_row)
    top_x = max(0, min(int(top_x), len(ascending)))
    # ``ascending[-0:]`` would select everything, so slice from an explicit start.
    best_first = ascending[len(ascending) - top_x:][::-1]

    # Copy before adding a column so pandas does not warn about writing to a slice.
    result_df = filtered_df.iloc[best_first].copy()
    result_df["similarity"] = project_row[best_first]

    return result_df
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modules/__pycache__/crs_table.cpython-310.pyc
DELETED
Binary file (1.21 kB)
|
|
modules/__pycache__/filter_modules.cpython-310.pyc
DELETED
Binary file (997 Bytes)
|
|
modules/__pycache__/filter_projects.cpython-310.pyc
DELETED
Binary file (979 Bytes)
|
|
modules/__pycache__/navbar.cpython-310.pyc
DELETED
Binary file (784 Bytes)
|
|
modules/__pycache__/result_table.cpython-310.pyc
DELETED
Binary file (2.65 kB)
|
|
modules/__pycache__/sdg_table.cpython-310.pyc
DELETED
Binary file (1.19 kB)
|
|
modules/__pycache__/semantic_search.cpython-310.pyc
DELETED
Binary file (1.17 kB)
|
|
modules/__pycache__/similarity_table.cpython-310.pyc
DELETED
Binary file (1.41 kB)
|
|
modules/multimatch_result_table.py
DELETED
@@ -1,134 +0,0 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import pandas as pd
|
3 |
-
|
4 |
-
def _prepare_match_row(row):
    """Return a copy of the one-row frame ``row`` with the display columns added.

    Adds ``crs_3_code_list``, ``crs_5_code_list``, ``sdg_list`` and ``flag``.
    A value that cannot be parsed falls back to ``""`` (CRS names) or the
    ``xx`` placeholder flag, for this row only.
    """
    row = row.copy()

    for source, target in (("crs_3_name", "crs_3_code_list"),
                           ("crs_5_name", "crs_5_code_list")):
        try:
            # The last split element is dropped, as before (presumably the
            # stored values end with ';' - confirm in preprocessing).
            names = row[source].item().split(";")[:-1]
        except (AttributeError, KeyError, TypeError, ValueError):
            names = ""
        row[target] = [names]

    row["sdg_list"] = [row["sgd_pred_code"].item()]

    try:
        country_code = row["country"].item()[:2].lower()
    except (AttributeError, KeyError, TypeError, ValueError):
        country_code = "xx"
    row["flag"] = f"https://flagicons.lipis.dev/flags/4x3/{country_code}.svg"

    return row


def show_multi_table(p1_df, p2_df):
    """Render every matched project pair as a metric plus a two-row table.

    Args:
        p1_df: "Project 1" side of the matches, with a ``similarity`` column.
        p2_df: "Project 2" side; row ``i`` is the partner of row ``i`` in
            ``p1_df``.

    The frames are walked from the last row backwards in steps of two,
    because each match appears in both directions and should be shown once.
    """
    st.write("------------------")

    p1_df = p1_df.reset_index(drop=True)
    p2_df = p2_df.reset_index(drop=True)

    actual_ind = 0
    for i in range(len(p1_df) - 1, -1, -2):
        actual_ind += 1

        # TODO: move this per-row preparation into preprocessing.
        row_from_p1 = _prepare_match_row(p1_df.iloc[[i]])
        row_from_p2 = _prepare_match_row(p2_df.iloc[[i]])

        match_df = pd.concat([row_from_p1, row_from_p2], ignore_index=True)

        col1, col2 = st.columns([1, 12])
        with col1:
            # remove arrow from standard st.metric()
            st.write(
                """
                <style>
                [data-testid="stMetricDelta"] svg {
                display: none;
                }
                </style>
                """,
                unsafe_allow_html=True,
            )

            # Format the percentage numerically; slicing the string form of
            # the float produced values such as "100." or "56.9" for 0.57.
            similarity_pct = row_from_p1['similarity'].item() * 100
            st.metric(label="Match", value=f"{actual_ind}", delta=f"~ {similarity_pct:.1f} %")

        with col2:
            st.write(" ")
            st.dataframe(
                match_df[["iati_id", "title_main", "orga_abbreviation", "client", "description_main", "country_name", "flag", "sdg_list", "crs_3_code_list", "crs_5_code_list"]],
                use_container_width=True,
                # header row plus one row per project
                height=35 + 35 * len(match_df),
                column_config={
                    "iati_id": st.column_config.TextColumn(
                        "IATI ID",
                        help="IATI Project ID",
                        disabled=True,
                        width="small"
                    ),
                    "orga_abbreviation": st.column_config.TextColumn(
                        "Organization",
                        help="If description not in English, description in other language provided",
                        disabled=True,
                        width="small"
                    ),
                    "client": st.column_config.TextColumn(
                        "Client",
                        help="Client organization of customer",
                        disabled=True,
                        width="small"
                    ),
                    "title_main": st.column_config.TextColumn(
                        "Title",
                        help="If title not in English, title in other language provided",
                        disabled=True,
                        width="large"
                    ),
                    "description_main": st.column_config.TextColumn(
                        "Description",
                        help="If description not in English, description in other language provided",
                        disabled=True,
                        width="large"
                    ),
                    "country_name": st.column_config.TextColumn(
                        "Country",
                        help="Country of project",
                        disabled=True,
                        width="small"
                    ),
                    "flag": st.column_config.ImageColumn(
                        "Flag",
                        help="country flag",
                        width="small"
                    ),
                    "sdg_list": st.column_config.ListColumn(
                        "SDG Prediction",
                        help="Prediction of SDG's",
                        width="small"
                    ),
                    "crs_3_code_list": st.column_config.ListColumn(
                        "CRS 3",
                        help="CRS 3 code given by organization",
                        width="medium"
                    ),
                    "crs_5_code_list": st.column_config.ListColumn(
                        "CRS 5",
                        help="CRS 5 code given by organization",
                        width="medium"
                    ),
                },
                hide_index=True,
            )

        # NOTE(review): the separator is assumed to follow every match (the
        # original indentation is not visible here) - confirm.
        st.write("------------------")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modules/navbar.py
DELETED
@@ -1,39 +0,0 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import similarity_page
|
3 |
-
|
4 |
-
# giz-dsc colors
|
5 |
-
# orange: #e5b50d
|
6 |
-
# green: #48d47b
|
7 |
-
# blue: #0da2dc
|
8 |
-
# grey: #dadada
|
9 |
-
|
10 |
-
# giz colors https://www.giz.de/cdc/en/html/59638.html
|
11 |
-
# red: #c80f0f
|
12 |
-
# grey: #6f6f6f
|
13 |
-
# light_grey: #b2b2b2
|
14 |
-
# light_red: #eba1a3
|
15 |
-
|
16 |
-
def show_navbar():
    """Render the two top-level tabs and draw the matching page inside each."""
    # enlarge the font size of the tab labels
    tab_label_style = '''
    <style>
        .stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
        font-size:1rem;
        }
    </style>
    '''
    st.markdown(tab_label_style, unsafe_allow_html=True)

    tab_labels = [
        "🔍 Multi-Project Matching",
        "🎯 Single-Project Matching",
    ]
    multi_tab, single_tab = st.tabs(tab_labels)

    with multi_tab:
        similarity_page.show_multi_matching_page()

    with single_tab:
        similarity_page.show_single_matching_page()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
numpy==1.26.4
|
2 |
-
pandas==2.1.4
|
3 |
-
streamlit==1.32.2
|
4 |
-
streamlit-option-menu==0.3.12
|
5 |
-
scipy==1.12.0
|
6 |
-
faiss-cpu==1.8.0
|
7 |
-
faiss-gpu==1.7.2
|
8 |
-
sentence-transformers==2.5.1
|
9 |
-
streamlit-aggrid==0.3.4
|
10 |
-
psutil==5.9.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
similarity_page.py
CHANGED
@@ -111,7 +111,6 @@ def load_model():
|
|
111 |
return model
|
112 |
|
113 |
# Load Embeddings
|
114 |
-
"""
|
115 |
@st.cache_data
|
116 |
def load_embeddings_and_index():
|
117 |
# Load embeddings
|
@@ -120,7 +119,7 @@ def load_embeddings_and_index():
|
|
120 |
embeddings = stored_data["embeddings"]
|
121 |
|
122 |
return embeddings
|
123 |
-
|
124 |
|
125 |
# USE CACHE FUNCTIONS
|
126 |
sim_matrix = load_sim_matrix()
|
@@ -135,7 +134,7 @@ COUNTRY_OPTION_LIST = getCountry()
|
|
135 |
|
136 |
# LOAD MODEL FROM CACHE FO SEMANTIC SEARCH
|
137 |
model = load_model()
|
138 |
-
|
139 |
|
140 |
def show_multi_matching_page():
|
141 |
#st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
|
@@ -199,7 +198,7 @@ def show_multi_matching_page():
|
|
199 |
)
|
200 |
|
201 |
# SEARCH BOX
|
202 |
-
|
203 |
|
204 |
with col3:
|
205 |
# COUNTRY SELECTION
|
@@ -243,11 +242,7 @@ def show_multi_matching_page():
|
|
243 |
|
244 |
# FILTER DF WITH SELECTED FILTER OPTIONS
|
245 |
TOP_X_PROJECTS = 30
|
246 |
-
filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list,
|
247 |
-
#query,
|
248 |
-
model,
|
249 |
-
#embeddings,
|
250 |
-
TOP_X_PROJECTS)
|
251 |
if isinstance(filtered_df, pd.DataFrame) and len(filtered_df) != 0:
|
252 |
# FIND MATCHES
|
253 |
## If only same country checkbox i sactivated
|
@@ -317,7 +312,6 @@ def show_single_matching_page():
|
|
317 |
else:
|
318 |
search_list = title_search_list
|
319 |
|
320 |
-
|
321 |
project_option = st.selectbox(
|
322 |
label = 'Search for a project',
|
323 |
index = None,
|
|
|
111 |
return model
|
112 |
|
113 |
# Load Embeddings
|
|
|
114 |
@st.cache_data
|
115 |
def load_embeddings_and_index():
|
116 |
# Load embeddings
|
|
|
119 |
embeddings = stored_data["embeddings"]
|
120 |
|
121 |
return embeddings
|
122 |
+
|
123 |
|
124 |
# USE CACHE FUNCTIONS
|
125 |
sim_matrix = load_sim_matrix()
|
|
|
134 |
|
135 |
# LOAD MODEL FROM CACHE FO SEMANTIC SEARCH
|
136 |
model = load_model()
|
137 |
+
embeddings = load_embeddings_and_index()
|
138 |
|
139 |
def show_multi_matching_page():
|
140 |
#st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
|
|
|
198 |
)
|
199 |
|
200 |
# SEARCH BOX
|
201 |
+
query = st.text_input("Search Query")
|
202 |
|
203 |
with col3:
|
204 |
# COUNTRY SELECTION
|
|
|
242 |
|
243 |
# FILTER DF WITH SELECTED FILTER OPTIONS
|
244 |
TOP_X_PROJECTS = 30
|
245 |
+
filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str, country_code_list, orga_code_list, query, model, embeddings, TOP_X_PROJECTS)
|
|
|
|
|
|
|
|
|
246 |
if isinstance(filtered_df, pd.DataFrame) and len(filtered_df) != 0:
|
247 |
# FIND MATCHES
|
248 |
## If only same country checkbox i sactivated
|
|
|
312 |
else:
|
313 |
search_list = title_search_list
|
314 |
|
|
|
315 |
project_option = st.selectbox(
|
316 |
label = 'Search for a project',
|
317 |
index = None,
|
src/codelists/country_codes_ISO3166-1alpha-2.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1ff1ad92034a4a593138fcbb7570ec5015c3c28a4476f95015a39d0bf257382a
|
3 |
-
size 13113
|
|
|
|
|
|
|
|
src/codelists/crs3_codes.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:cfd7bf86baf7bbc54c880c098b89b803adfb060c2c9ba55ee976cc47c2be426a
|
3 |
-
size 3218
|
|
|
|
|
|
|
|
src/codelists/crs5_codes.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:84a522ad573ad1866835cb24efc7984016ef17b9990ac2484345705ac82a0d80
|
3 |
-
size 100133
|
|
|
|
|
|
|
|
src/codelists/flags/AC.png
DELETED
Binary file (182 kB)
|
|
src/codelists/flags/AD.png
DELETED
Binary file (56.2 kB)
|
|
src/codelists/flags/AE.png
DELETED
Binary file (973 Bytes)
|
|
src/codelists/flags/AF.png
DELETED
Binary file (107 kB)
|
|
src/codelists/flags/AG.png
DELETED
Binary file (16 kB)
|
|
src/codelists/flags/AI-alt.png
DELETED
Binary file (14.9 kB)
|
|
src/codelists/flags/AI.png
DELETED
Binary file (18.6 kB)
|
|
src/codelists/flags/AL.png
DELETED
Binary file (34.8 kB)
|
|
src/codelists/flags/AM.png
DELETED
Binary file (717 Bytes)
|
|
src/codelists/flags/AO.png
DELETED
Binary file (20.5 kB)
|
|
src/codelists/flags/AQ.png
DELETED
Binary file (13.8 kB)
|
|
src/codelists/flags/AR.png
DELETED
Binary file (46.9 kB)
|
|
src/codelists/flags/AS.png
DELETED
Binary file (101 kB)
|
|
src/codelists/flags/AT.png
DELETED
Binary file (292 Bytes)
|
|