Spaces:
Running
Running
roni
committed on
Commit
·
6f068fd
1
Parent(s):
9ec67f0
deduplicating on uniprot id
Browse files
- app.py +21 -15
- get_index.py +2 -1
app.py
CHANGED
@@ -42,20 +42,19 @@ def limit_n_results(n):
|
|
42 |
|
43 |
|
44 |
def aggregate_search_results(raw_results: List[dict], max_res: int) -> Dict[str, dict]:
|
45 |
-
|
46 |
for raw_result in raw_results:
|
47 |
entry = select_keys(
|
48 |
raw_result,
|
49 |
-
keys=["pdb_name", "chain_id", "score", "organism", "uniprot_id"]
|
50 |
)
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
return dict(aggregated_by_gene)
|
59 |
|
60 |
|
61 |
def select_keys(d: dict, keys: List[str]):
|
@@ -64,21 +63,28 @@ def select_keys(d: dict, keys: List[str]):
|
|
64 |
|
65 |
def format_search_results(agg_search_results):
|
66 |
formatted_search_results = {}
|
67 |
-
for
|
68 |
entry = entries[0]
|
69 |
organism = entry["organism"]
|
70 |
score = entry["score"]
|
71 |
-
|
72 |
-
key = f"
|
73 |
formatted_search_results[key] = score
|
74 |
return formatted_search_results
|
75 |
|
76 |
|
77 |
def update_dropdown_menu(agg_search_res):
|
78 |
choices = []
|
79 |
-
for
|
80 |
for entry in entries:
|
81 |
-
choice = choice_sep.join(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
choices.append(choice)
|
83 |
|
84 |
if choices:
|
|
|
42 |
|
43 |
|
44 |
def aggregate_search_results(raw_results: List[dict], max_res: int) -> Dict[str, dict]:
|
45 |
+
aggregated_by_uniprot = collections.defaultdict(list)
|
46 |
for raw_result in raw_results:
|
47 |
entry = select_keys(
|
48 |
raw_result,
|
49 |
+
keys=["pdb_name", "chain_id", "score", "organism", "uniprot_id", "genes"],
|
50 |
)
|
51 |
+
uniprot_id = raw_result["uniprot_id"]
|
52 |
+
|
53 |
+
if uniprot_id is not None:
|
54 |
+
aggregated_by_uniprot[uniprot_id].append(entry)
|
55 |
+
if len(aggregated_by_uniprot) >= max_res:
|
56 |
+
return dict(aggregated_by_uniprot)
|
57 |
+
return dict(aggregated_by_uniprot)
|
|
|
58 |
|
59 |
|
60 |
def select_keys(d: dict, keys: List[str]):
|
|
|
63 |
|
64 |
def format_search_results(agg_search_results):
|
65 |
formatted_search_results = {}
|
66 |
+
for uniprot_id, entries in agg_search_results.items():
|
67 |
entry = entries[0]
|
68 |
organism = entry["organism"]
|
69 |
score = entry["score"]
|
70 |
+
genes = entry["genes"]
|
71 |
+
key = f"Uniprot ID: {uniprot_id} | Organism: {organism} | Gene Names: {genes}"
|
72 |
formatted_search_results[key] = score
|
73 |
return formatted_search_results
|
74 |
|
75 |
|
76 |
def update_dropdown_menu(agg_search_res):
|
77 |
choices = []
|
78 |
+
for uniprot_id, entries in agg_search_res.items():
|
79 |
for entry in entries:
|
80 |
+
choice = choice_sep.join(
|
81 |
+
[
|
82 |
+
uniprot_id,
|
83 |
+
entry["pdb_name"],
|
84 |
+
entry["chain_id"],
|
85 |
+
entry["genes"] or "",
|
86 |
+
]
|
87 |
+
)
|
88 |
choices.append(choice)
|
89 |
|
90 |
if choices:
|
get_index.py
CHANGED
@@ -18,7 +18,8 @@ def get_engines(index_repo: str, model_repo: str):
|
|
18 |
)
|
19 |
sys.path.append(str(local_arch_path))
|
20 |
from protein_index import ( # pylint: disable=import-error,import-outside-toplevel
|
21 |
-
ProteinSearchEngine,
|
|
|
22 |
)
|
23 |
|
24 |
subindex_paths = glob(str(index_path / "*/"))
|
|
|
18 |
)
|
19 |
sys.path.append(str(local_arch_path))
|
20 |
from protein_index import ( # pylint: disable=import-error,import-outside-toplevel
|
21 |
+
ProteinSearchEngine,
|
22 |
+
ProteinIndexError,
|
23 |
)
|
24 |
|
25 |
subindex_paths = glob(str(index_path / "*/"))
|