roni committed on
Commit
27e2770
1 Parent(s): 35d4339

initial protein search engine implementation

Browse files
Files changed (9) hide show
  1. .gitignore +4 -0
  2. Makefile +15 -0
  3. __init__.py +0 -0
  4. app.py +36 -0
  5. credentials.py +5 -0
  6. get_index.py +23 -0
  7. pylintrc +20 -0
  8. requirements-dev.txt +5 -0
  9. requirements.txt +3 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .idea
2
+ venv
3
+ __pycache__
4
+ scratch
Makefile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setup:
2
+ python -m venv venv
3
+ venv/bin/pip install -r requirements.txt
4
+ venv/bin/pip install -r requirements-dev.txt
5
+
6
+ format:
7
+ venv/bin/black .
8
+
9
+ check-code: lint-python check-formatting
10
+
11
+ check-formatting:
12
+ venv/bin/black --check .
13
+
14
+ lint-python:
15
+ venv/bin/pylint --rcfile=pylintrc .
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from get_index import get_engine
4
+
5
# Hugging Face Hub repository ids handed to get_engine(): the first is fetched
# as a dataset repo (the search index), the second as a model repo.
index_repo = "ronig/siamese_protein_index"
model_repo = "ronig/protein_search_engine"

# Built once at import time; search() reuses this module-level engine.
engine = get_engine(index_repo=index_repo, model_repo=model_repo)
8
+
9
+
10
def search(seq, n_res):
    """Run a sequence search and return a ``{label: score}`` mapping.

    Args:
        seq: Query sequence, passed unchanged to ``engine.search_by_sequence``.
        n_res: Requested number of results; clamped by ``limit_n_results``
            and converted to ``int`` before querying the engine.

    Returns:
        dict: One entry per search result, keyed by
        ``"Protein: <name> | Chain: <chain>"`` with the result's ``"score"``
        as the value. Results that produce the same key overwrite each other.
    """
    capped = int(limit_n_results(n_res))
    hits = engine.search_by_sequence(seq, n=capped)

    scores = {}
    for hit in hits:
        # Only the first element of each field is shown.
        # NOTE(review): presumably these fields are sequences of names/ids —
        # confirm against ProteinSearchEngine.
        prot, chain = hit["protein_name"][0], hit["chain_id"][0]
        scores[f"Protein: {prot} | Chain: {chain}"] = hit["score"]
    return scores
21
+
22
+
23
def limit_n_results(n, lower=1, upper=20):
    """Clamp a requested result count to the inclusive range [lower, upper].

    Args:
        n: Requested number of results. ``None`` (for example, a numeric
            input left empty) and NaN are treated as "no usable request"
            and map to ``lower`` instead of raising.
        lower: Smallest value that may be returned. Defaults to 1.
        upper: Largest value that may be returned. Defaults to 20.

    Returns:
        ``n`` itself when it already lies within the bounds, otherwise the
        nearest bound. The value is not converted to ``int`` here.
    """
    # None would raise TypeError inside min(); NaN compares false against
    # everything, so it would slip through min()/max() and break a later int().
    if n is None or n != n:
        return lower
    return max(min(n, upper), lower)
25
+
26
+
27
# Gradio UI: a sequence textbox, a result-count field and a "Search" button
# whose click calls search(seq_input, n_results) and shows the returned mapping
# in the "Search Results" label.
# NOTE(review): indentation was lost in this rendering, so how the components
# nest under Row/Column cannot be confirmed from here.
+ with gr.Blocks() as demo:
28
+ with gr.Row():
29
+ with gr.Column():
30
+ seq_input = gr.Textbox("KFLIYQMECSTMIFGL", label="Input Sequence")
31
+ n_results = gr.Number(5, label="N Results")
32
+ search_button = gr.Button("Search")
33
# num_top_classes=20 matches the upper bound applied in limit_n_results().
+ output = gr.Label(num_top_classes=20, label="Search Results")
34
+ search_button.click(search, inputs=[seq_input, n_results], outputs=output)
35
+
36
# Start the app; this runs whenever the module is executed or imported.
+ demo.launch()
credentials.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import os
2
+
3
+
4
def get_token():
    """Return the ``TOKEN`` environment variable, or ``True`` when it is unset.

    An empty ``TOKEN`` is returned as the empty string, not replaced by the
    default.
    NOTE(review): presumably ``True`` tells the Hub client to use locally
    stored credentials — confirm against the huggingface_hub version in use.
    """
    return os.getenv("TOKEN", True)
get_index.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from pathlib import Path
3
+
4
+ from huggingface_hub import snapshot_download
5
+
6
+ from credentials import get_token
7
+
8
+
9
def get_engine(index_repo: str, model_repo: str):
    """Download the index and model snapshots and build a ``ProteinSearchEngine``.

    Args:
        index_repo: Hub repository id fetched with ``repo_type="dataset"``;
            its local snapshot path becomes the engine's ``data_path``.
        model_repo: Hub repository id fetched with ``repo_type="model"``;
            its local snapshot directory is appended to ``sys.path``.

    Returns:
        A ``ProteinSearchEngine`` constructed with ``data_path`` set to the
        downloaded index snapshot.
    """
    index_snapshot = snapshot_download(
        index_repo, use_auth_token=get_token(), repo_type="dataset"
    )
    model_snapshot = snapshot_download(
        model_repo, use_auth_token=get_token(), repo_type="model"
    )
    index_path = Path(index_snapshot)
    local_arch_path = Path(model_snapshot)

    # The import below only resolves after the model snapshot is on sys.path.
    # NOTE(review): presumably protein_index ships inside the model repo — confirm.
    sys.path.append(str(local_arch_path))
    from protein_index import ProteinSearchEngine  # pylint: disable=import-error,import-outside-toplevel

    return ProteinSearchEngine(data_path=index_path)
pylintrc ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [MESSAGES CONTROL]
2
+ disable=missing-docstring,invalid-name,logging-fstring-interpolation
3
+
4
+ [DESIGN]
5
+ min-public-methods=1
6
+
7
+ [FORMAT]
8
+ max-line-length=88
9
+
10
+ [SIMILARITIES]
11
+ min-similarity-lines=10
12
+
13
+ [TYPECHECK]
14
+
15
+ [MASTER]
16
+ init-hook=import sys; sys.path.append(".")
17
+ extension-pkg-whitelist=pydantic,cassandra
18
+ generated-members=torch.*,cv2.*,np.random.*
19
+ ignore-patterns=setup,py,tasks.py
20
+ max-args=6
requirements-dev.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pytest
2
+ pylint
3
+ black
4
+ mypy
5
+ huggingface_hub
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ transformers
3
+ annoy