Make filtering default, add warnings
Browse files
app.py
CHANGED
@@ -57,9 +57,14 @@ def run(r_filepath:Path, q_filepath:Path,
|
|
57 |
assert q_filepath is not None, "Query file is missing."
|
58 |
|
59 |
refs, ques = list(load_from_mgf(str(r_filepath))), list(load_from_mgf(str(q_filepath)))
|
|
|
60 |
if do_preprocess:
|
61 |
refs = preprocess_spectra(refs)
|
62 |
ques = preprocess_spectra(ques)
|
|
|
|
|
|
|
|
|
63 |
|
64 |
# If we have small spectra, don't make a huge batch
|
65 |
if batch_size > max(len(refs), len(ques)):
|
@@ -75,6 +80,7 @@ def run(r_filepath:Path, q_filepath:Path,
|
|
75 |
if similarity_method == 'ModifiedCosine':
|
76 |
kwargs.pop('shift')
|
77 |
|
|
|
78 |
similarity_class = CudaCosineGreedy if similarity_method == 'CosineGreedy' else CudaModifiedCosine
|
79 |
|
80 |
scores_obj = calculate_scores(
|
@@ -112,6 +118,8 @@ with gr.Blocks() as demo:
|
|
112 |
|
113 |
Calculate cosine greedy similarity matrix using CUDA. See the [main repo](https://github.com/pangeai/simms) for this project.
|
114 |
This approach is x100-x500 faster than [MatchMS](https://github.com/matchms/matchms). Upload your MGF files below, or run the sample `pesticides.mgf` files against each other.
|
|
|
|
|
115 |
""")
|
116 |
with gr.Row():
|
117 |
refs = gr.File(label="Upload REFERENCES.mgf",
|
@@ -135,7 +143,7 @@ with gr.Blocks() as demo:
|
|
135 |
match_limit = gr.Number(value=2048, label="Match Limit",
|
136 |
info="Consider this many pairs of m/z before stopping. "
|
137 |
"In practice, a value of 2048 gives more than 99.99% accuracy on GNPS")
|
138 |
-
do_preprocess = gr.Checkbox(value=
|
139 |
info="If you want to filter spectra before processing, we can do that. Look at the code to see details.")
|
140 |
with gr.Row():
|
141 |
array_type = gr.Radio(['numpy', 'sparse'],
|
|
|
57 |
assert q_filepath is not None, "Query file is missing."
|
58 |
|
59 |
refs, ques = list(load_from_mgf(str(r_filepath))), list(load_from_mgf(str(q_filepath)))
|
60 |
+
|
61 |
if do_preprocess:
|
62 |
refs = preprocess_spectra(refs)
|
63 |
ques = preprocess_spectra(ques)
|
64 |
+
# gr.Error is an exception class: it must be raised to abort the run and show the message in the UI.
if not refs: raise gr.Error("References are empty after filtering")
|
65 |
+
# gr.Error is an exception class: it must be raised to abort the run and show the message in the UI.
if not ques: raise gr.Error("Queries are empty after filtering")
|
66 |
+
else:
|
67 |
+
gr.Warning("Filtering is skipped. Malformed spectra can cause errors.")
|
68 |
|
69 |
# If we have small spectra, don't make a huge batch
|
70 |
if batch_size > max(len(refs), len(ques)):
|
|
|
80 |
if similarity_method == 'ModifiedCosine':
|
81 |
kwargs.pop('shift')
|
82 |
|
83 |
+
|
84 |
similarity_class = CudaCosineGreedy if similarity_method == 'CosineGreedy' else CudaModifiedCosine
|
85 |
|
86 |
scores_obj = calculate_scores(
|
|
|
118 |
|
119 |
Calculate cosine greedy similarity matrix using CUDA. See the [main repo](https://github.com/pangeai/simms) for this project.
|
120 |
This approach is x100-x500 faster than [MatchMS](https://github.com/matchms/matchms). Upload your MGF files below, or run the sample `pesticides.mgf` files against each other.
|
121 |
+
|
122 |
+
**In case of errors, check the "logs" above - malformed spectra will cause errors**
|
123 |
""")
|
124 |
with gr.Row():
|
125 |
refs = gr.File(label="Upload REFERENCES.mgf",
|
|
|
143 |
match_limit = gr.Number(value=2048, label="Match Limit",
|
144 |
info="Consider this many pairs of m/z before stopping. "
|
145 |
"In practice, a value of 2048 gives more than 99.99% accuracy on GNPS")
|
146 |
+
do_preprocess = gr.Checkbox(value=True, label="filter spectra",
|
147 |
info="If you want to filter spectra before processing, we can do that. Look at the code to see details.")
|
148 |
with gr.Row():
|
149 |
array_type = gr.Radio(['numpy', 'sparse'],
|