TornikeO committed on
Commit
15498d2
·
1 Parent(s): 97989c2

Make filtering default, add warnings

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -57,9 +57,14 @@ def run(r_filepath:Path, q_filepath:Path,
57
  assert q_filepath is not None, "Query file is missing."
58
 
59
  refs, ques = list(load_from_mgf(str(r_filepath))), list(load_from_mgf(str(q_filepath)))
 
60
  if do_preprocess:
61
  refs = preprocess_spectra(refs)
62
  ques = preprocess_spectra(ques)
 
 
 
 
63
 
64
  # If we have small spectra, don't make a huge batch
65
  if batch_size > max(len(refs), len(ques)):
@@ -75,6 +80,7 @@ def run(r_filepath:Path, q_filepath:Path,
75
  if similarity_method == 'ModifiedCosine':
76
  kwargs.pop('shift')
77
 
 
78
  similarity_class = CudaCosineGreedy if similarity_method == 'CosineGreedy' else CudaModifiedCosine
79
 
80
  scores_obj = calculate_scores(
@@ -112,6 +118,8 @@ with gr.Blocks() as demo:
112
 
113
  Calculate cosine greedy similarity matrix using CUDA. See the [main repo](https://github.com/pangeai/simms) for this project.
114
  This approach is x100-x500 faster than [MatchMS](https://github.com/matchms/matchms). Upload your MGF files below, or run the sample `pesticides.mgf` files against each other.
 
 
115
  """)
116
  with gr.Row():
117
  refs = gr.File(label="Upload REFERENCES.mgf",
@@ -135,7 +143,7 @@ with gr.Blocks() as demo:
135
  match_limit = gr.Number(value=2048, label="Match Limit",
136
  info="Consider this many pairs of m/z before stopping. "
137
  "In practice, a value of 2048 gives more than 99.99% accuracy on GNPS")
138
- do_preprocess = gr.Checkbox(value=False, label="filter spectra",
139
  info="If you want to filter spectra before processing, we can do that. Look at the code to see details.")
140
  with gr.Row():
141
  array_type = gr.Radio(['numpy', 'sparse'],
 
57
  assert q_filepath is not None, "Query file is missing."
58
 
59
  refs, ques = list(load_from_mgf(str(r_filepath))), list(load_from_mgf(str(q_filepath)))
60
+
61
  if do_preprocess:
62
  refs = preprocess_spectra(refs)
63
  ques = preprocess_spectra(ques)
64
+ if not refs: gr.Error("References are empty after filtering")
65
+ if not ques: gr.Error("Queries are empty after filtering")
66
+ else:
67
+ gr.Warning("Filtering is skipped. Malformed spectra can cause errors.")
68
 
69
  # If we have small spectra, don't make a huge batch
70
  if batch_size > max(len(refs), len(ques)):
 
80
  if similarity_method == 'ModifiedCosine':
81
  kwargs.pop('shift')
82
 
83
+
84
  similarity_class = CudaCosineGreedy if similarity_method == 'CosineGreedy' else CudaModifiedCosine
85
 
86
  scores_obj = calculate_scores(
 
118
 
119
  Calculate cosine greedy similarity matrix using CUDA. See the [main repo](https://github.com/pangeai/simms) for this project.
120
  This approach is x100-x500 faster than [MatchMS](https://github.com/matchms/matchms). Upload your MGF files below, or run the sample `pesticides.mgf` files against each other.
121
+
122
+ **In case of errors, check the "logs" above - malformed spectra will cause errors**
123
  """)
124
  with gr.Row():
125
  refs = gr.File(label="Upload REFERENCES.mgf",
 
143
  match_limit = gr.Number(value=2048, label="Match Limit",
144
  info="Consider this many pairs of m/z before stopping. "
145
  "In practice, a value of 2048 gives more than 99.99% accuracy on GNPS")
146
+ do_preprocess = gr.Checkbox(value=True, label="filter spectra",
147
  info="If you want to filter spectra before processing, we can do that. Look at the code to see details.")
148
  with gr.Row():
149
  array_type = gr.Radio(['numpy', 'sparse'],