pup-py committed on
Commit
f60672b
·
1 Parent(s): 8b57881

parsing formula

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +116 -25
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
app.py CHANGED
@@ -2,40 +2,131 @@ import altair as alt
2
  import molmass
3
  import gradio as gr
4
  import pandas as pd
 
5
 
6
- def ms(formula):
7
- mf = molmass.Formula(formula)
8
- return mf.spectrum().dataframe().round(4).query("Fraction > .001").drop(columns=["Fraction"])
9
 
10
- def plot_ms(spec_df):
11
- chart = alt.Chart(spec_df).mark_bar().encode(
12
- x=alt.X("m\/z:O").title('mz'),
13
- y=alt.Y("Intensity\ %:Q").title("relative intensity (%)")
14
- )
15
- return chart
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
 
18
  with gr.Blocks() as demo:
19
- example_mf = "FmocCysGlyLysCONH2"
20
- formula = gr.Textbox(label="formula", value=example_mf)
21
  with gr.Row():
22
- example_ms = ms(example_mf)
23
- example_plot = plot_ms(example_ms)
24
- spec_df = gr.Dataframe(value=example_ms, label="MS")
25
- spec_plot = gr.Plot(value=example_plot, label="MS")
 
 
 
26
 
27
- @gr.on(triggers=[formula.submit], inputs=[formula], outputs=[spec_df])
28
- def calc_ms(formula):
29
  try:
30
- return ms(formula)
 
31
  except Exception as e:
32
  return pd.DataFrame({"error": [e.__str__()]})
33
 
34
- @gr.on(triggers=[formula.submit], inputs=[spec_df], outputs=[spec_plot])
35
- def calc_plot(spec_df):
36
- try:
37
- return plot_ms(spec_df)
38
- except Exception:
39
- return example_plot.properties(width=480, height=240)
40
-
41
  demo.launch()
 
2
  import molmass
3
  import gradio as gr
4
  import pandas as pd
5
+ import re
6
 
7
+ # def ms(formula):
8
+ # mf = molmass.Formula(formula)
9
+ # return mf.spectrum().dataframe().round(4).query("Fraction > .001").drop(columns=["Fraction"])
10
 
11
+ # def plot_ms(spec_df):
12
+ # chart = alt.Chart(spec_df).mark_bar().encode(
13
+ # x=alt.X("m\/z:O").title('mz'),
14
+ # y=alt.Y("Intensity\ %:Q").title("relative intensity (%)")
15
+ # )
16
+ # return chart
17
+
18
+
19
# Demo text for the formula box. One entry per input line: '#' comment lines,
# '=' fragment definitions, then one ion expression per line.
_EXAMPLE1_LINES = [
    "# Lines starting with '#' are comments",
    '# Lines with "=" are fragment definitions',
    "Azide = H2NCH(CH2CH2N3)COOH",
    "Alkyne = HCCCH2NH2",
    "Click = Azide + Alkyne",
    "Click - H+",
    "Click + H+",
    "Click + Na+",
    "Click + 2H+",
    "2*Click + H+",
    "",  # empty last item so the joined text ends with a newline
]
example1 = "\n".join(_EXAMPLE1_LINES)

# Minimal second example: a single charged formula.
example2 = "NH4+"

# Example texts keyed by a short label.
examples_dict = dict(addition=example1, substitution=example2)
36
+
37
def make_formula(line):
    """Build a formula object from a small ' + ' / ' - ' expression.

    Only operators padded with spaces (``' + '`` and ``' - '``) are treated as
    arithmetic. An unpadded sign, such as the trailing ``+`` in ``H+`` or
    ``NH4+``, stays part of the term and is passed on to molmass unchanged.

    Parsing rules:
      1) ``' + '`` is split before ``' - '``, so each '+'-separated chunk is
         evaluated on its own and the results are added.
      2) Inside a chunk, ``' - '`` associates to the left:
         ``A - B - C`` is ``(A - B) - C``.
      3) A term that is a key of ``molmass.GROUPS`` resolves to that entry;
         anything else is handed to ``molmass.Formula``.

    Don't go crazy, it's not a full string calculator: there is no handling
    of parentheses around sub-expressions here.

    Args:
        line: expression text, e.g. ``"Click + H+"``. Leading and trailing
            whitespace around the expression and around each term is ignored.

    Returns:
        The result of combining the terms with ``+`` and ``-``.
    """
    line = line.strip()

    if " + " in line:
        left, right = line.split(" + ", maxsplit=1)
        return make_formula(left) + make_formula(right)

    if " - " in line:
        # Split on the LAST ' - ' so that chained subtraction is evaluated
        # left to right. Splitting on the first one would turn
        # 'A - B - C' into 'A - (B - C)', i.e. A - B + C.
        left, right = line.rsplit(" - ", maxsplit=1)
        return make_formula(left) - make_formula(right)

    group = molmass.GROUPS.get(line)
    if group is None:
        return molmass.Formula(line)
    if isinstance(group, str):
        # A plain-string entry cannot take part in the +/- arithmetic above,
        # so wrap it. NOTE(review): confirm the value type of the entries
        # molmass ships in GROUPS; aliases stored by FormulaBox.add_group are
        # already the objects returned by this function.
        return molmass.Formula(group)
    return group
55
+
56
+
57
class FormulaBox:
    """Parse a multi-line "formula box" into named ions.

    Each input line is one of:
      * blank, or starting with '#': ignored;
      * ``alias = expression``: a fragment definition, stored under ``alias``
        in ``molmass.GROUPS``;
      * anything else: an ion expression, stored in ``self.ions`` under the
        line's text with all whitespace removed.

    Expressions are evaluated with ``make_formula``, which relies on the
    spaces around ' + ' and ' - ', so the expression text is passed on with
    its inner spacing intact.

    Note that definitions are written into the module-level
    ``molmass.GROUPS`` dict, so they outlive the instance that created them.
    """

    def __init__(self, inputs, **kwargs):
        """Split ``inputs`` into lines and parse them immediately.

        Args:
            inputs: the full text of the formula box.
            **kwargs: ``min_intensity`` is the only key read; a missing or
                falsy value (``None``, ``0``) falls back to ``1e-4``.
        """
        self.min_intensity = kwargs.get("min_intensity") or 1e-4
        self.lines = inputs.split("\n")
        self.ions = {}
        self.parse()

    def parse(self):
        """Classify every line as comment, definition ('=') or ion."""
        for raw in self.lines:
            # The whitespace-free form is only used to classify the line and
            # as the ion's key; the expression itself keeps its spaces.
            compact = re.sub(r"\s+", "", raw)
            if not compact or compact.startswith("#"):
                continue
            if "=" in compact:
                self.add_group(raw)
            else:
                self.ions[compact] = make_formula(raw.strip())

    def add_group(self, line):
        """Register ``alias = expression`` in ``molmass.GROUPS``.

        Args:
            line: a definition line; whitespace inside the alias is removed,
                whitespace around the expression is trimmed.

        Raises:
            ValueError: if the alias or the expression is empty, or the line
                contains more than one '='.
        """
        alias, _, value = line.partition("=")
        alias = re.sub(r"\s+", "", alias)
        value = value.strip()
        if not alias or not value or "=" in value:
            raise ValueError(
                f"invalid definition {line.strip()!r}: expected 'alias = formula'"
            )
        molmass.GROUPS[alias] = make_formula(value)

    def get_spectra(self):
        """Compute one spectrum per ion and store them on the instance.

        The result is assigned to ``self.spectra`` (ion key -> spectrum).
        That instance attribute shadows the ``spectra`` method below on this
        object from then on.
        """
        self.spectra = {
            name: ion.spectrum(min_intensity=self.min_intensity)
            for name, ion in self.ions.items()
        }

    @property
    def df(self):
        """Summary table with one row per ion, rounded to 5 decimals.

        Columns: molecule, mf, charge, monoisotopic_mass, mz,
        top_isotope_fraction. The last two are read from the ``peak`` of the
        ion's spectrum, computed once per ion with ``self.min_intensity``.
        """
        records = []
        for name, ion in self.ions.items():
            peak = ion.spectrum(min_intensity=self.min_intensity).peak
            records.append(
                {
                    "molecule": name,
                    "mf": ion.formula,
                    "charge": ion.charge,
                    "monoisotopic_mass": ion.monoisotopic_mass,
                    "mz": peak.mz,
                    "top_isotope_fraction": peak.fraction,
                }
            )
        return pd.DataFrame(records).round(5)

    def spectra(self):
        """Placeholder; currently does nothing and returns ``None``."""
        return
110
+
111
# Parse the bundled example once at start-up so the results table is already
# filled in when the page first loads.
fb1 = FormulaBox(example1)


with gr.Blocks() as demo:
    with gr.Row():
        # Left column: the editable formula text and a submit button.
        with gr.Column(scale=1):
            formula_box = gr.Textbox(
                label="FormulaBox",
                value=example1,
                lines=12,
                max_lines=256,
            )
            btn = gr.Button("Submit")
        # Right column: the per-ion summary table.
        with gr.Column(scale=2):
            # examples = gr.Examples(examples=["addition", "substitution"], inputs=formula_box, run_on_click=True, fn=get_example)
            # examples2 = gr.Examples(examples=[example1, example2], inputs=formula_box, run_on_click=True, fn=lambda x:x, preprocess=get_example)
            ms_df_box = gr.Dataframe(label="MS DF", value=fb1.df)

    # Recompute the table when the textbox loses focus or the button is
    # clicked. Any failure is shown as a one-cell "error" table instead of
    # being raised.
    @gr.on(
        triggers=[formula_box.blur, btn.click],
        inputs=[formula_box],
        outputs=[ms_df_box],
    )
    def make_ms_df(formula_box):
        try:
            return FormulaBox(formula_box).df
        except Exception as err:
            return pd.DataFrame({"error": [str(err)]})


demo.launch()