Grant committed on
Commit
8cacd93
1 Parent(s): 30e243d

get rid of old data

Browse files
Files changed (5) hide show
  1. .gitattributes +0 -1
  2. app.py +0 -1
  3. del_sub_data.csv.gz +0 -3
  4. process_data.py +0 -25
  5. processing.log +0 -0
.gitattributes CHANGED
@@ -34,5 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  ALL_hum_isoforms_ESM1b_dels.zip filter=lfs diff=lfs merge=lfs -text
37
- del_sub_data.csv.gz filter=lfs diff=lfs merge=lfs -text
38
  ALL_hum_isoforms_ESM1b_del_sub.zip filter=lfs diff=lfs merge=lfs -text
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  ALL_hum_isoforms_ESM1b_dels.zip filter=lfs diff=lfs merge=lfs -text
 
37
  ALL_hum_isoforms_ESM1b_del_sub.zip filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -141,7 +141,6 @@ if show_line_plots:
141
  st.plotly_chart(lt_apllr)
142
  st.plotly_chart(lt_llr)
143
 
144
-
145
  st.download_button(
146
  label=f"Download {selection_uid} data as CSV",
147
  data=ud.reset_index(drop=True).to_csv(),
 
141
  st.plotly_chart(lt_apllr)
142
  st.plotly_chart(lt_llr)
143
 
 
144
  st.download_button(
145
  label=f"Download {selection_uid} data as CSV",
146
  data=ud.reset_index(drop=True).to_csv(),
del_sub_data.csv.gz DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b646c188438b5cb4508b95c59592cf094d670d60d24da1db9cda6b3eb11ddc2
3
- size 316120453
 
 
 
 
process_data.py DELETED
@@ -1,25 +0,0 @@
1
- import pandas as pd
2
- import zipfile
3
- import logging
4
-
5
- logging.basicConfig(level=logging.INFO,
6
- format='%(asctime)s - %(levelname)s - %(message)s',
7
- filename='processing.log')
8
-
9
- df = pd.read_csv("del_sub_data.csv.gz")
10
-
11
- prots = list(df.prot.unique())
12
-
13
- def write_chunk_to_zip(sub_df, zf, uid):
14
- with zf.open(f"{uid}.csv", "w") as f:
15
- sub_df.to_csv(f, index=False)
16
-
17
- with zipfile.ZipFile("ALL_hum_isoforms_ESM1b_del_sub.zip", "w") as zip_file:
18
- # Iterate over chunks of the DataFrame
19
- for p in prots:
20
- print(p)
21
- prot_data = df[df.prot == p]
22
- write_chunk_to_zip(prot_data, zip_file, p)
23
- logging.info(f"Finished {p}")
24
-
25
- logging.info("Finished all")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
processing.log DELETED
The diff for this file is too large to render. See raw diff