Spaces:

ntranoslab
/

diff-tol

Running

Grant committed on Jun 8

Commit

62de390

•

1 Parent(s): de333ca

add zip

Files changed (6) hide show

.gitattributes CHANGED Viewed

@@ -35,5 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 ALL_hum_isoforms_ESM1b_dels.zip filter=lfs diff=lfs merge=lfs -text
 del_sub_data.csv.gz filter=lfs diff=lfs merge=lfs -text
-rand_samp_gw_del_sub.csv.gz filter=lfs diff=lfs merge=lfs -text
 ALL_hum_isoforms_ESM1b_del_sub.zip filter=lfs diff=lfs merge=lfs -text

 *tfevents* filter=lfs diff=lfs merge=lfs -text
 ALL_hum_isoforms_ESM1b_dels.zip filter=lfs diff=lfs merge=lfs -text
 del_sub_data.csv.gz filter=lfs diff=lfs merge=lfs -text
 ALL_hum_isoforms_ESM1b_del_sub.zip filter=lfs diff=lfs merge=lfs -text

.ipynb_checkpoints/process_data-checkpoint.py ADDED Viewed

+import pandas as pd
+import zipfile
+import logging
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s - %(levelname)s - %(message)s',
+                   filename='processing.log')
+df = pd.read_csv("del_sub_data.csv.gz")
+prots = list(df.prot.unique())
+def write_chunk_to_zip(sub_df, zf, uid):
+    with zf.open(f"{uid}.csv", "w") as f:
+        sub_df.to_csv(f, index=False)
+with zipfile.ZipFile("ALL_hum_isoforms_ESM1b_del_sub.zip", "w") as zip_file:
+    # Iterate over chunks of the DataFrame
+    for p in prots:
+        print(p)
+        prot_data = df[df.prot == p]
+        write_chunk_to_zip(prot_data, zip_file, p)
+        logging.info(f"Finished {p}")
+logging.info("Finished all")

ALL_hum_isoforms_ESM1b_del_sub.zip ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f36e5f30fce46d55545061d3b4ad53b608a4cc8915a80ce051067f22f38fe06
+size 1020611720

process_data.py ADDED Viewed

+import pandas as pd
+import zipfile
+import logging
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s - %(levelname)s - %(message)s',
+                   filename='processing.log')
+df = pd.read_csv("del_sub_data.csv.gz")
+prots = list(df.prot.unique())
+def write_chunk_to_zip(sub_df, zf, uid):
+    with zf.open(f"{uid}.csv", "w") as f:
+        sub_df.to_csv(f, index=False)
+with zipfile.ZipFile("ALL_hum_isoforms_ESM1b_del_sub.zip", "w") as zip_file:
+    # Iterate over chunks of the DataFrame
+    for p in prots:
+        print(p)
+        prot_data = df[df.prot == p]
+        write_chunk_to_zip(prot_data, zip_file, p)
+        logging.info(f"Finished {p}")
+logging.info("Finished all")

processing.log ADDED Viewed

The diff for this file is too large to render. See raw diff

rand_samp_gw_del_sub.csv.gz ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:21057b72026d1e112137a3e1e1390ed00b547e725fe4917e8549d99984196677
+size 417309