Jayesh13 committed on
Commit
ef8ec9c
·
verified ·
1 Parent(s): 5a1e198

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -23
app.py CHANGED
@@ -54,31 +54,44 @@ def process_csv(file):
54
 
55
  return homorepeats, sequence_data
56
 
57
- # Function to generate and download Excel workbook
58
- def create_excel(sequences_data, homorepeats):
59
  output = BytesIO()
60
  workbook = xlsxwriter.Workbook(output, {'in_memory': True})
61
  worksheet = workbook.add_worksheet()
62
 
63
- # Write the header
64
- worksheet.write(0, 0, "Entry ID")
65
- worksheet.write(0, 1, "Protein Name")
66
- col = 2
67
- for repeat in sorted(homorepeats):
68
- worksheet.write(0, col, repeat)
69
- col += 1
70
-
71
- # Write data for each sequence
72
- row = 1
73
- for entry_id, protein_name, freq in sequences_data:
74
- worksheet.write(row, 0, entry_id)
75
- worksheet.write(row, 1, protein_name)
76
  col = 2
77
  for repeat in sorted(homorepeats):
78
- worksheet.write(row, col, freq.get(repeat, 0))
79
  col += 1
80
  row += 1
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  workbook.close()
83
  output.seek(0)
84
  return output
@@ -93,18 +106,20 @@ uploaded_files = st.file_uploader("Upload CSV files", accept_multiple_files=True
93
  if uploaded_files:
94
  all_homorepeats = set()
95
  all_sequences_data = []
 
96
 
97
  for file in uploaded_files:
98
  homorepeats, sequence_data = process_csv(file)
99
  if homorepeats is not None:
100
  all_homorepeats.update(homorepeats)
101
- all_sequences_data.extend(sequence_data)
 
102
 
103
  if all_sequences_data:
104
  st.success(f"Processed {len(uploaded_files)} files successfully!")
105
 
106
  # Step 3: Generate and download the Excel report
107
- excel_file = create_excel(all_sequences_data, all_homorepeats)
108
 
109
  # Download the Excel file
110
  st.download_button(
@@ -118,10 +133,12 @@ if uploaded_files:
118
  if st.checkbox("Show Results Table"):
119
  # Convert the sequences data into a DataFrame for easy display
120
  rows = []
121
- for entry_id, protein_name, freq in all_sequences_data:
122
- row = {"Entry ID": entry_id, "Protein Name": protein_name}
123
- row.update({repeat: freq.get(repeat, 0) for repeat in sorted(all_homorepeats)})
124
- rows.append(row)
 
 
125
 
126
  result_df = pd.DataFrame(rows)
127
- st.dataframe(result_df)
 
54
 
55
  return homorepeats, sequence_data
56
 
57
# Function to generate and download Excel workbook with file names as separators
def create_excel(sequences_data, homorepeats, filenames):
    """Build an in-memory Excel workbook with one labelled section per file.

    Each section is laid out on a single worksheet as:

    1. a ``File: <name>`` separator row,
    2. a header row (``Entry ID``, ``Protein Name``, then one column per
       homorepeat in sorted order),
    3. one row per sequence of that file,
    4. one empty row before the next section.

    Args:
        sequences_data: Sequence with one item per file; each item is an
            iterable of ``(entry_id, protein_name, freq)`` tuples, where
            ``freq`` supports ``.get(repeat, 0)`` to look up a count.
        homorepeats: Collection of all homorepeats to emit as columns.
        filenames: Sequence of file names, parallel to ``sequences_data``.

    Returns:
        The ``BytesIO`` buffer holding the workbook, rewound to offset 0.

    Raises:
        IndexError: If ``filenames`` has fewer entries than
            ``sequences_data``. Raised before any output is produced.
    """
    # Fail fast instead of discovering the mismatch halfway through the sheet.
    if len(filenames) < len(sequences_data):
        raise IndexError(
            "filenames must provide a name for every item in sequences_data"
        )

    # Sort once so the header and every data row share one column order.
    repeat_columns = sorted(homorepeats)

    output = BytesIO()
    workbook = xlsxwriter.Workbook(output, {'in_memory': True})
    worksheet = workbook.add_worksheet()

    row = 0
    for filename, file_data in zip(filenames, sequences_data):
        # Separator row naming the file this section belongs to.
        worksheet.write(row, 0, f"File: {filename}")
        row += 1

        # Header row, repeated per section so each one is self-describing.
        worksheet.write(row, 0, "Entry ID")
        worksheet.write(row, 1, "Protein Name")
        for col, repeat in enumerate(repeat_columns, start=2):
            worksheet.write(row, col, repeat)
        row += 1

        # One row per sequence; repeats missing from ``freq`` are written as 0.
        for entry_id, protein_name, freq in file_data:
            worksheet.write(row, 0, entry_id)
            worksheet.write(row, 1, protein_name)
            for col, repeat in enumerate(repeat_columns, start=2):
                worksheet.write(row, col, freq.get(repeat, 0))
            row += 1

        # Leave an empty row between consecutive file sections.
        row += 1

    workbook.close()
    output.seek(0)
    return output
 
106
  if uploaded_files:
107
  all_homorepeats = set()
108
  all_sequences_data = []
109
+ filenames = []
110
 
111
  for file in uploaded_files:
112
  homorepeats, sequence_data = process_csv(file)
113
  if homorepeats is not None:
114
  all_homorepeats.update(homorepeats)
115
+ all_sequences_data.append(sequence_data)
116
+ filenames.append(file.name)
117
 
118
  if all_sequences_data:
119
  st.success(f"Processed {len(uploaded_files)} files successfully!")
120
 
121
  # Step 3: Generate and download the Excel report
122
+ excel_file = create_excel(all_sequences_data, all_homorepeats, filenames)
123
 
124
  # Download the Excel file
125
  st.download_button(
 
133
  if st.checkbox("Show Results Table"):
134
  # Convert the sequences data into a DataFrame for easy display
135
  rows = []
136
+ for file_index, file_data in enumerate(all_sequences_data):
137
+ filename = filenames[file_index]
138
+ for entry_id, protein_name, freq in file_data:
139
+ row = {"Filename": filename, "Entry ID": entry_id, "Protein Name": protein_name}
140
+ row.update({repeat: freq.get(repeat, 0) for repeat in sorted(all_homorepeats)})
141
+ rows.append(row)
142
 
143
  result_df = pd.DataFrame(rows)
144
+ st.dataframe(result_df)