Spaces:

Jayesh13
/

Get_Genome

Sleeping

App Files Files Community

Jayesh13 committed on Nov 10, 2024

Commit

ca02cf1

verified ·

1 Parent(s): 96d0940

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -13

app.py CHANGED Viewed

@@ -16,7 +16,15 @@ def extract_sequences(genome_data):
     # Extract the sequence part from the FASTA format
     lines = genome_data.splitlines()
     sequence = ''.join(lines[1:])  # Join all lines except the first (which is the header)
-    return sequence  # Return the full sequence
 def main():
     st.title("NCBI Genome Sequence Extractor")
@@ -28,22 +36,26 @@ def main():
         # Read and split accession numbers from the uploaded file
         accession_numbers = uploaded_file.read().decode("utf-8").splitlines()
-        output_sequences = []
         for accession_number in accession_numbers:
             genome_data = get_genome_from_ncbi(accession_number)
             if genome_data:
-                # Extract the sequence
-                sequence = extract_sequences(genome_data)
-                # Format the output as per the requirement
-                output_sequence = f">{accession_number}\n{sequence}"
-                output_sequences.append(output_sequence)
-        # Create output text with the specified format
-        output_text = "\n".join(output_sequences)
-        # Provide a download button for the formatted output
-        st.download_button("Download Sequences", output_text, file_name="formatted_sequences.txt", mime="text/plain")
 if __name__ == "__main__":
     main()

     # Extract the sequence part from the FASTA format
     lines = genome_data.splitlines()
     sequence = ''.join(lines[1:])  # Join all lines except the first (which is the header)
+    # Get the first 55 and last 114 base pairs
+    start_sequence = sequence[:55]  # First 55 base pairs
+    end_sequence = sequence[-114:]   # Last 114 base pairs
+    return start_sequence, end_sequence
+def format_fasta(accession_number, sequence):
+    return f">{accession_number}\n{sequence}"
 def main():
     st.title("NCBI Genome Sequence Extractor")
         # Read and split accession numbers from the uploaded file
         accession_numbers = uploaded_file.read().decode("utf-8").splitlines()
+        # Prepare lists to store formatted sequences
+        starting_sequences = []
+        ending_sequences = []
         for accession_number in accession_numbers:
             genome_data = get_genome_from_ncbi(accession_number)
             if genome_data:
+                start_sequence, end_sequence = extract_sequences(genome_data)
+                formatted_start_sequence = format_fasta(accession_number, start_sequence)
+                formatted_end_sequence = format_fasta(accession_number, end_sequence)
+                starting_sequences.append(formatted_start_sequence)
+                ending_sequences.append(formatted_end_sequence)
+        # Write starting sequences to a file for download
+        start_sequences_text = "\n".join(starting_sequences)
+        st.download_button("Download Starting Sequences", start_sequences_text, file_name="starting_sequences.txt", mime="text/plain")
+        # Write ending sequences to a file for download
+        end_sequences_text = "\n".join(ending_sequences)
+        st.download_button("Download Ending Sequences", end_sequences_text, file_name="ending_sequences.txt", mime="text/plain")
 if __name__ == "__main__":
     main()