Spaces:

arikat
/

Glydentify

Sleeping

arikat committed on Oct 11, 2023

Commit

df5c4a7

•

1 Parent(s): 9f810ba

minor changes to headers

Files changed (1) hide show

app.py CHANGED Viewed

@@ -360,9 +360,9 @@ def mask_residue(sequence, position):
     return sequence[:position] + 'X' + sequence[position+1:]
 def generate_heatmap(protein_fasta):
-    # Split the fasta string into header and sequence
-    header, *sequence_parts = protein_fasta.split('\n')
-    protein_sequence = ''.join(sequence_parts)
     # Check if the header is valid
     if not header.startswith('>'):
@@ -373,7 +373,6 @@ def generate_heatmap(protein_fasta):
     if not set(protein_sequence).issubset(valid_characters):
         return None, "Invalid protein sequence. It contains characters that are not one of the 20 standard amino acids.", None
     # Tokenize and predict for original sequence
     encoded_input = tokenizer([protein_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")
     with torch.no_grad():

     return sequence[:position] + 'X' + sequence[position+1:]
 def generate_heatmap(protein_fasta):
+    lines = protein_fasta.strip().split('\n')
+    header = lines[0]
+    protein_sequence = ''.join(lines[1:])
     # Check if the header is valid
     if not header.startswith('>'):
     if not set(protein_sequence).issubset(valid_characters):
         return None, "Invalid protein sequence. It contains characters that are not one of the 20 standard amino acids.", None
     # Tokenize and predict for original sequence
     encoded_input = tokenizer([protein_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")
     with torch.no_grad():