Spaces:

arikat
/

Glydentify

Sleeping

arikat committed on Oct 11, 2023

Commit

06f6357

•

1 Parent(s): df5c4a7

Minor changes to the process_family_sequence function

Files changed (1) hide show

app.py CHANGED Viewed

@@ -207,14 +207,14 @@ def process_family_sequence(protein_fasta):
     headers = [line for line in lines if line.startswith('>')]
     if len(headers) > 1:
-        return None, "Multiple fasta sequences detected. Please upload a fasta file with multiple sequences, otherwise only include one fasta sequence.", None
     protein_sequence = ''.join(line for line in lines if not line.startswith('>'))
     # Check for invalid characters
     valid_characters = set("ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy")  # the 20 standard amino acids
     if not set(protein_sequence).issubset(valid_characters):
-        return None, "Invalid protein sequence. It contains characters that are not one of the 20 standard amino acids. Does your sequence contain gaps?", None
     encoded_input = tokenizer([protein_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")
     input_idsfam = encoded_input["input_ids"]

     headers = [line for line in lines if line.startswith('>')]
     if len(headers) > 1:
+        return None, None, None, "Multiple fasta sequences detected. Please upload a fasta file with multiple sequences, otherwise only include one fasta sequence."
     protein_sequence = ''.join(line for line in lines if not line.startswith('>'))
     # Check for invalid characters
     valid_characters = set("ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy")  # the 20 standard amino acids
     if not set(protein_sequence).issubset(valid_characters):
+        return None, None, None, "Invalid protein sequence. It contains characters that are not one of the 20 standard amino acids. Does your sequence contain gaps?"
     encoded_input = tokenizer([protein_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")
     input_idsfam = encoded_input["input_ids"]