arikat committed on
Commit
06f6357
1 Parent(s): df5c4a7

minor changes to the process_family_sequence function

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -207,14 +207,14 @@ def process_family_sequence(protein_fasta):
207
 
208
  headers = [line for line in lines if line.startswith('>')]
209
  if len(headers) > 1:
210
- return None, "Multiple fasta sequences detected. Please upload a fasta file with multiple sequences, otherwise only include one fasta sequence.", None
211
 
212
  protein_sequence = ''.join(line for line in lines if not line.startswith('>'))
213
 
214
  # Check for invalid characters
215
  valid_characters = set("ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy") # the 20 standard amino acids
216
  if not set(protein_sequence).issubset(valid_characters):
217
- return None, "Invalid protein sequence. It contains characters that are not one of the 20 standard amino acids. Does your sequence contain gaps?", None
218
 
219
  encoded_input = tokenizer([protein_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")
220
  input_idsfam = encoded_input["input_ids"]
 
207
 
208
  headers = [line for line in lines if line.startswith('>')]
209
  if len(headers) > 1:
210
+ return None, None, None, "Multiple fasta sequences detected. Please upload a fasta file with multiple sequences, otherwise only include one fasta sequence."
211
 
212
  protein_sequence = ''.join(line for line in lines if not line.startswith('>'))
213
 
214
  # Check for invalid characters
215
  valid_characters = set("ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy") # the 20 standard amino acids
216
  if not set(protein_sequence).issubset(valid_characters):
217
+ return None, None, None, "Invalid protein sequence. It contains characters that are not one of the 20 standard amino acids. Does your sequence contain gaps?"
218
 
219
  encoded_input = tokenizer([protein_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")
220
  input_idsfam = encoded_input["input_ids"]