Spaces:

geored
/

gtmio

Running

App Files Files Community

gtmio / gtm /lib /python3.12 /site-packages /RUST /synergy.py

geored

Upload folder using huggingface_hub

fe41391 verified 11 months ago

raw

history blame

6.58 kB

	#!/usr/bin/python
	#####################################################################################
	# rust_synergy, Identifies tripeptides that are candidates for synergistic interactions
	# Copyright (C) 2015 Patrick O'Connor

	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation, either version 3 of the License, or
	# (at your option) any later version.

	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.

	# You should have received a copy of the GNU General Public License
	# along with this program. If not, see <https://www.gnu.org/licenses/>.
	#####################################################################################

	import numpy as np
	import argparse, os, sys
	from RUST.methods import *


	# One-letter codes accepted as residues in both metafootprint files.
	_AMINO_ACIDS = frozenset("ACDEFGHIKLMNPQRSTVWY")

	# Number of leading positions used to estimate the spread of
	# observed-minus-expected differences (presumably a 60-position profile
	# minus the two positions lost to the tripeptide offset -- TODO confirm).
	_STD_WINDOW = 58

	# Column index subtracted from a position so it is reported as a distance
	# from the A-site, as named in the output header.
	_ASITE_OFFSET = 40

	# Only scores whose absolute value exceeds this are written out.
	_ZSCORE_CUTOFF = 5


	def _read_rows(path):
	    """Return ``(label, raw_values)`` for each data row of a comma-separated file.

	    The first line is skipped as a header. Reading stops at the first line
	    that contains no comma (for example a blank line).
	    """
	    rows = []
	    with open(path) as handle:
	        handle.readline()
	        for line in handle:
	            # Strip only the line terminator; slicing off the last character
	            # would corrupt a final line that has no trailing newline.
	            fields = line.rstrip("\r\n").split(",")
	            if len(fields) == 1:
	                break
	            rows.append((fields[0], fields[1:]))
	    return rows


	def _load_residue_profiles(path):
	    """Return ``{residue: normalised profile}`` parsed from the amino-acid file.

	    Each profile is the row's values divided by the row's first value. Rows
	    whose first value is 0 are left out. Exits with status 1 if a row label is
	    not a single known amino-acid letter.
	    """
	    profiles = {}
	    for label, raw in _read_rows(path):
	        if len(label) != 1 or label not in _AMINO_ACIDS:
	            sys.stderr.write(
	                "Amino acid metafootprint file not correct, check input file\n"
	            )
	            sys.exit(1)
	        coverage = [float(value) for value in raw]
	        baseline = coverage[0]
	        if baseline == 0:
	            continue
	        profiles[label] = [value / baseline for value in coverage[1:]]
	    return profiles


	def _strip_prefix(name, prefix):
	    """Return ``name`` without ``prefix`` when it starts with it, else unchanged."""
	    if name.startswith(prefix):
	        return name[len(prefix):]
	    return name


	def main(args):
	    """Score tripeptides against the product of their single-residue profiles.

	    For every tripeptide row in ``args.tri`` the normalised profile is compared,
	    position by position, with the product of the normalised profiles of its
	    three residues from ``args.aa`` (second residue shifted by one position,
	    third by two). Each position gets a standard score (difference divided by
	    the standard deviation of the differences for that tripeptide) and a fold
	    change. Rows whose absolute score exceeds 5 are written, highest score
	    first, to ``<args.Path>/synergy_<aa name>_<tri name>``.

	    Args:
	        args: object with the attributes ``tri`` (path to the tripeptide
	            metafootprint file), ``aa`` (path to the amino-acid metafootprint
	            file) and ``Path`` (output directory, created if missing).

	    Raises:
	        SystemExit: with status 1 when a row label in either input file is
	            not made of known amino-acid letters.
	    """
	    profiles = None  # parsed on first use, so the file is read at most once
	    records = []

	    for tripeptide, raw in _read_rows(args.tri):
	        coverage = [float(value) for value in raw]
	        baseline = coverage[0]
	        if baseline == 0:
	            continue
	        observed = [value / baseline for value in coverage[1:]]

	        if len(tripeptide) != 3 or set(tripeptide) - _AMINO_ACIDS:
	            sys.stderr.write(
	                "Tripeptide metafootprint file not in correct, check input file\n"
	            )
	            sys.exit(1)

	        if profiles is None:
	            profiles = _load_residue_profiles(args.aa)

	        residue_profiles = [profiles.get(residue) for residue in tripeptide]
	        if any(profile is None for profile in residue_profiles):
	            # A residue has no usable row in the amino-acid file; scoring it
	            # would silently reuse values from another tripeptide.
	            continue
	        first, second, third = residue_profiles

	        expected = [
	            first[pos] * second[pos + 1] * third[pos + 2]
	            for pos in range(len(observed) - 2)
	        ]
	        if not expected:
	            continue

	        differences = [
	            abs(obs) - abs(exp)
	            for obs, exp in zip(observed[:_STD_WINDOW], expected)
	        ]
	        # NOTE(review): a spread of exactly 0 yields inf/nan scores below;
	        # this matches the previous behaviour.
	        std_diff = np.std(differences)

	        for pos, exp in enumerate(expected):
	            zscore = (observed[pos] - exp) / std_diff
	            fold_change = "not defined" if exp == 0 else observed[pos] / exp
	            records.append((zscore, tripeptide, pos, fold_change))

	    os.makedirs(args.Path, exist_ok=True)
	    amino_file = _strip_prefix(os.path.basename(args.aa), "RUST_amino_file_")
	    tripeptide_file = _strip_prefix(
	        os.path.basename(args.tri), "RUST_tripeptide_file_"
	    )
	    out_path = os.path.join(
	        args.Path, "synergy_%s_%s" % (amino_file, tripeptide_file)
	    )

	    with open(out_path, "w") as outfile:
	        outfile.write(
	            "Tripeptide, Standard score, distance of 1st residue from A-site, fold change\n"
	        )
	        for zscore, tripeptide, pos, fold_change in sorted(records, reverse=True):
	            if abs(zscore) > _ZSCORE_CUTOFF:
	                outfile.write(
	                    "%s, %s, %s, %s\n"
	                    % (tripeptide, zscore, pos - _ASITE_OFFSET, fold_change)
	                )


	if __name__ == "__main__":
	    # Command-line entry point: parse the options and hand them to main().
	    parser = argparse.ArgumentParser(
	        description="Identifies tripeptides that are candidates for synergistic interactions"
	    )
	    # NOTE: main() never reads this option, so it stays optional; the help
	    # text no longer carries the stray ", required=True" fragment that had
	    # been concatenated into it.
	    parser.add_argument(
	        "-t",
	        "--transcriptome",
	        help="fasta file of transcripts, CDS start and end may be provided on description line using tab separation e.g. >NM_0001 10 5000, otherwise it searches for longest ORF",
	    )
	    parser.add_argument(
	        "--aa", help='path to file produced from "rust_amino"', required=True
	    )
	    parser.add_argument(
	        "--tri", help='path to file produced from "rust_tripeptide"', required=True
	    )
	    parser.add_argument(
	        "-P",
	        "--Path",
	        help='path to outputfile, default is "synergy"',
	        default="synergy",
	    )
	    parser.add_argument("--version", action="version", version="%(prog)s 1.2")
	    args = parser.parse_args()

	    main(args)