#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Distribute differential-expression (DE) values across GO terms.

Reads a tab-separated DE matrix and a tab-separated GO annotation file,
matches their rows by sequence ID, and gives every GO term of a sequence an
equal share of that sequence's DE value (value / number of GO terms listed
for the sequence). The shares are summed per GO term and every positive
total is written to "<tissue>.txt" in the current directory.

Usage:
    % GO_DE.py matrix.txt GO.txt

Written by jason macrander
"""
import sys
import re
import collections

try:
    from Bio import SeqIO
except ImportError:
    # Nothing in this script calls SeqIO, so a missing Biopython install
    # must not stop the script from running.
    SeqIO = None

# (tissue name, 0-based matrix column holding its DE value). The tissue name
# is also the stem of the output file and of the progress message.
# An earlier revision also handled a "Filament" tissue; to add a tissue,
# append its pair here and make sure ID_COLUMN matches the matrix layout.
TISSUE_COLUMNS = (
    ("gastric_cirri", 1),
    ("Tentacle", 2),
)

# 0-based matrix column holding the sequence ID used to look up GO terms.
ID_COLUMN = 3


def _unique_in_order(items):
    """Return the distinct elements of *items*, keeping first-seen order."""
    seen = set()
    unique = []
    for item in items:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique


def _read_go_terms(go_path):
    """Parse the GO annotation file into ``{sequence ID: [GO terms]}``.

    Each non-blank line is ``<ID>\\t<term>; <term>; ...``. Every ID is echoed
    to stdout as it is read.

    Args:
        go_path: Path of the GO annotation file.

    Returns:
        dict mapping each sequence ID to its list of GO terms.

    Raises:
        IndexError: If a non-blank line has no second tab-separated column.
    """
    go_terms = {}
    with open(go_path) as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing empty line) carry no data.
                continue
            fields = stripped.split("\t")
            seq_id = fields[0]
            print(seq_id)
            # Duplicates are removed within one line only; terms repeated on
            # a later line for the same ID are kept and therefore count twice.
            # NOTE(review): confirm that cross-line duplicates are intended.
            terms = _unique_in_order(fields[1].split("; "))
            go_terms.setdefault(seq_id, []).extend(terms)
    return go_terms


def _collect_shares(matrix_path, go_terms):
    """Split each matrix row's DE values evenly over the row's GO terms.

    Args:
        matrix_path: Path of the tab-separated DE matrix.
        go_terms: Mapping of sequence ID to GO terms, as returned by
            ``_read_go_terms``.

    Returns:
        dict mapping each tissue name to ``{GO term: [share, ...]}``.

    Raises:
        IndexError: If a non-blank row has too few columns.
        ValueError: If a DE column of a non-blank row is not a number.
    """
    shares = {
        name: collections.defaultdict(list) for name, _ in TISSUE_COLUMNS
    }
    with open(matrix_path, "r") as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if not stripped:
                continue
            fields = stripped.split("\t")
            seq_id = fields[ID_COLUMN]
            # Parse before the lookup so malformed rows are reported even
            # when their ID has no GO annotation.
            values = [
                (name, float(fields[column]))
                for name, column in TISSUE_COLUMNS
            ]
            # Direct lookup instead of scanning every GO entry per row.
            terms = go_terms.get(seq_id)
            if not terms:
                continue
            for name, value in values:
                share = value / len(terms)
                per_term = shares[name]
                for term in terms:
                    per_term[term].append(share)
    return shares


def _write_totals(out_path, shares_by_term):
    """Write ``<GO term>\\t<total>`` for every term whose total is positive.

    Args:
        out_path: File to create or overwrite.
        shares_by_term: Mapping of GO term to the list of shares to sum.
    """
    with open(out_path, "w") as handle:
        for term, term_shares in shares_by_term.items():
            total = sum(term_shares)
            if total > 0:
                handle.write("%s\t%f\n" % (term, total))


def main(argv):
    """Run the script.

    Args:
        argv: Command-line vector; ``argv[1]`` is the matrix file and
            ``argv[2]`` the GO file. Extra arguments are ignored.

    Returns:
        0 on success, 1 when the two required paths are missing (the usage
        text is printed in that case and no output file is touched).
    """
    if len(argv) < 3:
        print(__doc__)
        return 1
    matrix_path, go_path = argv[1], argv[2]

    go_terms = _read_go_terms(go_path)
    shares = _collect_shares(matrix_path, go_terms)

    # Outputs are opened only now, so a usage error or an unreadable input
    # can no longer truncate results from a previous run.
    for name, _ in TISSUE_COLUMNS:
        _write_totals(name + ".txt", shares[name])
        print("%s done" % name)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))