#!/usr/bin/env python

#get_gene_names.py is a script for mapping gene symbols
#and names to Entrez Gene ids

#Copyright (C) 2007 Daniel Shriner

#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation; either version 2
#of the License, or (at your option) any later version.

#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.

#The text of the GNU General Public License, version 2, is available
#as http://www.gnu.org/copyleft or by writing to the Free Software
#Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

import sys
import gzip
import os

#Files read and written by the script (all relative to the working directory)
GENE_INFO_PATH = 'gene_info.hs.09May06.gz'
INPUT_PATH = 'adj_genes.txt'
OUTPUT_PATH = 'adj_gene_names.txt'


def parse_gene_info(lines):
    """Build a gene id lookup table from the lines of a gene_info file.

    Each record is tab-delimited; the 2nd field is used as the gene id and
    the 3rd and 9th fields are kept as its annotation (presumably the gene
    symbol and gene name of the NCBI gene_info layout -- confirm against the
    data file in use).

    Args:
        lines: iterable of lines, either text or bytes (a gzip file opened
            in binary mode yields bytes on Python 3).

    Returns:
        dict mapping gene id -> list of (field 3, field 9) tuples, in the
        order the records appear in the input.  A gene id that occurs on
        several records keeps all of them.
    """
    gene_info = {}
    for raw in lines:
        if not isinstance(raw, str):
            #Python 3 binary gzip stream; on Python 2 the line is already str
            raw = raw.decode('utf-8')
        if raw.startswith('#'):
            #Header/comment line, not a record
            continue
        fields = raw.rstrip('\r\n').split('\t')
        if len(fields) < 9:
            #Blank or truncated record: nothing usable to index
            continue
        gene_info.setdefault(fields[1], []).append((fields[2], fields[8]))
    return gene_info


def annotate_lines(lines, gene_info):
    """Yield annotated output lines for the lines of the gene id file.

    Every input line holds a label and a gene id separated by a tab.

    * A gene id of 'NA' yields '<label>\\tNA\\tNA\\tNA'.
    * Any other gene id yields one '<label>\\t<id>\\t<field 3>\\t<field 9>'
      line per matching gene_info record, in gene_info order.
    * A gene id with no matching record yields nothing.

    Args:
        lines: iterable of text lines.
        gene_info: lookup table as returned by parse_gene_info().

    Yields:
        Output lines without a trailing newline.

    Raises:
        ValueError: if a non-blank line contains no tab-separated gene id.
    """
    for number, raw in enumerate(lines, 1):
        #Strip the line ending itself rather than blindly dropping the last
        #character, so a final line without a newline keeps its full gene id
        text = raw.rstrip('\r\n')
        if not text:
            continue
        fields = text.split('\t')
        if len(fields) < 2:
            raise ValueError('line %d has no tab-separated gene id: %r'
                             % (number, raw))
        label, gene_id = fields[0], fields[1]
        if gene_id == 'NA':
            yield '%s\tNA\tNA\tNA' % label
            continue
        for symbol, name in gene_info.get(gene_id, ()):
            yield '%s\t%s\t%s\t%s' % (label, gene_id, symbol, name)


def _die(message):
    """Report a fatal error on stderr and exit with status 1."""
    #The extra newline keeps the blank line that followed each message
    sys.stderr.write(message + '\n')
    sys.exit(1)


def main():
    """Annotate adj_genes.txt in place with the gene_info annotation."""
    #Load the gene_info file
    try:
        gene_file = gzip.open(GENE_INFO_PATH, 'rb')
    except IOError:
        _die('Error opening gene_info file.\n')
    try:
        gene_info = parse_gene_info(gene_file)
    finally:
        gene_file.close()

    #Open the input file of gene ids
    try:
        source = open(INPUT_PATH, 'r')
    except IOError:
        _die('Error opening adj_genes.txt.\n')
    try:
        #Open an output file for printing out gene info
        try:
            target = open(OUTPUT_PATH, 'w')
        except IOError:
            _die('Error opening output file.\n')
        try:
            for out_line in annotate_lines(source, gene_info):
                target.write(out_line + '\n')
        finally:
            target.close()
    finally:
        source.close()

    #Replace the input file with the annotated version.  The input is removed
    #first because os.rename() does not overwrite an existing file on Windows.
    os.remove(INPUT_PATH)
    os.rename(OUTPUT_PATH, INPUT_PATH)


if __name__ == '__main__':
    main()