#!/usr/bin/env python #get_coordinates.py is a script for retrieving genomic positions #for markers from the International HapMap Project and setting #intervals centered on those positions #Copyright (C) 2007 Daniel Shriner #This program is free software; you can redistribute it and/or #modify it under the terms of the GNU General Public License #as published by the Free Software Foundation; either version 2 #of the License, or (at your option) any later version. #This program is distributed in the hope that it will be useful, #but WITHOUT ANY WARRANTY; without even the implied warranty of #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #GNU General Public License for more details. #The text of the GNU General Public License, version 2, is available #as http://www.gnu.org/copyleft or by writing to the Free Software #Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. import sys,gzip,string #Load the map files #try: # f1 = gzip.open('dbsnp_b124_on_B34.gff3.gz','rb') #except IOError: # print >> sys.stderr, 'Error opening map file.\n' # sys.exit(1) entry=[] #line = f1.readline() #while 1: # line = f1.readline() # if not line: break # line = line.split('\t') # line[8] = line[8].split(';') # entry.append(line) #f1.close() try: f2 = gzip.open('stsMap.gff.gz','rb') except IOError: print >> sys.stderr, 'Error opening map file.\n' sys.exit(1) line = f2.readline() while 1: line = f2.readline() if not line: break line = line.split('\t') line[8] = line[8].split(';') entry.append(line) f2.close() #Open the input file of markers try: if len(sys.argv) > 1: f3 = open(sys.argv[1],'r') else: f3 = open('markers.txt','r') except IOError: print >> sys.stderr, 'Error opening marker file.\n' sys.exit(1) #Open an output file for printing out genomic coordinates try: f4 = open('coordinates.txt','w') except IOError: print >> sys.stderr, 'Error opening output file.\n' sys.exit(1) #Open an output file for printing out ranges try: f5 = open('ranges.txt','w') except IOError: print >> sys.stderr, 'Error opening output file.\n' sys.exit(1) #Read in the markers and test them one at a time j = 1 while 1: line = f3.readline() if not line: break markername = line[:-1] term1 = 'ID=SNP:' + markername term2 = 'Alias=' + markername term3 = 'ID=STS:' + markername i = 0 try: while 1: if term1 in entry[i][8] or term2 in entry[i][8] or term3 in entry[i][8]: print >> f4,'%u\t%s\t%s\t%s\t%s' % (j,markername,string.lower(entry[i][0]),entry[i][3],entry[i][4]) markerstart = int(entry[i][3]) markerend = int(entry[i][4]) if len(sys.argv) > 2: offset = int(sys.argv[2]) else: offset = 10000000 midpoint = (markerstart + markerend)/2 if (midpoint - offset) < 1: rangestart = 1 else: rangestart = midpoint - offset rangeend = midpoint + offset print >> f5, '%u\t%s\t%u\t%u' % (j,string.lower(entry[i][0]),rangestart,rangeend) break else: i += 1 except IndexError: print >> f4, '%u\t%s\tNA\tNA\tNA' % (j,markername) j += 1 f3.close() f4.close() f5.close()