"""Import listings from an HTML table into the Boston OpenGuides wiki.

Usage: <script> URL

Fetches URL, walks the rows of the second <table> on the page and, for every
row whose first cell starts with a bold title, POSTs one form submission to
http://boston.openguides.org/.  The categories applied to every entry are
read interactively from stdin.

Written for Python 2 and BeautifulSoup 3.
"""
import re
import sys
import urllib

from BeautifulSoup import BeautifulSoup

# Endpoint that receives one form POST per imported row.
GUIDE_URL = "http://boston.openguides.org/"

# Splits "<street> - <city>, MA <postcode>"; the postcode group may be empty.
ADDRESS_RE = re.compile("(.*?) - (.*?), MA *([^ ]*?) *$")

# Form fields sent with every submission; per-row values are layered on top.
BLANK_FORM = {
    'content': '',
    'summary': '',
    'fax': '',
    'website': '',
    'hours_text': '',
    'postcode': '',
    'latitude': '',
    'longitude': '',
    'map_link': '',
    'username': 'ChristopherSchmidt',
    'comment': 'Added via Zami',
    'edit_type': 'Normal edit',
    'Save': 'Save changes',
}


def _entry_title(bold):
    """Return the first child of the link inside *bold*, else of *bold* itself.

    BeautifulSoup yields None for a missing child tag, so the link has to be
    compared against None before its length is taken; calling len() on the
    missing link is what used to make the unlinked-title case crash.
    """
    link = bold.a
    # len(tag) counts the tag's children; an empty <a></a> falls through.
    if link is not None and len(link):
        return link.contents[0]
    return bold.contents[0]


def _utf8(value):
    """Return *value* as a UTF-8 byte string if it is unicode text.

    urllib.urlencode() calls str() on each value, which raises
    UnicodeEncodeError for non-ASCII unicode text.
    NOTE(review): assumes the wiki accepts UTF-8 form data -- confirm.
    """
    if isinstance(value, unicode):
        return value.encode('utf-8')
    return value


if len(sys.argv) < 2:
    sys.exit("Usage: %s URL" % sys.argv[0])

soup = BeautifulSoup(urllib.urlopen(sys.argv[1]).read())
categories = raw_input("Categories, seperated by ','? ").split(',')
categories = '\r\n'.join(categories)

for row in soup('table')[1]('tr'):
    cells = row('td')
    # Title/address, phone and locales are read from cells 0, 1 and 2.
    if len(cells) < 3:
        continue

    lead = cells[0].p
    bold = lead.b if lead is not None else None
    # Only rows that open with a non-empty bold title are imported.
    if bold is None or not len(bold):
        continue

    data = dict(BLANK_FORM)
    data['title'] = _entry_title(bold)

    # The address is the third child node of the first paragraph.
    raw_address = lead.contents[2].strip()
    m = ADDRESS_RE.search(raw_address)
    if m is None:
        # %r keeps the message ASCII-safe whatever the page contained.
        sys.stderr.write("Skipped %r: unrecognised address %r.\n"
                         % (data['title'], raw_address))
        continue
    data['address'], data['city'], data['postcode'] = m.groups()

    data['phone'] = cells[1]('p')[0].contents[0].strip()
    data['locales'] = cells[2]('p')[1].contents[0].strip()
    data['categories'] = categories

    # Passing a data argument makes urlopen issue a POST; the reply body is
    # read and discarded.
    payload = urllib.urlencode([(key, _utf8(value))
                                for key, value in data.items()])
    urllib.urlopen(GUIDE_URL, payload).read()
    print("Done: %s." % data['title'])