# Get contents of a web page
#
# 6/30/2005
# Scott Hendrickson
#
# Fetches the front page of each listed craigslist.org city site and prints
# one comma-separated row per site: the current date, the site URL, and then
# every parenthesised integer "(N)" found in the page, in page order.
import contextlib
import datetime
import re
import urllib

try:  # Python 3 moved urlopen into urllib.request
    from urllib.request import urlopen
except ImportError:  # Python 2
    urlopen = urllib.urlopen


def getUrl(serverAdr):
    """Request the page at URL *serverAdr* and return its raw body.

    The connection is always closed, even if reading fails.  The body is
    returned exactly as read (bytes on Python 3, str on Python 2).  Any
    error raised by ``urlopen`` or ``read`` propagates to the caller.
    """
    with contextlib.closing(urlopen(serverAdr)) as f:
        return f.read()


# Matches a parenthesised run of digits such as "(123)".
reExp = re.compile(r"\([0-9]+\)")

# Sub-domains of craigslist.org to poll, in output order.
serverAdr = [
    'boise', 'saltlakecity', 'neworleans', 'modesto', 'portland',
    'minneapolis', 'chicago', 'lasvegas', 'newjersey', 'www', 'losangeles',
    'newyork', 'denver', 'allentown', 'dallas', 'houston', 'jacksonville',
    'tallahassee', 'stockton', 'sacramento', 'sanantonio', 'orlando',
    'nashville', 'miami', 'memphis', 'tampa', 'westpalmbeach', 'fortmyers',
    'indianapolis', 'louisville', 'inlandempire',
]


def extractCounts(page):
    """Return the digits of every "(N)" occurrence in *page*, in order.

    *page* may be text or bytes.  Bytes are decoded as latin-1, which
    cannot fail and leaves the ASCII digits and parentheses intact.
    """
    if isinstance(page, bytes):
        page = page.decode("latin-1")
    return [match.strip("()") for match in reExp.findall(page)]


def formatRow(day, url, counts):
    """Build one output row: ``day,url,count1,count2,...,``.

    The trailing comma is kept on purpose so the output stays identical
    to what this script has always printed.
    """
    return ",".join([str(day), url] + list(counts)) + ","


def main():
    """Fetch every site in ``serverAdr`` and print one row per site."""
    # Column layout noted by the original author (no header is printed):
    # date,url,calendar,community,personals,housing,forsale,services,jobs,gigs,resumes
    # NOTE(review): every "(N)" on the page is emitted, so the real number
    # of columns depends on the page content - confirm against live output.
    for adr in serverAdr:
        url = "http://" + adr + ".craigslist.org"
        counts = extractCounts(getUrl(url))
        print(formatRow(datetime.date.today(), url, counts))


if __name__ == "__main__":
    main()