[Solved] Extract contact information from HTML with Python

Question

I use this code to extract information:

# _*_ coding:utf-8 _*_
import urllib2
import urllib
import re
from bs4 import BeautifulSoup
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

def grabHref(url,localfile):
    """Follow the brand links found at *url* and save each page's e-mail links.

    The page at *url* is fetched and searched for ``div > a`` anchors whose
    href starts with ``http://www.karmaloop.com/kazbah/browse``.  Each linked
    page is fetched in turn and searched for ``div > a`` anchors whose href
    starts with ``mailto``.  For every such anchor the brand URL and the
    anchor text are printed and written to *localfile* as two CRLF-terminated
    lines.  A page without a ``mailto`` anchor adds nothing to the file.

    Args:
        url: Address of the index page that lists the brand links.
        localfile: Path of the output file; opened in ``'wb'`` mode, so any
            existing content is replaced.

    Errors from ``urllib2.urlopen`` or from opening/writing *localfile* are
    not caught here and propagate to the caller.
    """
    # The URL contains ':' and '/', which are not allowed in an unquoted CSS
    # attribute value, so the value is wrapped in double quotes.
    brand_selector = 'div > a[href^="http://www.karmaloop.com/kazbah/browse"]'
    email_selector = "div > a[href^=mailto]"

    html = urllib2.urlopen(url).read()
    # NOTE(review): the index page is decoded as gb2312 with undecodable
    # bytes dropped -- confirm this matches the charset the site really uses.
    html = unicode(html,'gb2312','ignore').encode('utf-8','ignore')
    soup = BeautifulSoup(html)

    # 'with' closes the file even when a later page fetch raises.
    with open(localfile,'wb') as myfile:
        for link in soup.select(brand_selector):
            brand_url = link['href']
            brand_page = BeautifulSoup(urllib2.urlopen(brand_url).read())
            for item in brand_page.select(email_selector):
                contactInfo = item.get_text()
                print(brand_url)
                print(contactInfo)
                # Write inside the inner loop: every address found is saved,
                # and a page without one can no longer hit an unbound name
                # or repeat the previous page's address.
                # Encode explicitly so the binary-mode write does not depend
                # on the interpreter's default encoding.
                myfile.write(brand_url.encode('utf-8'))
                myfile.write('\r\n')
                myfile.write(contactInfo.encode('utf-8'))
                myfile.write('\r\n')



def main():
    """Run grabHref on the karmaloop.com brands page, saving to Contact.txt."""
    grabHref("http://www.karmaloop.com/brands", "Contact.txt")


# Only start the scrape when the file is executed as a script.
if __name__ == "__main__":
    main()

However, with this code I can only get the email address. How can I also get the phone number and the postal address? Thanks.

Accepted Answer

I use this code to extract information:

# _*_ coding:utf-8 _*_
import urllib2
import urllib
import re
from bs4 import BeautifulSoup
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

def grabHref(url,localfile):
    """Follow the brand links found at *url* and save each page's e-mail links.

    The page at *url* is fetched and searched for ``div > a`` anchors whose
    href starts with ``http://www.karmaloop.com/kazbah/browse``.  Each linked
    page is fetched in turn and searched for ``div > a`` anchors whose href
    starts with ``mailto``.  For every such anchor the brand URL and the
    anchor text are printed and written to *localfile* as two CRLF-terminated
    lines.  A page without a ``mailto`` anchor adds nothing to the file.

    Args:
        url: Address of the index page that lists the brand links.
        localfile: Path of the output file; opened in ``'wb'`` mode, so any
            existing content is replaced.

    Errors from ``urllib2.urlopen`` or from opening/writing *localfile* are
    not caught here and propagate to the caller.
    """
    # The URL contains ':' and '/', which are not allowed in an unquoted CSS
    # attribute value, so the value is wrapped in double quotes.
    brand_selector = 'div > a[href^="http://www.karmaloop.com/kazbah/browse"]'
    email_selector = "div > a[href^=mailto]"

    html = urllib2.urlopen(url).read()
    # NOTE(review): the index page is decoded as gb2312 with undecodable
    # bytes dropped -- confirm this matches the charset the site really uses.
    html = unicode(html,'gb2312','ignore').encode('utf-8','ignore')
    soup = BeautifulSoup(html)

    # 'with' closes the file even when a later page fetch raises.
    with open(localfile,'wb') as myfile:
        for link in soup.select(brand_selector):
            brand_url = link['href']
            brand_page = BeautifulSoup(urllib2.urlopen(brand_url).read())
            for item in brand_page.select(email_selector):
                contactInfo = item.get_text()
                print(brand_url)
                print(contactInfo)
                # Write inside the inner loop: every address found is saved,
                # and a page without one can no longer hit an unbound name
                # or repeat the previous page's address.
                # Encode explicitly so the binary-mode write does not depend
                # on the interpreter's default encoding.
                myfile.write(brand_url.encode('utf-8'))
                myfile.write('\r\n')
                myfile.write(contactInfo.encode('utf-8'))
                myfile.write('\r\n')



def main():
    """Scrape starting from the karmaloop.com brands page into Contact.txt."""
    brands_page = "http://www.karmaloop.com/brands"
    output_path = "Contact.txt"
    grabHref(brands_page, output_path)


# Entry point: run the scrape only when executed directly, not on import.
if __name__ == "__main__":
    main()

However, with this code I can only get the email address. How can I also get the phone number and the postal address? Thanks.