Hello everyone,
I am currently learning the Python BeautifulSoup library. I want to get the
names and prices of all the mobile phones on flipkart.com. I found the
following code on the internet, but it only gets the details from the
current page. I need the details of every phone in my document. How can I
do this?
Here is my code:
----------------------------------------------------------------------------------------------------
import urllib
import re
from bs4 import BeautifulSoup
# part 1: download the Samsung listing page and collect the product links.
# NOTE: this is a Python 2 script (urllib.urlopen). Only this single listing
# URL is fetched, so products on any further result pages are never visited.
url = ("http://www.flipkart.com/mobiles/samsung~brand/pr"
       "?sid=tyy,4io&otracker=hp_nmenu_sub_electronics_0_Samsung")
# Captures whatever sits between `href=` and `title` in each product-title
# anchor tag.
regex = ('<a class="fk-display-block" data-tracking-id="prd_title" '
         'href=(.+?)title')
pattern = re.compile(regex)

htmlfile = urllib.urlopen(url)
try:
    htmltext = htmlfile.read()
finally:
    # The listing-page handle was previously never closed.
    htmlfile.close()

docSoup = BeautifulSoup(htmltext)
abc = docSoup.findAll('a')
# The regex is run against the string form of all anchors found.
c = str(abc)
count = 0

# part 2: visit each product link and gather the mobile specifications.
title = re.findall(pattern, c)

# `with` guarantees both output files are really closed; the original wrote
# `file.close` / `file2.close` without parentheses, which never called them.
with open('c:/Python27/samsung.txt', 'w') as file2:
    for i in title:
        print(i)
        file2.write(i)
        file2.write("\n")
        # NOTE(review): count is bumped here and again below, so the detail
        # files are numbered 1, 3, 5, ... and the final total printed is twice
        # the number of products -- confirm whether that is intended.
        count = count + 1
        print("\n1\n")

        # Drop the first character and the last two of the captured href
        # (presumably the surrounding quotes and trailing space -- verify).
        mob_url = 'http://www.flipkart.com' + i[1:-2]
        htmlfile = urllib.urlopen(mob_url)
        try:
            htmltext = htmlfile.read()
        finally:
            htmlfile.close()

        docSoup = BeautifulSoup(htmltext)
        abc = docSoup.find_all('td')

        # Raw string so the backslashes in the Windows path are kept literally.
        detail_path = (r'C:\Documents and Settings\praveen.s\Desktop\Details'
                       r'\samsung' + str(count) + '.txt')
        with open(detail_path, 'w') as detail_file:
            mod = 0   # 1 when the previous cell was the 'Model ID' label
            count = count + 1
            pr = -1   # becomes positive once the 'Brand' cell has been seen
            for j in abc:
                if j.text == 'Brand':
                    pr = 3
                if mod == 1:
                    # The cell right after 'Model ID' goes to the summary file.
                    file2.write(j.text)
                    file2.write("\n")
                    mod = 0
                if j.text == 'Model ID':
                    mod = 1
                if pr > 0:
                    # From 'Brand' onward, every cell goes to the detail file.
                    # NOTE(review): j.text is Unicode; non-ASCII text may fail
                    # to write to a plain Python 2 file -- confirm.
                    detail_file.write(j.text)
                    detail_file.write('\n')

print(count)
----------------------------------------------------------------------------------------------------
With Regards
S. Praveen
http://praveenlearner.wordpress.com