Python3.4版本的抓淘宝妹子图代码
- 作者: 烟熏色-_-素锦流年
- 来源: 51数据库
- 2022-08-12
import urllib.request
# Base URL of the paginated listing endpoint; a page number is appended to it
# when it is passed to getPageUrl.
mmurl = "http://mm.taobao.com/json/request_top_list.htm?type=0&page="
# Module-level counters updated by getPicUrl: SUCC counts saved images,
# FAIL counts attempts that raised an error.
SUCC = 0
FAIL = 0
def getPicUrl(htmlurl,num):
    """Download every matching image referenced by one page.

    The page at ``htmlurl`` is fetched once and decoded as GBK.  Every
    substring that starts at ``src="http://img0`` and runs to the next
    ``.jpg`` is treated as an image URL and saved as
    ``beautiful/<num>-<index>.jpg``, where ``index`` counts from 1.

    Args:
        htmlurl: URL of the page to scan for images.
        num: Number identifying the page; used in the progress messages
            and in the output file names.

    Returns:
        None.  Results are reported through the module-level counters:
        ``SUCC`` is incremented for each saved image, ``FAIL`` for each
        failed download and once if the page itself cannot be fetched.
    """
    global SUCC, FAIL
    import os  # local import so the file's import header stays untouched

    pica = '''src="http://img0'''
    picb = ".jpg"

    # Fetch the page a single time.  If this fails there is nothing to
    # scan, so give up instead of retrying in an endless loop.
    try:
        with urllib.request.urlopen(htmlurl) as resp:
            percontHTML = resp.read().decode('GBK')
    except Exception as exc:
        print("图片获取失败,可能是服务器自动屏蔽掉了", exc)
        FAIL += 1
        return

    i = 0
    tmpb = 0
    while True:
        tmpa = percontHTML.find(pica, tmpb)
        if tmpa == -1:
            break
        tmpb = percontHTML.find(picb, tmpa)
        if tmpb == -1:
            break
        # +5 skips the leading 'src="'; +4 keeps the ".jpg" suffix.
        imgurl = percontHTML[tmpa + 5:tmpb + 4]
        i += 1
        print("正在下载第" , num , "个美女的第" , i , "张图片")
        try:
            # Idempotent; a failure to create the directory is counted as
            # a failed download like any other error below.
            os.makedirs("beautiful", exist_ok=True)
            target = os.path.join("beautiful", str(num) + "-" + str(i) + ".jpg")
            urllib.request.urlretrieve(imgurl, target)
        except Exception as exc:
            # Best effort: record the failure and move on to the next image.
            print("图片获取失败,可能是服务器自动屏蔽掉了", exc)
            FAIL += 1
        else:
            SUCC += 1
    print("第" , num , "个美女的图片下载完毕")
def getPageUrl(mmurl, pages=80, per_page=10):
    """Collect the profile-page links found on the listing pages.

    For each page number from 1 to ``pages`` the URL ``mmurl + str(page)``
    is fetched and decoded as GBK.  Within a page, each
    ``<div class="pic s60">`` ... ``</div>`` section is searched for the
    first ``<a href="`` and the link up to and including ``.htm`` is
    collected.

    Args:
        mmurl: Base URL to which the page number is appended.
        pages: Number of listing pages to read (default 80).
        per_page: Maximum number of entries taken from one page
            (default 10).

    Returns:
        A list of the extracted link strings, in the order they were found.
        Sections without a usable link are skipped rather than contributing
        empty or truncated entries.

    Raises:
        Any error raised by ``urllib.request.urlopen`` or by the GBK decode
        propagates to the caller.
    """
    diva = '''<div class="pic s60">'''
    divb = '</div>'
    hrefa = "<a href="
    hrefb = ".htm"
    hreflist = []
    for i in range(1, pages + 1):
        url = mmurl + str(i)
        with urllib.request.urlopen(url) as resp:
            cont = resp.read().decode('GBK')
        pb = 0  # search position; restarts at the top of every page
        for _ in range(per_page):
            pa = cont.find(diva, pb)
            if pa == -1:
                break  # fewer entries than per_page on this page
            pb = cont.find(divb, pa)
            if pb == -1:
                break  # unterminated section; nothing reliable follows
            divcont = cont[pa:pb]
            aa = divcont.find(hrefa)
            if aa == -1:
                continue
            ab = divcont.find(hrefb, aa)
            if ab == -1:
                continue
            # +9 skips '<a href=' plus the opening quote; +4 keeps ".htm".
            acont = divcont[aa + 9:ab + 4]
            if acont:
                hreflist.append(acont)
        print("正在读取第" + str(i) + "页的美女图片地址")
    return hreflist
if __name__ == "__main__":
    # Script entry point: gather the profile-page links, then download the
    # images of each page in turn and print the final counters.
    # No `global` statement is needed here: this code already runs at module
    # scope, and declaring names global after they have been assigned is a
    # syntax error on newer Python 3 releases.
    hreflist = getPageUrl(mmurl)
    print("共有美女个人图片页面", len(hreflist))
    for num, page_url in enumerate(hreflist, 1):
        print("开始下载第" , num , "个美女图")
        getPicUrl(page_url,num)
    print("成功下载图片:" , SUCC , "****下载图片失败:" , FAIL)
推荐阅读
热点文章
Discord.py(重写)on_member_update 无法正常工作
0
Discord.py 在 vc 中获取用户分钟数
0
discord.py 重写 |为我的命令出错
0
Discord.py rewrite 如何 DM 命令?
0
播放音频时,最后一部分被切断.如何解决这个问题?(discord.py)
0
在消息删除消息 Discord.py
0
如何使 discord.py 机器人私人/直接消息不是作者的人?
0
(Discord.py) 如何获取整个嵌入内容?
0
Discord bot 尽管获得了许可,但不能提及所有人
0
Discord.py discord.NotFound 异常
0
