Python抓妹子图+多线程
- 作者: 老湿kfG
- 来源: 51数据库
- 2022-08-12
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 07 17:30:58 2015
@author: Dreace
"""
import urllib2
import sys
import time
import os
import random
from multiprocessing.dummy import Pool as ThreadPool
# Filesystem encoding; the status messages printed at the end of the script
# are decoded from UTF-8 and re-encoded with this before being printed.
type_ = sys.getfilesystemencoding()
def rename():
    """Return the current local time formatted as ``YYYYMMDDHHMMSS``.

    The script uses this 14-digit stamp as the name of the download
    directory.
    """
    return time.strftime("%Y%m%d%H%M%S")
def rename_2(name):
    """Return *name* with a ``.jpg`` suffix, zero-padded to three characters.

    Args:
        name: The file-name stem as a string (the caller passes the
            ``repr`` of the running image counter).

    Returns:
        ``name + '.jpg'``, where a one- or two-character *name* is first
        left-padded with ``'0'`` to a width of three. Names of any other
        length (including the empty string) are left unpadded.
    """
    # Only 1- and 2-character names are padded; everything else passes through.
    if 1 <= len(name) <= 2:
        name = name.rjust(3, '0')
    return name + '.jpg'
def download_pic(i):
    """Download the image at URL *i* into the current working directory.

    URLs rejected by ``Filter`` are skipped. A response smaller than
    ``1024 * 11`` bytes is not kept. A kept image is saved under a name made
    of a random integer, an underscore and the zero-padded value of the
    global ``count``, after which ``count`` is incremented.

    Any exception raised while downloading or saving is printed and
    swallowed, so a single bad URL does not stop the remaining downloads.

    Args:
        i: The image URL to fetch.
    """
    global count
    if not Filter(i):
        return
    try:
        response = urllib2.urlopen(i, timeout=time_out)
        try:
            data = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()

        # Check the size before touching the disk instead of writing the
        # file and deleting it afterwards.
        if len(data) < 1024 * 11:
            return

        file_name = repr(random.randint(10000, 999999999)) + "_" + rename_2(repr(count))
        with open(file_name, "wb") as f:
            f.write(data)
        # NOTE(review): this function is mapped over a thread pool and the
        # counter is updated without a lock, so the final tally may be
        # slightly off; the random prefix keeps file names distinct.
        count += 1
    except Exception as e:
        print(e)
def Filter(content):
    """Return True when *content* contains none of the entries in ``Filter_list``.

    The previous loop returned True as soon as a single entry was missing
    from *content*, so a URL was only rejected when it contained every
    entry at once. A URL is now rejected if it contains any entry.

    Args:
        content: The URL string to check.

    Returns:
        True if the URL may be downloaded, False if it matches an entry in
        ``Filter_list``.
    """
    return not any(blocked in content for blocked in Filter_list)
def get_pic(url_address):
    """Fetch one listing page and collect the ``.jpg`` fragments found in it.

    The page is requested with the module-level ``headers`` and
    ``time_out``. Its body is split on single quotes and every fragment
    containing ``.jpg`` is appended to the global ``pic_list``.

    Any exception raised while fetching or parsing is printed and
    swallowed, so one failing page does not stop the crawl.

    Args:
        url_address: URL of the listing page to fetch.
    """
    try:
        req = urllib2.Request(url=url_address, headers=headers)
        response = urllib2.urlopen(req, timeout=time_out)
        try:
            page = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        # Every quote-delimited fragment mentioning ".jpg" is kept as a
        # candidate image URL.
        pic_list.extend(piece for piece in page.split("'") if ".jpg" in piece)
    except Exception as e:
        print(e)
# --- Settings and shared state ----------------------------------------------
MAX = 100          # number of listing pages requested per category
count = 0          # images saved so far; incremented by download_pic
time_out = 60      # timeout (seconds) passed to urllib2.urlopen
thread_num = 50    # worker threads in each pool
pic_list = []      # image URLs collected by get_pic
page_list = []     # listing-page URLs to crawl
pic_kind = ["hot","share","mm","taiwan","japan","model"]
# Host names checked by Filter() before a URL is downloaded.
Filter_list = ["imgsize.ph.126.net","img.ph.126.net","img2.ph.126.net"]

# --- Output directory -------------------------------------------------------
# Backslashes are escaped explicitly; the resulting path is unchanged.
dir_name = "C:\\Photos\\" + rename()
os.makedirs(dir_name)
os.chdir(dir_name)
start_time = time.time()

# --- Build the list of listing pages ----------------------------------------
# The base must be the site root: the category and "/page/N" are appended
# below, so a base that already ends in "model/page/" produces invalid URLs.
url_address = "http://www.mzitu.com/"
headers = {"User-Agent":" Mozilla/5.0 (Windows NT 10.0; rv:39.0) Gecko/20100101 Firefox/39.0"}
for pic_i in pic_kind:
    for i in range(1, MAX + 1):
        page_list.append(url_address + pic_i + "/page/" + repr(i))

# --- Stage 1: collect image URLs from every listing page --------------------
page_pool = ThreadPool(thread_num)
page_pool.map(get_pic, page_list)
page_pool.close()
page_pool.join()
# Each message is joined into one string so the single-argument print call
# produces the same output as the former multi-argument print statement.
print(" ".join([
    "获取到".decode("utf-8").encode(type_),
    str(len(pic_list)),
    "张图片,开始下载!".decode("utf-8").encode(type_),
]))

# --- Stage 2: download every collected image --------------------------------
pool = ThreadPool(thread_num)
pool.map(download_pic, pic_list)
pool.close()
pool.join()
print(" ".join([
    str(count),
    "张图片保存在".decode("utf-8").encode(type_) + dir_name,
]))
print(" ".join([
    "共耗时".decode("utf-8").encode(type_),
    str(time.time() - start_time),
    "s",
]))
推荐阅读
热点文章
Discord.py(重写)on_member_update 无法正常工作
0
Discord.py 在 vc 中获取用户分钟数
0
discord.py 重写 |为我的命令出错
0
Discord.py rewrite 如何 DM 命令?
0
播放音频时,最后一部分被切断.如何解决这个问题?(discord.py)
0
在消息删除消息 Discord.py
0
如何使 discord.py 机器人私人/直接消息不是作者的人?
0
(Discord.py) 如何获取整个嵌入内容?
0
Discord bot 尽管获得了许可,但不能提及所有人
0
Discord.py discord.NotFound 异常
0
