下载漫画小脚本
- 作者: 狗男女中的狗男
- 来源: 51数据库
- 2022-08-12
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Copyright (c) 2015, The Sun Technology
This Program could download files from the internet
"""
import urllib2
import os
import time
from urllib2 import HTTPError
from bs4 import BeautifulSoup
from urlparse import urlparse
# Template for the local save location: the directory/file portion of a
# URL path is interpolated into this filesystem prefix via the '%' operator.
# NOTE(review): despite the name this is a local path template, not a URL,
# and it is hard-coded to one machine's home directory — confirm intent.
BASE_URL="/Users/mac/Documents%s"
def get_file_name(req_url):
    """Split the path component of *req_url* into (directory, filename).

    :param req_url: a full URL, e.g. ``http://host/a/b/c.jpg``
    :return: tuple ``(head, tail)`` as produced by :func:`os.path.split`
    """
    parsed = urlparse(req_url)
    return os.path.split(parsed.path)
def get_save_path(save_dir):
    """Ensure the local directory for *save_dir*'s URL path exists.

    The directory component of the URL path is interpolated into
    ``BASE_URL`` and created when missing.

    :param save_dir: URL whose path determines the local directory
    :return: ``None`` (side effect only)
    """
    dir_part, _ = get_file_name(save_dir)
    save_path = BASE_URL % dir_part
    if not os.path.exists(save_path):
        # makedirs, not mkdir: URL paths are usually nested (/a/b/c),
        # and mkdir raises when intermediate directories are missing.
        os.makedirs(save_path)
def save_files(file_url, file_path):
    """Download *file_url* and write the raw bytes to *file_path*.

    Prints a confirmation line and the elapsed wall-clock time.

    :param file_url: URL of the file to fetch
    :param file_path: local path to write the downloaded bytes to
    """
    start = time.time()
    response = urllib2.urlopen(file_url)
    try:
        payload = response.read()
    finally:
        # Close even if read() raises, so the socket is not leaked.
        response.close()
    with open(file_path, "wb") as handler:
        handler.write(payload)
    # Single-argument print(...) behaves identically under Python 2 and 3.
    print("%s has been downloaded successfully " % file_url)
    # time.time() differences are seconds — the original label said "ms".
    print("Total cost:%.3f s" % (time.time() - start))
def download(url_path, start=82, count=10):
    """Fetch *count* consecutive comic pages and save their main image.

    Each page URL is built by interpolating the page number into
    *url_path*; the page's ``img#main-comic`` element is located with
    BeautifulSoup and the image is downloaded via :func:`save_files`.

    :param url_path: page URL template with one integer ``%`` slot
    :param start: first page number (default 82, as originally hard-coded)
    :param count: number of consecutive pages to fetch (default 10)
    """
    for page_num in range(start, start + count):
        # Initialize per iteration: the original closed an unbound (or
        # stale) `response` in `finally` when urlopen itself raised.
        response = None
        try:
            combine_url = url_path % page_num
            response = urllib2.urlopen(combine_url)
            page = response.read() if response.getcode() == 200 else None
            if not page:
                return
            soup = BeautifulSoup(page, "html.parser")
            img_tags = soup.find_all('img', id="main-comic")
            # The img src is scheme-relative ("//host/..."); prepend the
            # scheme taken from the page URL template.
            parsed = urlparse(url_path)
            rebuild_url = parsed.scheme + ':' + img_tags[0].get('src')
            name_parts = get_file_name(rebuild_url)
            save_files(rebuild_url, BASE_URL % '/'.join(name_parts))
        except HTTPError as e:
            print("An error has occurred: %s" % e)
            continue
        finally:
            if response is not None:
                response.close()
if __name__ == '__main__':
    # NOTE(review): this URL is a direct .jpg with no page-number
    # placeholder, so the '%' interpolation inside download() presumably
    # does not substitute anything — verify the intended template URL.
    req_url="http://www.51sjk.com/Upload/Articles/1/0/320/320597_20220812155049289.jpg"
    # Create the local directory derived from the URL path, then fetch.
    get_save_path(req_url)
    download(req_url)
推荐阅读
热点文章
Discord.py(重写)on_member_update 无法正常工作
0
Discord.py 在 vc 中获取用户分钟数
0
discord.py 重写 |为我的命令出错
0
Discord.py rewrite 如何 DM 命令?
0
播放音频时,最后一部分被切断.如何解决这个问题?(discord.py)
0
在消息删除消息 Discord.py
0
如何使 discord.py 机器人私人/直接消息不是作者的人?
0
(Discord.py) 如何获取整个嵌入内容?
0
Discord bot 尽管获得了许可,但不能提及所有人
0
Discord.py discord.NotFound 异常
0
