avatar

目录
花瓣网图片


这个爬虫是18年过年在家无聊写的,具体咋分析已经差不多忘记了,直接上代码了。

这是一个爬取花瓣网图片的爬虫:先在花瓣网上找到自己想要的图片页面,复制该页面的链接,然后运行脚本并粘贴链接即可。

Code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# -*- coding: utf-8 -*-
# date: 2018.2.20
import json
import os
import re

import requests
from requests.exceptions import RequestException
# Page address typed in by the user; main() hands it to get_page() as the
# first page, and the two pagination prefixes below are derived from it.
url = input("请输入网址:")

# Prefix that get_page() joins with a pin's file key to build the image URL.
image_url = "http://hbimg.b0.upaiyun.com/"

# Next-page URL prefixes. get_page() appends a pin id to the first one and a
# page number to the second one.
d_next_url = f"{url}?jdsmifcz&limit=20&wfl=1&max="
l_next_url = f"{url}&jdtyxl2z&per_page=20&wfl=1&page="

# Directory that downloaded images are written to.
pwd = './huaban/'

# Page counter shared with get_page() through `global b`.
b = 0
def get_main(url, timeout=10):
    """Fetch *url* with HTTP GET and return the body decoded as UTF-8.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl indefinitely.

    Returns:
        The response text when the status code is 200. ``None`` for any other
        status code, or when the request fails (the error is printed).
    """
    try:
        result = requests.get(url, timeout=timeout)
    except RequestException as r:
        print("error: ", r)
        return None
    if result.status_code != 200:
        return None
    # Force UTF-8 instead of relying on the encoding guessed from the headers.
    result.encoding = 'utf-8'
    return result.text
def down_image(filename, url, timeout=30):
    """Download *url* and store the response body in *filename*.

    The data is first written to ``filename + '.part'`` and renamed only after
    the download succeeded, so a failed download never leaves a truncated file
    under the final name.

    Args:
        filename: Destination path of the image.
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. Request and file-system errors are printed, not raised.
    """
    tmp_name = filename + '.part'
    try:
        result = requests.get(url, timeout=timeout)
        # Do not save an HTTP error page as if it were an image.
        result.raise_for_status()
        with open(tmp_name, 'wb') as f:
            # The body is already in memory; write it in one call rather than
            # byte by byte.
            f.write(result.content)
        os.replace(tmp_name, filename)
    except (RequestException, OSError) as e:
        print("error: ", e)
        # Best-effort cleanup of a partially written temporary file.
        try:
            os.remove(tmp_name)
        except OSError:
            pass
# Matches the assignment that precedes the embedded pin list in a page.
_PINS_ASSIGNMENT = re.compile(r'app\.page\["pins"\]\s*=\s*')


def _parse_pins(text):
    """Return the pin list embedded in *text*, or ``[]`` if there is none."""
    if not text:
        return []
    match = _PINS_ASSIGNMENT.search(text)
    if match is None:
        return []
    try:
        # raw_decode parses one JSON value and ignores whatever follows it
        # (the trailing ';' and the rest of the page). Unlike eval() it never
        # executes page content and it understands null/true/false.
        pins, _ = json.JSONDecoder().raw_decode(text[match.end():])
    except ValueError as e:
        print("error: ", e)
        return []
    return pins if isinstance(pins, list) else []


def _download_pins(page):
    """Download every pin in *page* that passes the popularity filter."""
    photo_num = 0
    for item in page:
        like_count = item['like_count']
        repin_count = item['repin_count']
        popular = (
            (like_count > 0 and repin_count > 2)
            or repin_count > 10
            or like_count > 10
        )
        if not popular:
            continue
        print('正在下载第{0}张图片'.format(photo_num))
        filename = pwd + str(item['pin_id']) + '.jpg'
        if os.path.isfile(filename):
            print('文件已经存在,正在下载下一张 ', filename)
            continue
        down_image(filename, image_url + item['file']['key'] + '_fw658')
        photo_num += 1


def get_page(url):
    """Crawl *url* and the pages that follow it, downloading popular pins.

    Each page is fetched with ``get_main``, its pin list is parsed, and the
    pins passing the like/repin thresholds are saved through ``down_image``
    (pins whose file already exists are skipped).

    Pagination depends on the current URL:

    * URLs containing ``'search'`` advance the module-level counter ``b`` and
      request ``l_next_url + str(b)`` while ``b <= 100``.
    * Other URLs request ``d_next_url`` plus the id of the last pin on the
      page. Presumably the server returns the pins that come after that id;
      this is not verifiable from this file.

    Crawling stops when a page cannot be fetched, contains no pins, or (for
    non-search URLs) repeats the previous page's last pin id.

    Args:
        url: Address of the first page to crawl.

    Returns:
        None.
    """
    global b
    current_url = url
    last_cursor = None
    # Iterate instead of recursing: one stack frame regardless of page count.
    while True:
        print('正在请求的网址是:', current_url)
        page = _parse_pins(get_main(current_url))
        if not page:
            break
        _download_pins(page)
        if 'search' in current_url:
            if b > 100:
                break
            b += 1
            current_url = l_next_url + str(b)
        else:
            # Use the last pin of the page (not the last downloaded one) so a
            # page without any popular pin still yields a next-page cursor.
            cursor = page[-1]['pin_id']
            if cursor == last_cursor:
                # Same page served again; stop rather than loop forever.
                break
            last_cursor = cursor
            current_url = d_next_url + str(cursor)



def main():
    """Create the download directory if needed, then crawl the entered URL."""
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(pwd, exist_ok=True)
    get_page(url)

if __name__ == '__main__':
    # Raw string: the banner's backslashes are literal characters. In a normal
    # string they form invalid escape sequences, which newer Python versions
    # warn about. The printed text is unchanged.
    print(r"""
By:
_ __ _ _ _
| |/ /___ _ _| |__ ___ / \ _ __ __| |
| ' // _ \ | | | '_ \ / _ \ / _ \ | '__/ _` |
| . \ __/ |_| | |_) | (_) / ___ \| | | (_| |
|_|\_\___|\__, |_.__/ \___/_/ \_\_| \__,_|
|___/ (适用于Python3.6)

""")
    main()
文章作者: KeyboArd
文章链接: https://www.wrpzkb.cn/huabanSpider/
版权声明: 本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来自 KeyboArd's Blog
打赏
  • 微信
    微信
  • 支付寶
    支付寶

评论