用Python爬ACG Art的图

今天闲下来了,实在不想做工作上的东西,就来爬一下 ACG Art 的图吧。

打开手机 APP,然后用 Fiddler 查看流量。

先抓包,看请求头。

找到这条

GET http://acg.sugling.in/json_list.php?device=iphone5&token=8646050102010500900165&type=r18&version=a.6.2 HTTP/1.1
Host: acg.sugling.in
Accept: */*
Connection: keep-alive
Cookie: __cfduid=d9b4ca840b3739d8b6693c376cab600ad1454386060
User-Agent: ACGArtFree/6.2 (iPhone; iOS 10.1; Scale/3.00)
Accept-Language: zh-Hans-GB;q=1, en-US;q=0.9
Accept-Encoding: gzip, deflate
Connection: keep-alive

这是获取图片列表的请求头。

其中的 Cookie 一定要传过去,不然得不到图片列表。

然后去获取图片保存即可

感谢 anuminantral@百度贴吧 以下代码是在他帖子的基础上修改的。


#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
import os.path
import sys

# Host of the ACG Art API and the URL paths this script talks to.
ACGHOST = 'http://acg.sugling.in'
IMGURLPATH = '/_uploadfiles/iphone5/640/'
DAILYIMGURL = '/json_daily.php'
IMGLISTPATH = '/json_list.php'

# Headers attached to every request made through ``request_session``.
# The cookie has to travel in the standard "Cookie" request header; a
# header named "Cookies" is not interpreted as a cookie by HTTP servers.
HEADERS = {
    "Cookie": "__cfduid=d9b4ca840b3739d8b6693c376cab600ad1454386060",
    "User-Agent": "ACGArtFree/6.2 (iPhone; iOS 10.1; Scale/3.00)",
    "Accept": "*/*"
}

# Query parameters shared by the list endpoints; the fetch functions add
# their own 'sexyfilter' / 'type' entries on top of these.
PAYLOAD = {
    'device': 'iphone5',
    'page': '1',
    'token': '8646050102010500900165',
    'version': 'a.6.2'
}

# Download root: an "ACGArt" directory under the directory part of
# sys.argv[0].
SAVEPATH = os.path.join(os.path.split(sys.argv[0])[0], 'ACGArt')

# One shared session so every request carries HEADERS.
request_session = requests.session()
request_session.headers.update(HEADERS)


def formaturl(x):
    """Return the full image URL for the image file name *x*."""
    return ACGHOST + IMGURLPATH + x

def GetDailyImgList():
    """Fetch the daily image list and return it as a list of image URLs.

    Sends a GET request to ``ACGHOST + DAILYIMGURL`` with the shared
    ``PAYLOAD`` parameters plus ``sexyfilter=yes``.  The JSON response is
    read as a mapping whose ``'data'`` entry is a sequence of items, each
    carrying an ``'imgs'`` list of image file names; every name is turned
    into a full URL with ``formaturl``.

    Returns:
        A list of image URL strings.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Work on a copy so this call's parameters do not leak into the shared
    # PAYLOAD used by the other fetch functions.
    params = dict(PAYLOAD, sexyfilter='yes')
    sys.stdout.write('GetDailyImgList.... ')
    sys.stdout.flush()  # show the progress text before the blocking request
    response = request_session.get(ACGHOST + DAILYIMGURL, params=params)
    response.raise_for_status()
    daily_urls = []
    for entry in response.json()['data']:
        daily_urls.extend(formaturl(name) for name in entry['imgs'])
    print('Done.')
    return daily_urls

def GetR18ImgList():
    """Fetch the 'security_sexy' image list and return it as image URLs.

    Sends a GET request to ``ACGHOST + IMGLISTPATH`` with the shared
    ``PAYLOAD`` parameters plus ``sexyfilter=no`` and
    ``type=security_sexy``.  Each entry of the JSON response is passed
    through ``formaturl`` to build the full image URL.

    Returns:
        A list of image URL strings.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Work on a copy so 'type' / 'sexyfilter' do not leak into the shared
    # PAYLOAD used by the other fetch functions.
    params = dict(PAYLOAD, sexyfilter='no', type='security_sexy')
    sys.stdout.write('GetR18ImgList.... ')
    sys.stdout.flush()  # show the progress text before the blocking request
    response = request_session.get(ACGHOST + IMGLISTPATH, params=params)
    response.raise_for_status()
    # A real list (not a lazy map object) so callers can take len() of it.
    r18_urls = [formaturl(name) for name in response.json()]
    print('Done.')
    return r18_urls

def GetGIFImgList():
    """Fetch the 'gif' image list and return the decoded JSON response.

    Sends a GET request to ``ACGHOST + IMGLISTPATH`` with the shared
    ``PAYLOAD`` parameters plus ``sexyfilter=no`` and ``type=gif``.

    Returns:
        The decoded JSON body of the response, unmodified.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # Work on a copy so 'type' / 'sexyfilter' do not leak into the shared
    # PAYLOAD used by the other fetch functions.
    params = dict(PAYLOAD, sexyfilter='no', type='gif')
    sys.stdout.write('GetGIFImgList.... ')
    sys.stdout.flush()  # show the progress text before the blocking request
    response = request_session.get(ACGHOST + IMGLISTPATH, params=params)
    response.raise_for_status()
    # NOTE(review): unlike GetR18ImgList, the entries are returned without
    # going through formaturl -- presumably this endpoint already returns
    # full URLs; confirm against a real response.
    gif_list = response.json()
    print('Done.')
    return gif_list

def DownloadImg(url, path):
    """Download *url* into the file *path* unless that file already exists.

    The body is streamed to disk in 1024-byte chunks.  If anything goes
    wrong while writing (network error, Ctrl-C, ...), the partially written
    file is removed before the exception propagates, so a later run does
    not mistake a truncated file for a finished download.

    Args:
        url: Address of the image to fetch.
        path: Destination file path.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    if os.path.isfile(path):
        # Already downloaded on an earlier run; nothing to do.
        return
    response = request_session.get(
        url, headers={'Accept': 'image/*;q=0.8'}, stream=True)
    try:
        response.raise_for_status()
        if response.status_code != 200:
            # Non-error status other than 200: nothing is written.
            return
        try:
            with open(path, 'wb') as data:
                for chunk in response.iter_content(chunk_size=1024):
                    data.write(chunk)
        except BaseException:
            # Includes KeyboardInterrupt / SystemExit: drop the incomplete
            # file, then re-raise with the original traceback intact.
            if os.path.isfile(path):
                os.remove(path)
            raise
    finally:
        # A streamed response keeps its connection until it is closed.
        response.close()

def check_path(path):
    """Ensure the directory *path* exists, reporting the outcome on stdout.

    Prints 'Done.' when the directory is already there; otherwise prints
    'Not Exist', creates the directory (including missing parents) and
    reports the created path.
    """
    sys.stdout.write('check path.... ')
    if os.path.isdir(path):
        print('Done.')
        return
    print('Not Exist')
    os.makedirs(path)
    print('mkdir %s' % path)

def Download(urllist, dirname):
    """Download every URL in *urllist* into the sub-directory *dirname*.

    The target directory is ``SAVEPATH/dirname`` and is created when
    missing.  Each file is stored under the base name of its URL.  A
    one-line progress counter ("done / total") is rewritten in place while
    downloading.

    Args:
        urllist: Iterable of image URLs.
        dirname: Name of the sub-directory under SAVEPATH.
    """
    # Materialize once so any iterable (e.g. a lazy map object) has a length.
    urllist = list(urllist)
    path = os.path.join(SAVEPATH, dirname)
    check_path(path)
    imgcount = len(urllist)
    print('Download ... %s' % dirname)
    for count, url in enumerate(urllist):
        # "done / total"; the trailing '\r' makes the next update overwrite
        # this line, and the flush makes it visible immediately.
        sys.stdout.write('%s / %s \r' % (count, imgcount))
        sys.stdout.flush()
        filename = str(os.path.basename(url))
        DownloadImg(url, os.path.join(path, filename))
    print('%s Count:%s ... Done.' % (dirname, imgcount))

def main():
    """Fetch the Daily, R18 and GIF image lists, then download each one.

    Every list is saved into a sub-directory named after its key.
    """
    print('begin....')
    lists_by_dir = {}
    # Lists are fetched in this fixed order before any download starts.
    lists_by_dir['Daily'] = GetDailyImgList()
    lists_by_dir['R18'] = GetR18ImgList()
    lists_by_dir['GIF'] = GetGIFImgList()
    for dirname, urllist in lists_by_dir.items():
        Download(urllist, dirname)


if __name__ == '__main__':
    main()

Leave a Reply

Your email address will not be published. Required fields are marked *