###Using ECharts reports in Django
# Add the routes in urls.py
from django.contrib import admin
from django.urls import path
from comic.views import report, report_data

urlpatterns = [
    path('admin/', admin.site.urls),
    path('report/', report),
    path('report_data/', report_data),
]
# Contents of views.py
from django.shortcuts import render
from django.http import HttpResponse, JsonResponse


def report_data(request):
    # Return the chart data as JSON for the asynchronous front-end request
    json_data = {'xdata': ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'],
                 'ydata': [820, 932, 901, 934, 1290, 1330, 1320]}
    return JsonResponse(json_data)


def report(request):
    # Render the page, also passing the same data directly as template context
    context = {'xdata': ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'],
               'ydata': [820, 932, 901, 934, 1290, 1330, 1320]}
    return render(request, 'report.html', context)
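With these two views, a GET to /report_data/ returns the dictionary above serialized as JSON. A quick sanity check from `python manage.py shell`, using Django's test client:

from django.test import Client

resp = Client().get('/report_data/')
print(resp.status_code)  # 200
print(resp.json())       # {'xdata': ['Mon', ..., 'Sun'], 'ydata': [820, ..., 1320]}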
# report.html template that renders the ECharts chart
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>ECharts</title>
    <!-- Include echarts.js and jQuery -->
    <script src="/static/js/echarts.min.js"></script>
    <script src="/static/js/jquery.min.js"></script>
</head>
<body>
    <!-- Prepare a DOM container with a width and height for ECharts -->
    <div id="main" style="width: 600px;height:400px;"></div>
    <script type="text/javascript">
        // Initialize the ECharts instance on the prepared DOM node
        var myChart = echarts.init(document.getElementById('main'));
        // Alternative approach (kept here for reference): build the option from the
        // template context passed in by the report() view.
        // option = {
        //     xAxis: {
        //         type: 'category',
        //         data: {{ xdata | safe }}
        //     },
        //     yAxis: {
        //         type: 'value'
        //     },
        //     series: [{
        //         data: {{ ydata }},
        //         type: 'bar'
        //     }]
        // };
        // myChart.setOption(option);

        // Fetch the chart data asynchronously from the report_data view and render it
        $.get('/report_data/', function (data) {
            var option = {
                xAxis: {
                    type: 'category',
                    data: data['xdata']
                },
                yAxis: {
                    type: 'value'
                },
                series: [{
                    data: data['ydata'],
                    type: 'bar'
                }]
            };
            myChart.setOption(option);
        });
    </script>
</body>
</html>
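For the render() call and the two /static/js/ script tags above to resolve, Django also needs to know where report.html and the JS files live. A minimal sketch of the relevant settings.py entries, assuming a project-level templates/ directory and a static/js/ directory containing echarts.min.js and jquery.min.js (this directory layout is an assumption, adjust it to your project):

import os

BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [os.path.join(BASE_DIR, 'templates')],  # report.html is assumed to live here
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

STATIC_URL = '/static/'
STATICFILES_DIRS = [os.path.join(BASE_DIR, 'static')]  # assumed to hold js/echarts.min.js and js/jquery.min.js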
###Crawling JD.com with Scrapy (using middlewares)
# settings.py contents
# Uncomment lines 55-57 of the default settings.py (the DOWNLOADER_MIDDLEWARES block), as follows
DOWNLOADER_MIDDLEWARES = {
    'jd.middlewares.SeleniumMiddleware': 543,
}

KEYWORDS = ['月饼']      # search keyword ("mooncake")
MAX_PAGE = 110           # number of search result pages to crawl
SELENIUM_TIMEOUT = 10    # seconds to wait for page elements

IPPOOL = [
    {"ipaddr": "115.223.202.210:9000"},
    {"ipaddr": "115.223.252.198:9000"},
    {"ipaddr": "114.234.82.76:9000"},
    {"ipaddr": "115.223.241.43:9000"},
    {"ipaddr": "180.118.92.248:9000"},
    {"ipaddr": "115.223.241.109:9000"},
    {"ipaddr": "27.206.74.114:9000"}
]
FEED_EXPORT_ENCODING = 'utf-8'
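Note that IPPOOL is only defined here; the proxy line inside SeleniumMiddleware below is commented out, so nothing uses it yet. One way to wire it up is a small extra downloader middleware that attaches a random address from the pool to each request. The class name and priority are assumptions, and it would also need to be registered in DOWNLOADER_MIDDLEWARES (e.g. 'jd.middlewares.RandomProxyMiddleware': 542):

# middlewares.py -- illustrative sketch, not part of the original project
import random
from jd.settings import IPPOOL

class RandomProxyMiddleware(object):
    """Attach a random proxy from IPPOOL to every outgoing request."""

    def process_request(self, request, spider):
        ip = random.choice(IPPOOL)['ipaddr']
        request.meta['proxy'] = 'http://' + ip

Because SeleniumMiddleware drives its own Chrome instance, request.meta['proxy'] only affects requests that Scrapy itself downloads; the commented-out --proxy-server line in SeleniumMiddleware shows how the same value could be handed to Chrome instead.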
###middlewares.py is as follows
# -*- coding: utf-8 -*-
# Define here the models for your spider middleware
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from scrapy.http import HtmlResponse
from logging import getLogger
import time
import random
from jd.settings import IPPOOL
from scrapy import signals


class SeleniumMiddleware():
    """Downloader middleware that renders JD search pages in Chrome via Selenium."""

    def __init__(self, timeout=None, service_args=[]):
        self.timeout = timeout
        chromeOptions = webdriver.ChromeOptions()
        # chromeOptions.add_argument("--proxy-server=%s" % request.meta["proxy"])
        self.browser = webdriver.Chrome(chrome_options=chromeOptions)
        self.browser.set_window_size(1400, 700)
        self.wait = WebDriverWait(self.browser, self.timeout)

    def __del__(self):
        self.browser.close()
    def process_request(self, request, spider):
        """
        Fetch the page with the Selenium-driven browser.
        :param request: Request object
        :param spider: Spider object
        :return: HtmlResponse
        """
        page = request.meta.get('page', 1)
        try:
            # print(request.meta["proxy"])
            self.browser.get(request.url)
            # Scroll down the page in eighths so JD's lazy-loaded items get rendered
            for i in range(1, 9):
                self.browser.execute_script(
                    'window.scrollTo(0, %d * document.body.scrollHeight / 8)' % i)
                if i < 8:
                    time.sleep(2)
            if page > 1:
                # Jump to the requested page via the page-number input at the bottom of the page
                input = self.wait.until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, '#J_bottomPage .input-txt')))
                submit = self.wait.until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, '#J_bottomPage a.btn-default')))
                input.clear()
                input.send_keys(page)
                submit.click()
            # Wait until the pager shows the requested page number and the page input is present again
            self.wait.until(
                EC.text_to_be_present_in_element((By.CSS_SELECTOR, '#J_topPage b'), str(page)))
            self.wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '#J_bottomPage .input-txt')))
            # self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.m-itemlist .items .item')))
            return HtmlResponse(url=request.url, body=self.browser.page_source, request=request,
                                encoding='utf-8', status=200)
        except TimeoutException:
            return HtmlResponse(url=request.url, status=500, request=request)

    @classmethod
    def from_crawler(cls, crawler):
        return cls(timeout=crawler.settings.get('SELENIUM_TIMEOUT'))
###items.py is as follows
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
from scrapy import Item, Field


class JdItem(Item):
    # define the fields for your item here like:
    collection = 'products'
    image = Field()
    price = Field()
    deal = Field()
    title = Field()
    shop = Field()
    location = Field()
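The collection = 'products' attribute is not a Field; it is only useful if a pipeline reads it, typically to pick the MongoDB collection the items are written to. The original post does not show pipelines.py, so the following is only a sketch of such a pipeline, assuming MONGO_URI and MONGO_DB are added to settings.py and pymongo is installed:

# pipelines.py -- illustrative sketch, not taken from the original post
import pymongo

class MongoPipeline(object):
    def __init__(self, mongo_uri, mongo_db):
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db

    @classmethod
    def from_crawler(cls, crawler):
        return cls(mongo_uri=crawler.settings.get('MONGO_URI'),
                   mongo_db=crawler.settings.get('MONGO_DB'))

    def open_spider(self, spider):
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    def process_item(self, item, spider):
        # item.collection is the 'products' attribute defined on JdItem
        self.db[item.collection].insert_one(dict(item))
        return item

    def close_spider(self, spider):
        self.client.close()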
###The spider code under spiders/ for crawling the JD data
# -*- coding: utf-8 -*-
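The rest of the spider source is cut off in the original post. As a rough guide to how the pieces fit together, here is a minimal sketch of such a spider: it issues one search request per keyword and page, passes the page number through request.meta so SeleniumMiddleware can navigate to it, and fills the JdItem fields from the rendered page. The search URL and CSS selectors below are assumptions, not taken from the original code:

# spiders/jd.py -- illustrative sketch only; the original spider is truncated above
from urllib.parse import quote

from scrapy import Spider, Request
from jd.items import JdItem

class JdSpider(Spider):
    name = 'jd'
    allowed_domains = ['search.jd.com']
    base_url = 'https://search.jd.com/Search?keyword='  # assumed search entry point

    def start_requests(self):
        for keyword in self.settings.get('KEYWORDS'):
            for page in range(1, self.settings.get('MAX_PAGE') + 1):
                url = self.base_url + quote(keyword)
                # 'page' is read by SeleniumMiddleware.process_request to paginate
                yield Request(url=url, callback=self.parse,
                              meta={'page': page}, dont_filter=True)

    def parse(self, response):
        # '#J_goodsList .gl-item' is an assumed selector for one product card
        for product in response.css('#J_goodsList .gl-item'):
            item = JdItem()
            item['title'] = ''.join(product.css('.p-name em ::text').extract()).strip()
            item['price'] = product.css('.p-price i::text').extract_first()
            item['image'] = product.css('.p-img img::attr(src)').extract_first()
            item['deal'] = product.css('.p-commit a::text').extract_first()
            item['shop'] = product.css('.p-shop a::text').extract_first()
            item['location'] = ''  # placeholder; JD search results do not expose a location directly
            yield item

With FEED_EXPORT_ENCODING = 'utf-8' already set, the results can be exported readably with, for example, scrapy crawl jd -o products.json (the spider name 'jd' is part of the sketch).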