目录
前言
成品展示(UI写的确实有点糊弄,太懒了不想弄了)
Vue部分
App.vue(中间感觉还行,不算难看)
result组件:
路由:
Django部分
view
functionset(自己建的)
前言
本来一开始没想弄这个
我一开始想弄爬虫取爬携程的网站然后做一个全国机场的最短路
为什么想弄这个呢,因为去年这个时候一个学长跟我说他们同学弄了个北京市的各个位置的最短路,但他们弄的是虚拟数据,我就想着能不能用爬虫弄点真实数据来做一个类似的,就决定用django和vue+爬虫弄一个全国机场的最短路
但是!!
携程这个网站反爬太猛了,普通爬虫肯定是弄不下来的
我曾尝试过用selenium驱动浏览器来爬数据
这样是可以爬下来,但爬取每一对机场的信息就要3,4s左右
全国200多个机场,n方暴力出一张邻接矩阵就要好一两天的时间
我之后也曾想过只保留top30城市的机场,这样爬倒是能爬出来,但感觉太low了,没啥意思
就弃掉了
附上半成品-爬虫代码
(好像最后还给我ip封了,想用的加个代理池啥的)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
base1='https://flights.ctrip.com/online/list/oneway-'
base2='-'
base3='?depdate='
base4='&cabin=y_s_c_f&adult=1&child=0&infant=0'
city ={
'北京': 'bjs', '澳门': 'mfm', '长春': 'cgq', '重庆': 'ckg', '长沙': 'csx',
'成都': 'ctu', '大连': 'dlc', '大庆': 'dqa', '福州': 'foc', '广州': 'can',
'桂林': 'kwl', '合肥': 'hfe', '杭州': 'hgh','哈尔滨': 'hrb',
'佳木斯': 'jmu','锦州': 'jnz', '济南': 'tna', '昆明': 'kmg',
'兰州': 'lhw', '洛阳': 'lya', '连云港': 'lyg','南昌': 'khn','宁波': 'ngb', '南京': 'nkg','秦皇岛': 'bpe',
'泉州': 'jjn', '青岛': 'tao', '上海': 'sha', '沈阳': 'she', '石家庄': 'sjw','三亚': 'syx',
'深圳': 'szx','天津': 'tsn', '太原': 'tyn', '乌鲁木齐': 'urc', '威海': 'weh','武汉': 'wuh',
'香港': 'hkg', '西安': 'sia', '咸阳': 'sia', '厦门': 'xmn', '郑州': 'cgo',
}
citynum={'北京': 0, '澳门': 1, '长春': 2, '重庆': 3, '长沙': 4, '成都': 5, '大连': 6, '大庆': 7,
'福州': 8, '广州': 9, '桂林': 10, '合肥': 11, '杭州': 12, '哈尔滨': 13, '佳木斯': 14,
'锦州': 15, '济南': 16, '昆明': 17, '兰州': 18, '洛阳': 19, '连云港': 20, '南昌': 21, '宁波': 22,
'南京': 23, '秦皇岛': 24, '泉州': 25, '青岛': 26, '上海': 27, '沈阳': 28, '石家庄': 29, '三亚': 30,
'深圳': 31, '天津': 32, '太原': 33, '乌鲁木齐': 34, '威海': 35, '武汉': 36, '香港': 37, '西安': 38,
'咸阳': 39, '厦门': 40, '郑州': 41
}
numcity={0: '北京', 1: '澳门', 2: '长春', 3: '重庆', 4: '长沙', 5: '成都', 6: '大连', 7: '大庆', 8: '福州',
9: '广州', 10: '桂林', 11: '合肥', 12: '杭州', 13: '哈尔滨', 14: '佳木斯', 15: '锦州', 16: '济南',
17: '昆明', 18: '兰州', 19: '洛阳', 20: '连云港', 21: '南昌', 22: '宁波', 23: '南京', 24: '秦皇岛',
25: '泉州', 26: '青岛', 27: '上海', 28: '沈阳', 29: '石家庄', 30: '三亚', 31: '深圳', 32: '天津',
33: '太原', 34: '乌鲁木齐', 35: '威海', 36: '武汉', 37: '香港', 38: '西安', 39: '咸阳', 40: '厦门',
41: '郑州'
}
import time
def calw(star,fina,data):
try:
url=base1+city[star]+base2+city[fina]+base3+data+base4
path=r'C:\Program Files\Google\Chrome\Application\chrome.exe'
chrom_options = Options()
chrom_options.add_argument('--headless')
chrom_options.add_argument(
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.46')
chrom_options.add_argument('--disable-gpu')
chrom_options.binary_location = path
driver = webdriver.Chrome(options=chrom_options)
driver.get(url)
time.sleep(3.3)
lower=driver.find_elements_by_xpath('//div[@class="tabs-content"]//div')
tim=data.split('-')
tim=tim[1]+'-'+tim[2]
for item in range(len(lower)):
if tim in lower[item].get_attribute('textContent'):
res=""
for i in lower[item+1].get_attribute('textContent'):
if i in '1234567890':
res=res+i
try:
return int(res)
except:
return 998244353
return 998244353
except:
return 998244353
ans=[]
for s1 in city:
tmp = []
ans.append(tmp)
import pandas as pd
ans=pd.DataFrame(ans)
ans.to_csv('12-17.csv', index=False)
于是怎么在这么短的时间内中途改题还能做一个老师和我都满意的大作业呢
将目标转向上周刚结了的python课大作业
大致设想是这样的
将原来训练弄好的model持久化
丢到django里
前端获取用户buctoj和codeforcesid
然后传到后端,后端取爬取信息
丢model里,跑出数据
在返回给前端
前端利用返回的数据展示结果(就是套了个皮)
原理大概如下图
成品展示(UI写的确实有点糊弄,太懒了不想弄了)
Vue部分
App.vue(中间感觉还行,不算难看)
<script>
import axios from 'axios'
export default {
data(){
return{
studentsum:"",
codeforcesid:"",
}
},
methods:{
gotoback(){
const obj={studentsum:this.studentsum,codeforcesid:this.codeforcesid}
axios.post(
'/deal/',
obj
).then(response=>{
// this.studentsum=""
// this.codeforcesid=""
if(response.data!=-1){
this.$router.push({path:"/result",query:{score:response.data}})
}
else{
alert("学号或cfid有误")
}
}).catch(error=>{
console.log(error)
})
},
onSubmit(){
return false
}
}
}
</script>
<template>
<div class="container">
<div class="drop">
<div class="content">
<h2>算 法 竞 赛 获 奖 预 测</h2>
<form autocomplete="off" @submit.prevent="onSubmit">
<div class="inputBox">
<input type="text" v-model="studentsum" placeholder="学号">
</div>
<div class="inputBox">
<input type="text" v-model="codeforcesid" placeholder="Codeforces ID">
</div>
<router-view></router-view>
<div class="inputBox">
<input type="submit" value="查询" @click="gotoback" @keyup.enter="gotoback">
</div>
</form>
</div>
<a href="#" class="btns signup"></a>
</div>
<a href="#" class="btns"></a>
<a href="#" class="btns signup"></a>
</div>
</template>
<style>
*{
margin: 0;
padding: 0;
font-family: 'Poppins', sans-serif;
}
body{
display: flex;
justify-content: center;
align-items: center;
min-height: 100vh;
background-color: #dcdee4;
}
.container{
position: relative;
justify-content: center;
align-items: center;
left: -80px;
}
.container .drop{
position: relative;
width: 390px;
height: 430px;
box-shadow: inset 20px 20px 20px rgba(0, 0, 0, .05),
25px 35px 20px rgba(0, 0, 0, .05),
25px 30px 30px rgba(0, 0, 0, .05),
inset -20px -20px 25px rgba(255, 255, 255, 0.9);
transition: 0.5s ease-in-out;
display: flex;
align-items: center;
justify-content: center;
border-radius: 52% 48% 33% 67% / 38% 45% 55% 62%;
}
.container .drop:hover{
border-radius: 50%;
}
.container .drop::before{
content: '';
position: absolute;
top:50px;
left: 85px;
width: 35px;
height: 35px;
border-radius: 50%;
background-color: #fff;
opacity: 0.9;
}
.container .drop::after{
content: '';
position: absolute;
top:90px;
left: 110px;
width: 15px;
height: 15px;
border-radius: 50%;
background-color: #fff;
opacity: 0.9;
}
.container .drop .content{
position: relative;
display: flex;
align-items: center;
justify-content: center;
align-items: center;
flex-direction: column;
text-align: center;
padding: 40px;
gap: 15px;
}
.container .drop .content h2{
position: relative;
color: #333;
font-size: 1.5em;
}
.container .drop .content form {
display: flex;
flex-direction: column;
gap: 20px;
justify-content: center;
align-items: center;
}
.container .drop .content form .inputBox{
position: relative;
width: 225px;
box-shadow: inset 2px 5px 10px rgba(0, 0, 0, .1),
inset -2px -5px 10px rgba(255, 255, 255, 1),
15px 15px 10px rgba(0, 0, 0, .05),
15px 10px 15px rgba(0, 0, 0, .05);
border-radius: 25px;
}
.container .drop .content form .inputBox::before{
content: '';
position: absolute;
top: 8px;
left: 50%;
transform: translateX(-50%);
width: 75%;
height: 4px;
background-color: rgba(255, 255, 255, 0.5);
border-radius: 5px;
}
.container .drop .content form .inputBox input{
border: none;
outline: none;
background-color: transparent;
width: 100%;
font-size: 1em;
padding: 10px 15px;
}
.container .drop .content form .inputBox input[type="submit"] {
color: #fff;
text-transform: uppercase;
cursor: pointer;
letter-spacing: 0.1em;
font-weight: 500;
}
.container .drop .content form .inputBox:last-child {
width: 120px;
background-color: #ff0f5b;
box-shadow: inset 2px 5px 10px rgba(0, 0, 0, .1),
15px 15px 10px rgba(0, 0, 0, .05),
15px 10px 15px rgba(0, 0, 0, .05);
transition: 0.5s;
}
.container .drop .content form .inputBox:last-child:hover{
width: 150px;
}
.btns{
position: absolute;
width: 120px;
height: 120px;
right: -120px;
bottom: 0;
background-color:#c61dff;
display: flex;
justify-content: center;
align-items: center;
cursor: pointer;
text-decoration: none;
color: #fff;
line-height: 1.2em;
letter-spacing: 0.1em;
font-size: 0.8em;
transition: 0.25s;
text-align: center;
box-shadow: inset 10px 10px 10px rgba(190, 1, 254, .05),
15px 25px 10px rgba(190, 1, 254, .1),
15px 20px 20px rgba(190, 1, 254, .1),
inset -10px -10px 15px rgba(255, 255, 255, 0.5);
border-radius: 44% 56% 65% 35% / 57% 58% 42% 43%;
}
.btns:hover{
border-radius: 50%;
}
.btns::before{
content: '';
position: absolute;
top:15px;
left: 30px;
width: 20px;
height: 20px;
border-radius: 50%;
background-color: #fff;
opacity: 0.45;
}
.btns.signup{
bottom: 150px;
right: -140px;
width: 80px;
height: 80px;
border-radius: 49% 51% 52% 48% / 63% 59% 41% 37%;
background-color: #01b4ff;
box-shadow: inset 10px 10px 10px rgba(1, 180, 255, .05),
15px 25px 10px rgba(1, 180, 255, .1),
15px 20px 20px rgba(1, 180, 255, .1),
inset -10px -10px 15px rgba(255, 255, 255, 0.5);
}
.btns.signup::before{
left: 20%;
width: 15px;
height: 15px;
}
.btns:hover{
border-radius: 50%;
}
</style>
result组件:
<template>
<div class="result"><h5>Orz</h5>
您将很有机会获得{{map(Number(String(this.$route.query.score)[0]))}}</div>
<div class="result"><h5>Orz</h5>
同时,您还有百分之{{(0.8*Number(String(Number(this.$route.query.score).toFixed(2)).slice(-2))+10).toFixed(2)}}的概率获得{{map(1+Number(String(this.$route.query.score)[0]))}}</div>
</template>
<script>
export default {
methods:{
map(num){
if(num==1 || num==0){
return "蓝桥杯省三等奖"
}
else if(num==2){
return "蓝桥杯省级二等奖/天梯赛国家三等奖"
}
else if(num==3){
return "xcpc区域赛铜奖/蓝桥杯国家级奖项"
}
else if(num==4){
return "xcpc区域赛银奖"
}
else{
return "xcpc区域赛金奖"
}
}
}
}
</script>
<style>
.result h5{
font-size:xx-small;
border-radius: 30px;
margin: 1px;
background-color: #e1e2e2;
height: 8px;
width: 240px;
}
.result{
border-radius: 30px;
font-size:xx-small;
width: 250px;
height: 45px;
margin: 0px auto;
border:1px solid rgba(255, 255, 255, 0.5);
background-color: rgba(255, 255, 255, 0.5);
}
</style>
路由:
import ResultIndex from '../pages/ResultIndex'
import { createRouter,createWebHashHistory } from 'vue-router'
const routes=[
{path:'/result',component:ResultIndex},
]
const router=createRouter({
history: createWebHashHistory(),
routes,
})
export default router
Django部分
view
import json
from django.shortcuts import render,HttpResponse,redirect
from myApp import fuctionset
# Create your views here.
def deal(request):
if request.method=="POST":
try:
data=json.loads(request.body)
arr=fuctionset.crawl(data['codeforcesid'],data['studentsum'])
print(arr)
y=fuctionset.predict(arr)
print(y)
return HttpResponse(y)
except:
return HttpResponse(-1)
return HttpResponse(-1)
functionset(自己建的)
import requests
from lxml import etree
import pandas as pd
def get_content(name):
url = 'https://codeforces.com/profile/'+str(name)
headers = {
'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53'
}
request = requests.get(url=url,headers=headers)
request.encoding = 'utf-8'
content = request.text
content = etree.HTML(content)
return content
def get_max_score(content):
color_set = ['gray', 'green', 'cyan', 'blue', 'violet', 'orange', 'red', 'legendary']
for color in color_set[::-1]:
targetstr = "//span[@class=\"user-" + str(color) + "\"]/text()"
maxscore_result = content.xpath(targetstr)
if len(maxscore_result) == 4:
maxscore_result = maxscore_result[3]
return int(maxscore_result)
elif len(maxscore_result) == 2:
maxscore_result = maxscore_result[1]
return int(maxscore_result)
return 0
def get_solve_problem(content):
solve_result = content.xpath('//div[@class="_UserActivityFrame_counterValue"]/text()')
solve_result = solve_result[0]
solve_result = solve_result.split(' ')[0]
return int(solve_result)
def get_age_time(content):
time_result = content.xpath('//span[@class="format-humantime"]/text()')
time_result = time_result[-1].split(' ')
if time_result[1] == 'months':
time_result = int(time_result[0]) * 4 * 7
elif time_result[1] == 'years':
time_result = int(time_result[0]) * 12 * 4 * 7
elif time_result[1] == 'week':
time_result = int(time_result[0]) * 7
else:
time_result = int(time_result[0])
return int(time_result)
def crawl_buct(name):
url = 'https://buctcoder.com/userinfo.php?user='+str(name)
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53',
'Cookie': 'Hm_lvt_bfecd9dd681e05b42e4a227c42453c15=1669591415,1669735400; resolveIDs=0; order_dir_list_by=1A; lastlang=6; PHPSESSID=p28q7m7bkn07c69oq7nvivuigj'
}
request = requests.get(url=url,headers=headers)
request.encoding = 'utf-8'
content = request.text
content = etree.HTML(content)
solve_result = content.xpath('//div[@class="extra content"]/a/text()')
try:
solve_result = solve_result[0].split(" ")[1]
return int(solve_result)
except:
return 0
def crawl(name,studentnum):
res=[]
content=get_content(name)
cfrating=(get_max_score(content))
res.append((cfrating-565)/(2409-565))
cfsolve=(get_solve_problem(content))
res.append(cfsolve/1714)
tmp=get_age_time(content)
tmp=(tmp-112)/(2688-112)
res.append(tmp)
buctsolve=(crawl_buct(studentnum))
res.append(buctsolve/1088)
res.append(tmp)
ans=[]
ans.append(res)
return ans
import joblib
import pandas as pd
def predict(arr):
model1 = joblib.load(filename="myApp/XGB.pkl")
df = pd.DataFrame(arr, columns=["cf_max_rating", "cf_solve", "cf_time", "buct_solve", "cf_real_time"])
y = model1.predict(df)
return y