用Python分析元旦旅游热门城市,告诉你哪些景点性价比更高
导读:元旦马上就要到了,难得的3天小长假,玩肯定是要去玩的,但去哪儿玩是个问题。于是,我以旅游热门城市厦门为例,用Python获取了去哪儿网的相关景点数据,包括景点名称、地区、评分、销量、价格、坐标等字段,对数据进行可视化并作简单分析,以求找到性价比较高的景点。
01 数据获取
# -*- coding: utf-8 -*-
# @Time : 2020/12/25 9:47 下午
# @Author : 公众号 菜J学Python
# @File : 去哪儿.py
import requests
import random
from time import sleep
import csv
import pandas as pd
from fake_useragent import UserAgent
def get_data(keyword, page):
    """Fetch one page of Qunar ticket search results and print its sight list.

    Args:
        keyword: Search term sent as the ``keyword`` query parameter.
        page: Page number sent as the ``page`` query parameter.

    Returns:
        The ``data.sightList`` value of the JSON response, or an empty list
        when the request fails or the response does not have that shape.
    """
    ua = UserAgent(verify_ssl=False)
    headers = {"User-Agent": ua.random}
    url = "http://piao.qunar.com/ticket/list.json"
    # Passing the query as ``params`` lets requests encode the keyword and
    # restores the ``region`` parameter, which had been garbled into "®ion".
    params = {
        "keyword": keyword,
        "region": "",
        "from": "mpl_search_suggest",
        "page": page,
    }
    try:
        res = requests.request(
            "GET", url, headers=headers, params=params, timeout=10
        )
    except requests.RequestException as exc:
        print(f"第{page}页请求失败: {exc}")
        return []
    finally:
        # Pause after every attempt (success or failure) to throttle the crawl.
        sleep(random.uniform(1, 2))
    try:
        sight_list = res.json()["data"]["sightList"]
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError: body is not JSON; KeyError/TypeError: unexpected shape.
        print(f"第{page}页解析失败: {exc!r}")
        return []
    print(sight_list)
    return sight_list
if __name__ == '__main__':
    # Crawl result pages 1 through 99 for the search keyword, pausing a
    # random 1-2 seconds before each page is requested.
    keyword = "厦门"
    first_page, last_page = 1, 99
    for page in range(first_page, last_page + 1):
        print(f"正在提取第{page}页")
        sleep(random.uniform(1, 2))
        get_data(keyword, page)
02 数据处理
1. 导入相关包
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.rcParams['font.sans-serif'] = ['SimHei'] # Set the name of the font to load (SimHei)
plt.rcParams['axes.unicode_minus'] = False# Keep the minus sign '-' from showing as a box when figures are saved
import jieba
import re
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.globals import ThemeType
import stylecloud
from IPython.display import Image
2. 导入景点数据
# Load the scraped sights CSV, supplying the column names explicitly,
# then preview the first rows.
df = pd.read_csv(
    "/菜J学Python/旅游/厦门旅游景点.csv",
    names=[
        'name',
        'star',
        'score',
        'qunarPrice',
        'saleCount',
        'districts',
        'point',
        'intro',
    ],
)
df.head()
3. 删除重复数据
# Drop fully duplicated rows, keeping the first occurrence of each.
df = df.drop_duplicates(keep="first")
4. 查看数据信息
# Print the column dtypes and non-null counts of the de-duplicated frame.
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 422 entries, 0 to 423
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 name 422 non-null object
1 star 422 non-null object
2 score 422 non-null float64
3 qunarPrice 422 non-null float64
4 saleCount 422 non-null int64
5 districts 422 non-null object
6 point 422 non-null object
7 intro 377 non-null object
dtypes: float64(2), int64(1), object(5)
memory usage: 29.7+ KB
5. 描述性统计
# Shade the summary-statistics table with a light orange colour map.
color_map = sns.light_palette('orange', as_cmap=True)
df.describe().style.background_gradient(cmap=color_map)
03 可视化分析
1. 景点介绍
# Draw a word cloud from the segmented sight introductions.
text1 = get_cut_words(content_series=df['intro'])
stylecloud.gen_stylecloud(
    text=' '.join(text1),
    max_words=100,
    collocations=False,
    font_path='simhei.ttf',
    icon_name='fas fa-heart',
    size=653,
    # palette='matplotlib.Inferno_9',
    # The output path must match the file displayed by Image() below.
    output_name='./xiamen.png',
)
Image(filename='./xiamen.png')
2. 景点分布
# Split the comma-separated "point" string once and store its first and
# second parts as the "lon" and "lat" columns, then export the frame.
point_parts = df["point"].str.split(",", expand=True)
df["lon"] = point_parts[0]
df["lat"] = point_parts[1]
df.to_csv("/菜J学Python/data.csv")
3. 评分TOP10景点
# Aggregate score per sight name (pivot_table's default aggregation),
# order from highest to lowest, and show the first 10 rows.
df_score = df.pivot_table(index='name', values='score').sort_values(
    'score', ascending=False
)
df_score.head(10)
4. 月销量TOP10景点
# Aggregate saleCount per sight name (pivot_table's default aggregation),
# order from highest to lowest, and show the first 10 rows.
df_saleCount = df.pivot_table(index='name', values='saleCount').sort_values(
    'saleCount', ascending=False
)
df_saleCount.head(10)
5. 价格TOP20景点
# Aggregate qunarPrice per sight name (pivot_table's default aggregation),
# order from highest to lowest, and show the first 20 rows.
df_qunarPrice = df.pivot_table(index='name', values='qunarPrice').sort_values(
    'qunarPrice', ascending=False
)
df_qunarPrice.head(20)
6. 月销售额TOP20景点
# saleTotal is the row-wise product of price and sale count; aggregate it
# per sight name, order from highest to lowest, and show the first 20 rows.
df["saleTotal"] = df["qunarPrice"].mul(df["saleCount"])
df_saleTotal = df.pivot_table(index='name', values='saleTotal').sort_values(
    'saleTotal', ascending=False
)
df_saleTotal.head(20)
7. 景点等级分布
# Count the rows for each "star" value, then order the counts descending.
df_star = df["star"].value_counts()
df_star = df_star.sort_values(ascending=False)

# Build the pie chart step by step from [label, count] pairs.
c = Pie(init_opts=opts.InitOpts(theme=ThemeType.WALDEN))
c.add(
    "",
    [[label, count] for label, count in zip(df_star.index.to_list(), df_star.to_list())],
)
c.set_global_opts(
    legend_opts=opts.LegendOpts(is_show=False),
    title_opts=opts.TitleOpts(
        title="景点等级分布",
        subtitle="数据来源:去哪儿网\n制图:菜J学Python",
        pos_top="0.5%",
        pos_left='left',
    ),
)
c.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{d}%", font_size=16))
c.render_notebook()

# List the rows whose "star" value is not '无', ordered by "star" descending.
df.loc[df["star"] != '无'].sort_values("star", ascending=False)
小结
厦门是典型的海滨休闲城市,具有丰富的海洋和人文景观;
厦门旅游景点主要集中分布在思明区,其他区域较为分散;
厦门大学口碑最高,其次才是鼓浪屿;
鼓浪屿门票销量遥遥领先厦门其他景点;
消费较高的景点或活动包括游艇、帆船和方特。
评论