8000字 | Python数据可视化,完整版实操指南 !
数据管道
共 11244字,需浏览 23分钟
· 2021-05-11
点击“数据管道”,选择“置顶/星标公众号”
福利干货,第一时间送达
1. 前言
2. pandas
import pandas as pd
df = pd.read_csv('temporal.csv')
df.head(10) #View first 10 data rows
df.describe()
df.info()
pd.set_option('display.max_rows',500)
pd.set_option('display.max_columns',500)
pd.set_option('display.width',1000)
format_dict = {'data science':'${0:,.2f}', 'Mes':'{:%m-%Y}', 'machine learning':'{:.2%}'}
#We make sure that the Month column has datetime format
df['Mes'] = pd.to_datetime(df['Mes'])
#We apply the style to the visualization
df.head().style.format(format_dict)
format_dict = {'Mes':'{:%m-%Y}'} #Simplified format dictionary with values that do make sense for our data
df.head().style.format(format_dict).highlight_max(color='darkgreen').highlight_min(color='#ff0000')
df.head(10).style.format(format_dict).background_gradient(subset=['data science', 'machine learning'], cmap='BuGn')
df.head().style.format(format_dict).bar(color='red', subset=['data science', 'deep learning'])
df.head(10).style.format(format_dict).background_gradient(subset = ['data science','machine learning'],cmap ='BuGn')。highlight_max(color ='yellow')
from pandas_profiling import ProfileReport
prof = ProfileReport(df)
prof.to_file(output_file='report.html')
3. matplotlib
import matplotlib.pyplot as plt
plt.plot(df['Mes'], df['data science'], label='data science')
# The parameter label is to indicate the legend. This doesn't mean that it will be shown, we'll have to use another command that I'll explain later.
plt.plot(df ['Mes'],df ['data science'],label ='data science')
plt.plot(df ['Mes'],df ['machine learning'],label ='machine learning ')
plt.plot(df ['Mes'],df ['deep learning'],label ='deep learning')
plt.plot(df['Mes'], df['data science'], label='data science')
plt.plot(df['Mes'], df['machine learning'], label='machine learning')
plt.plot(df['Mes'], df['deep learning'], label='deep learning')
plt.xlabel('Date')
plt.ylabel('Popularity')
plt.title('Popularity of AI terms by date')
plt.grid(True)
plt.legend()
fig, axes = plt.subplots(2,2)
axes[0, 0].hist(df['data science'])
axes[0, 1].scatter(df['Mes'], df['data science'])
axes[1, 0].plot(df['Mes'], df['machine learning'])
axes[1, 1].plot(df['Mes'], df['deep learning'])
plt.plot(df ['Mes'],df ['data science'],'r-')
plt.plot(df ['Mes'],df ['data science'] * 2,'bs')
plt .plot(df ['Mes'],df ['data science'] * 3,'g ^')
plt.scatter(df['data science'], df['machine learning'])
plt.bar(df ['Mes'],df ['machine learning'],width = 20)
plt.hist(df ['deep learning'],bins = 15)
plt.plot(df['Mes'], df['data science'], label='data science')
plt.plot(df['Mes'], df['machine learning'], label='machine learning')
plt.plot(df['Mes'], df['deep learning'], label='deep learning')
plt.xlabel('Date')
plt.ylabel('Popularity')
plt.title('Popularity of AI terms by date')
plt.grid(True)
plt.text(x='2010-01-01', y=80, s=r'$\lambda=1, r^2=0.8$') #Coordinates use the same units as the graph
plt.annotate('Notice something?', xy=('2014-01-01', 30), xytext=('2006-01-01', 50), arrowprops={'facecolor':'red', 'shrink':0.05}
4. seaborn
import seaborn as sns
sns.set()
sns.scatterplot(df['Mes'], df['data science'])
sns.relplot(x='Mes', y='deep learning', hue='data science', size='machine learning', col='categorical', data=df)
sns.heatmap(df.corr(),annot = True,fmt ='。2f')
sns.pairplot(df)
sns.pairplot(df,hue ='categorical')
sns.jointplot(x='data science', y='machine learning', data=df)
sns.catplot(x='categorical', y='data science', kind='violin', data=df)
fig, axes = plt.subplots(1, 2, sharey=True, figsize=(8, 4))
sns.scatterplot(x="Mes", y="deep learning", hue="categorical", data=df, ax=axes[0])
axes[0].set_title('Deep Learning')
sns.scatterplot(x="Mes", y="machine learning", hue="categorical", data=df, ax=axes[1])
axes[1].set_title('Machine Learning')
5. Bokeh
from bokeh.plotting import figure, output_file, save
output_file('data_science_popularity.html')
p = figure(title='data science', x_axis_label='Mes', y_axis_label='data science')
p.line(df['Mes'], df['data science'], legend='popularity', line_width=2)
save(p)
output_file('multiple_graphs.html')
s1 = figure(width=250, plot_height=250, title='data science')
s1.circle(df['Mes'], df['data science'], size=10, color='navy', alpha=0.5)
s2 = figure(width=250, height=250, x_range=s1.x_range, y_range=s1.y_range, title='machine learning') #share both axis range
s2.triangle(df['Mes'], df['machine learning'], size=10, color='red', alpha=0.5)
s3 = figure(width=250, height=250, x_range=s1.x_range, title='deep learning') #share only one axis range
s3.square(df['Mes'], df['deep learning'], size=5, color='green', alpha=0.5)
p = gridplot([[s1, s2, s3]])
save(p)
6. altair
7. folium
import folium
m1 = folium.Map(location=[41.38, 2.17], tiles='openstreetmap', zoom_start=18)
m1.save('map1.html')
m2 = folium.Map(location=[41.38, 2.17], tiles='openstreetmap', zoom_start=16)
folium.Marker([41.38, 2.176], popup='<i>You can use whatever HTML code you want</i>', tooltip='click here').add_to(m2)
folium.Marker([41.38, 2.174], popup='<b>You can use whatever HTML code you want</b>', tooltip='dont click here').add_to(m2)
m2.save('map2.html')
from geopandas.tools import geocode
df2 = pd.read_csv('mapa.csv')
df2.dropna(axis=0, inplace=True)
df2['geometry'] = geocode(df2['País'], provider='nominatim')['geometry'] #It may take a while because it downloads a lot of data.
df2['Latitude'] = df2['geometry'].apply(lambda l: l.y)
df2['Longitude'] = df2['geometry'].apply(lambda l: l.x)
m3 = folium.Map(location=[39.326234,-4.838065], tiles='openstreetmap', zoom_start=3)
def color_producer(val):
if val <= 50:
return 'red'
else:
return 'green'
for i in range(0,len(df2)):
folium.Circle(location=[df2.iloc[i]['Latitud'], df2.iloc[i]['Longitud']], radius=5000*df2.iloc[i]['data science'], color=color_producer(df2.iloc[i]['data science'])).add_to(m3)
m3.save('map3.html')
推荐阅读
欢迎长按扫码关注「数据管道」
评论
Python写3D游戏,这样也可以?
来源丨网络vizard介绍Vizard是一款虚拟现实开发平台软件,从开发至今已走过十个年头。它基于C/C++,运用新近OpenGL拓展模块开发出的高性能图形引擎。当运用Python语言执行开发时,Vizard同时自动将编写的程式转换为字节码抽象层(LAXMI),进而运行渲染核心。vizard入门1、
Python大数据分析
0
Python 潮流周刊#49:谷歌裁员 Python 团队,微软开源 MS-DOS 4.0
△△请给“Python猫”加星标 ,以免错过文章推送本周刊由 Python猫 出品,精心筛选国内外的 250+ 信息源,为你挑选最值得分享的文章、教程、开源项目、软件工具、播客和视频、热门话题等内容。愿景:帮助所有读者精进 Python 技术,并增长职业和副业的收入。本期周刊分享了 12
Python猫
0
如何画出漂亮的神经网络图?神经网络可视化工具集锦搜集
点击上方“小白学视觉”,选择加"星标"或“置顶”重磅干货,第一时间送达1. draw_convnet一个用于画卷积神经网络的Python脚本https://github.com/gwding/draw_convnet2. NNSVGhttp://alexlenail.me/NN-SVG/LeNet.
小白学视觉
0
让Python for循环飞起来!
来源:deephub在本文中,我将介绍一些简单的方法,可以将Python for循环的速度提高1.3到900倍。Python内建的一个常用功能是timeit模块。下面几节中我们将使用它来度量循环的当前性能和改进后的性能。对于每种方法,我们通过运行测试来建立基线,该测试包括在10次测试运行中运行被测函
Python大数据分析
1
你需要知道的20个常用的Python技巧
点击上方“小白学视觉”,选择加"星标"或“置顶”重磅干货,第一时间送达来源:https://medium.com翻译:小马Python的可读性和简单性是其广受欢迎的两大原因,本文介绍20个常用的Python技巧来提高代码的可读性,并能帮助你节省大量时间,下面的技巧将在你的日常编码练习中非常实用。1.
小白学视觉
0
CVPR 2024|大视觉模型的开山之作!无需任何语言数据即可打造大视觉模型
点击上方“小白学视觉”,选择加"星标"或“置顶”重磅干货,第一时间送达作者丨科技猛兽编辑丨极市平台极市导读 本文提出一种序列建模 (sequential modeling) 的方法,不使用任何语言数据,训练大视觉模型。本文目录1 序列建模打造大视觉模型(来自 UCB,Johns Hopki
小白学视觉
0
金融研究 | 使用Python测量关键审计事项的「信息含量」
Tips: 公众号推送后内容只能更改一次,且只能改20字符。如果内容出问题,或者想更新内容, 只能重复推送。为了更好的阅读体验,建议阅读本文博客版, 链接地址https://textdata.cn/blog/2023-01-13-information-content-of-critical-aud
大邓和他的Python
0
极简 Python:10分钟会用 OpenAI / Kimi API
背景之前写了一份:写给不会代码的你:20分钟上手 Python + AI,但感觉还不够简单粗暴。于是就有了这篇 10 分钟版的,让你快速的开始使用各 AI 厂的 API。作为准备工作,请先打开 Colab 的网站,并新建一个笔记本。网址: https://colab.resear
Python猫
1