Python代码|Python做数据可视化的代码
数据科学与人工智能
共 5362字,需浏览 11分钟
·
2020-06-26 23:23
前言:
从代码中学习Python知识以及Python与数据相关的知识,是一个有效的方法。例如,想了解如何用Python做数据可视化,我们可以从互联网上找一些Python数据可视化的代码来阅读、调试和迁移。这样做的好处是突出实用性。同时,我们再结合联想的学习方法,对所用到的可视化函数做更深入的了解和使用。我借用《数据科学与人工智能》这个公众号,分享一些我在解决实际数据问题时从网上找到的Python代码,希望这些代码对大家有帮助和启发。
Python做数据可视化代码
#!/usr/bin/env python
# coding: utf-8
# In[1]:
# Load the CSV dataset into a DataFrame and preview its first ten rows.
import pandas as pd
df = pd.read_csv('./datasets/temporal.csv')
df.head(n=10)
# In[2]:
# Summary statistics of the DataFrame's columns.
df.describe()
# In[3]:
# Column dtypes and non-null counts.
df.info()
# In[5]:
# Raise pandas' display limits so wide/long frames are not truncated on output.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)
# In[6]:
# Per-column format strings: currency, month-year date and percentage.
format_dict = {
    'data science': '${0:,.2f}',
    'Mes': '{:%m-%Y}',
    'machine learning': '{:.2%}',
}
# Parse the 'Mes' column into datetimes so the date format string can apply.
df['Mes'] = pd.to_datetime(df['Mes'])
# Render the first five rows using the formats above.
df.head().style.format(format_dict)
# In[7]:
# Keep only the date format -- the one entry that makes sense for this data.
format_dict = {'Mes': '{:%m-%Y}'}
# Highlight each column's maximum and minimum cell.
(
    df.head()
    .style
    .format(format_dict)
    .highlight_max(color='darkgreen')
    .highlight_min(color='#ff0000')
)
# In[8]:
# Shade two columns with a blue-green gradient proportional to their values.
(
    df.head(10)
    .style
    .format(format_dict)
    .background_gradient(subset=['data science', 'machine learning'], cmap='BuGn')
)
# In[9]:
# Draw in-cell horizontal bars for two columns.
(
    df.head()
    .style
    .format(format_dict)
    .bar(color='red', subset=['data science', 'deep learning'])
)
# In[10]:
# Styles can be combined: gradient shading plus a highlight on each column's maximum.
(
    df.head(10)
    .style
    .format(format_dict)
    .background_gradient(subset=['data science', 'machine learning'], cmap='BuGn')
    .highlight_max(color='yellow')
)
# In[11]:
# Build a profiling report for the whole DataFrame and write it to
# 'report.html' in the current working directory.
# NOTE(review): the pandas_profiling package has since been renamed to
# ydata-profiling -- confirm which one is installed in the target environment.
from pandas_profiling import ProfileReport
prof = ProfileReport(df)
prof.to_file(output_file='report.html')
# In[17]:
import matplotlib.pyplot as plt
# The styling cells above address columns with spaces ('data science'), while
# every plot from here on uses underscores ('data_science'). Normalise the
# column names once so the lookups below cannot raise KeyError. This is a
# no-op when the columns already use underscores.
df.columns = df.columns.str.replace(' ', '_', regex=False)
# `label` only registers the legend text for the line; the legend itself is
# drawn by plt.legend(), used further below.
plt.plot(df['Mes'], df['data_science'], label='data science')
# In[14]:
df
# In[18]:
# Several series on the same axes: one plt.plot call per series.
plt.plot(df['Mes'], df['data_science'], label='data science')
plt.plot(df['Mes'], df['machine_learning'], label='machine learning')
plt.plot(df['Mes'], df['deep_learning'], label='deep learning')
# In[19]:
# Same three series, now with axis labels, title, grid and the legend.
plt.plot(df['Mes'], df['data_science'], label='data science')
plt.plot(df['Mes'], df['machine_learning'], label='machine learning')
plt.plot(df['Mes'], df['deep_learning'], label='deep learning')
plt.xlabel('Date')
plt.ylabel('Popularity')
plt.title('Popularity of AI terms by date')
plt.grid(True)
plt.legend()
# In[20]:
# A 2x2 grid of axes, each holding a different chart.
fig, axes = plt.subplots(2,2)
axes[0, 0].hist(df['data_science'])
axes[0, 1].scatter(df['Mes'], df['data_science'])
axes[1, 0].plot(df['Mes'], df['machine_learning'])
axes[1, 1].plot(df['Mes'], df['deep_learning'])
# In[21]:
# Format strings choose colour and marker: red line, blue squares, green triangles.
plt.plot(df['Mes'], df['data_science'], 'r-')
plt.plot(df['Mes'], df['data_science']*2, 'bs')
plt.plot(df['Mes'], df['data_science']*3, 'g^')
# In[23]:
# Scatter plot of one series against another.
plt.scatter(df['data_science'], df['machine_learning'])
# In[24]:
# Bar chart over the date axis.
plt.bar(df['Mes'], df['machine_learning'], width=20)
# In[25]:
# Histogram of the values using 15 bins.
plt.hist(df['deep_learning'], bins=15)
# In[26]:
# The labelled line chart again, this time with free text and an arrow annotation.
plt.plot(df['Mes'], df['data_science'], label='data science')
plt.plot(df['Mes'], df['machine_learning'], label='machine learning')
plt.plot(df['Mes'], df['deep_learning'], label='deep learning')
plt.xlabel('Date')
plt.ylabel('Popularity')
plt.title('Popularity of AI terms by date')
plt.grid(True)
plt.text(x='2010-01-01', y=80, s=r'$\lambda=1, r^2=0.8$') #Coordinates use the same units as the graph
# xy is the point the arrow points at; xytext is where the annotation text is placed.
plt.annotate('Notice something?', xy=('2014-01-01', 30), xytext=('2006-01-01', 50), arrowprops={'facecolor':'red', 'shrink':0.05})
# In[28]:
import seaborn as sns
# Switch matplotlib to seaborn's default theme.
sns.set()
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them positionally.
sns.scatterplot(x=df['Mes'], y=df['data_science'])
# In[29]:
# One panel per 'categorical' value; colour and marker size encode two more columns.
sns.relplot(x='Mes', y='deep_learning', hue='data_science', size='machine_learning', col='categorical', data=df)
# In[30]:
# Correlate numeric columns only: pandas 2.x raises if df.corr() meets the
# string/datetime columns instead of silently dropping them.
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True, fmt='.2f')
# In[31]:
# Pairwise relationships between the columns of the DataFrame.
sns.pairplot(df)
# In[32]:
# Same pair plot, coloured by the 'categorical' column.
sns.pairplot(df, hue='categorical')
# In[34]:
# Scatter plot of two columns with their marginal distributions.
sns.jointplot(x='data_science', y='machine_learning', data=df)
# In[35]:
# Violin plot of 'data_science' for each 'categorical' value.
sns.catplot(x='categorical', y='data_science', kind='violin', data=df)
# In[36]:
# Two seaborn plots side by side on matplotlib axes sharing the y axis.
fig, axes = plt.subplots(1, 2, sharey=True, figsize=(8, 4))
sns.scatterplot(x="Mes", y="deep_learning", hue="categorical", data=df, ax=axes[0])
axes[0].set_title('Deep Learning')
sns.scatterplot(x="Mes", y="machine_learning", hue="categorical", data=df, ax=axes[1])
axes[1].set_title('Machine Learning')
# In[37]:
from bokeh.plotting import figure, output_file, save
# Subsequent save() calls write to this HTML file.
output_file('data_science_popularity.html')
# In[38]:
p = figure(title='data science', x_axis_label='Mes', y_axis_label='data science')
# `legend_label` replaces the `legend` glyph keyword, which was deprecated in
# Bokeh 1.4 and removed in Bokeh 3.0.
p.line(df['Mes'], df['data_science'], legend_label='popularity', line_width=2)
save(p)
# In[40]:
from bokeh.layouts import gridplot
output_file('multiple_graphs.html')
# Use width/height for all three figures; the original mixed in `plot_height`,
# which Bokeh 3 no longer accepts.
s1 = figure(width=250, height=250, title='data science')
s1.circle(df['Mes'], df['data_science'], size=10, color='navy', alpha=0.5)
s2 = figure(width=250, height=250, x_range=s1.x_range, y_range=s1.y_range, title='machine learning') #share both axis range
s2.triangle(df['Mes'], df['machine_learning'], size=10, color='red', alpha=0.5)
s3 = figure(width=250, height=250, x_range=s1.x_range, title='deep learning') #share only one axis range
s3.square(df['Mes'], df['deep_learning'], size=5, color='green', alpha=0.5)
# Lay the three figures out in a single row and save them to one HTML page.
p = gridplot([[s1, s2, s3]])
save(p)
# References:
# - https://towardsdatascience.com/complete-guide-to-data-visualization-with-python-2dd74df12b5e
这份Python代码我已在notebook中调试并测试通过。
用到Python的库有pandas, pandas_profiling, matplotlib, seaborn和bokeh。
配套的数据集和notebook下载链接如下:
http://47.112.229.252:80/f/97e449826d0a44468a8e/
我创建了Python语言群,需要加入的朋友,请扫码添加我的微信,备注Python语言。
评论