100 个 pandas 数据分析函数总结
(点击上方快速关注并设置为星标,一起学Python)
来源:数据分析1480
data:image/s3,"s3://crabby-images/d8551/d855102ee0f0780d76776c326e5a19b5eca0065e" alt=""
data:image/s3,"s3://crabby-images/c515c/c515c5bb2d403789744588c0e6e5b2d1bd474900" alt=""
data:image/s3,"s3://crabby-images/f0fde/f0fde124a6aa698361ce269ca06c8c34f7338298" alt=""
import pandas as pd
import numpy as np
# Two correlated samples: y is a noisy linear function of x.
x = pd.Series(np.random.normal(2, 3, 1000))
y = 3 * x + 10 + pd.Series(np.random.normal(1, 2, 1000))
# Correlation coefficient between x and y
print(x.corr(y))
# Skewness of y
print(y.skew())
# Descriptive statistics of y
print(y.describe())
# Draw 1000 labels with replacement from 'A', 'B', 'C'.
z = pd.Series(['A', 'B', 'C']).sample(n=1000, replace=True)
# Give z the index 0..999 so it lines up with y when used as a grouping key.
z.index = range(1000)
# Mean of y within each group of z. The string alias "mean" is used
# instead of the np.mean callable, which newer pandas versions deprecate.
y.groupby(by=z).aggregate("mean")
data:image/s3,"s3://crabby-images/0c154/0c1547161548a45ff52e1db696804915648ed8ad" alt=""
data:image/s3,"s3://crabby-images/811af/811af3a97f2302cb22f58016355d5f3db5be29df" alt=""
# Frequency of each distinct value in z
level_counts = z.value_counts()
print(level_counts)
a = pd.Series([1, 5, 10, 15, 25, 30])
# Cumulative share of the total for each element of a.
# Dividing by a.sum() avoids computing cumsum() twice and avoids the
# label lookup [a.size - 1], which only works with the default 0..n-1 index.
print(a.cumsum() / a.sum())
data:image/s3,"s3://crabby-images/b21d7/b21d7522060f5607d4ad9154346a87a3c63524ff" alt=""
x = pd.Series([10, 13, np.nan, 17, 28, 19, 33, np.nan, 27])
# Does the series contain any missing value?
print(x.hasnans)
# Fill missing values with the mean of the series
observed_mean = x.mean()
print(x.fillna(observed_mean))
# Forward-fill missing values
print(x.ffill())
data:image/s3,"s3://crabby-images/22550/22550172c3dd65797bbf71f9ebe26aed147ce8ea" alt=""
data:image/s3,"s3://crabby-images/b3dfa/b3dfa4dce5bb9b1b70e772753b4481ebf94168cb" alt=""
income = pd.Series(['12500元', '8000元', '8500元', '15000元', '9000元'])
# Drop the trailing unit character, then cast income to int
print(income.str.slice(stop=-1).astype(int))
gender = pd.Series(['男', '女', '女', '女', '男', '女'])
# Factorize gender (integer codes plus the unique labels)
print(gender.factorize())
house = pd.Series([
    '大宁金茂府 | 3室2厅 | 158.32平米 | 南 | 精装',
    '昌里花园 | 2室2厅 | 104.73平米 | 南 | 精装',
    '纺大小区 | 3室1厅 | 68.38平米 | 南 | 简装',
])
# Take the third '|'-separated field (the floor area), trim whitespace,
# drop the two-character unit suffix and cast to float
(
    house.str.split('|')
    .str.get(2)
    .str.strip()
    .str.slice(stop=-2)
    .astype(float)
)
data:image/s3,"s3://crabby-images/03940/0394013a06200ea187a6a8ac64ac4d1d3815ae35" alt=""
data:image/s3,"s3://crabby-images/4008f/4008fc550c50240695f8b2e72bc69cdc7851e1b1" alt=""
np.random.seed(1234)
x = pd.Series(np.random.randint(10, 20, 10))
# Elements greater than 16, selected with .loc and a boolean mask ...
print(x.loc[x > 16])
# ... and with plain boolean indexing (Series.compress is not available
# in pandas 1.0 and later)
print(x[x > 16])
# Elements between 13 and 16
print(x[x.between(13, 16)])
# The three largest elements
print(x.nlargest(3))
y = pd.Series(['ID:1 name:张三 age:24 income:13500',
               'ID:2 name:李四 age:27 income:25000',
               'ID:3 name:王二 age:21 income:8000'])
# Extract the age and cast it to int; the raw string keeps \d from being
# treated as an (invalid) string escape sequence
print(y.str.findall(r'age:(\d+)').str[0].astype(int))
data:image/s3,"s3://crabby-images/fe208/fe208315ed9c86a287ad41d690f156912cc6b38e" alt=""
import matplotlib.pyplot as plt
np.random.seed(123)
x = pd.Series(np.random.normal(10, 3, 1000))
# Histogram of x
x.hist()
# Display the figure
plt.show()
# Box plot of x
x.plot.box()
plt.show()
installs = pd.Series(['1280万', '6.7亿', '2488万', '1892万', '9877', '9877万', '1.2亿'])


def transform(x):
    """Convert an install-count string to a float in units of 万 (10,000).

    Args:
        x: Count text such as '6.7亿', '1280万', or a bare number like '9877'.

    Returns:
        The count as a float expressed in 万.

    Raises:
        ValueError: If the numeric part cannot be parsed by float().
    """
    if x.endswith('亿'):
        # 1 亿 (100 million) equals 10,000 万
        return float(x[:-1]) * 10000
    if x.endswith('万'):
        return float(x[:-1])
    # No unit suffix: a plain count, so scale it down to 万
    return float(x) / 10000


# Express every install count in the common unit 万
installs.apply(transform)
data:image/s3,"s3://crabby-images/94a4a/94a4ad9ceabfb6adc5427d9a95375846e699cc89" alt=""
data:image/s3,"s3://crabby-images/18a7c/18a7c9d4c952d8bf01d5223cf078c8c49758002b" alt=""
data:image/s3,"s3://crabby-images/0cd96/0cd967a14ea674d740f59cd2ec3d86bbd2ad6020" alt=""
data:image/s3,"s3://crabby-images/37b9b/37b9bde30f4d896b81018d495f28cfbd9a4464d1" alt=""
data:image/s3,"s3://crabby-images/9ca14/9ca146e1ad18084e8086b01417a7f4a652ef24e7" alt=""
data:image/s3,"s3://crabby-images/5776c/5776ce33c91211b0b685214d3cabc1ee66456354" alt=""
import numpy as np
import pandas as pd
np.random.seed(112)
x = pd.Series(np.random.randint(8, 18, 6))
print(x)
# First-order difference: each element minus its predecessor
print(x - x.shift(1))
# Sort the elements of x in descending order
print(x.sort_values(ascending=False))
y = pd.Series(np.random.randint(8, 16, 100))
# De-duplicate y and convert the result to a list
y.drop_duplicates().tolist()
data:image/s3,"s3://crabby-images/0ffab/0ffabcdc8b2a501e074baa1eeaa0d9b769a13116" alt=""
推荐阅读:
入门: 最全的零基础学Python的问题 | 零基础学了8个月的Python | 实战项目 | 学Python就是这条捷径
干货: 爬取豆瓣短评,电影《后来的我们》 | 38年NBA最佳球员分析 | 从万众期待到口碑扑街!唐探3令人失望 | 笑看新倚天屠龙记 | 灯谜答题王 | 用Python做个海量小姐姐素描图
趣味: 弹球游戏 | 九宫格 | 漂亮的花 | 两百行Python《天天酷跑》游戏!
AI: 会做诗的机器人 | 给图片上色 | 预测收入 | 碟中谍这么火,我用机器学习做个迷你电影推荐系统
年度爆款文案
点这里,直达菜鸟学PythonB站!!
评论