本篇文章由 VeriMake 旧版论坛中备份出的原帖的 Markdown 源码生成
原帖标题为:Python 数据可视化案例赏析——Bird Seed Preferences
原帖网址为:https://verimake.com/topics/77 (旧版论坛网址,已失效)
原帖作者为:Felix(旧版论坛 id = 28,注册于 2020-04-18 19:59:47)
原帖由作者初次发表于 2020-04-23 18:26:13,最后编辑于 2020-04-23 18:26:13(编辑时间可能不准确)
截至 2021-12-18 14:27:30 备份数据库时,原帖已获得 943 次浏览、0 个点赞、0 条回复
Python 数据可视化案例赏析——Bird Seed Preferences
本文选取 GitHub 上作者 aaronpenne 的一个数据可视化案例,研究各种鸟类对不同种子的喜好程度,原始代码链接如下:
https://github.com/aaronpenne/data_visualization/tree/master/birds
数据见附录 [1]
导入模块
import numpy as np
import pandas as pd
from statsmodels.graphics.mosaicplot import mosaic
import matplotlib.pyplot as plt
import matplotlib as mpl
# Set matplotlib's default font family to a monospaced face.
mpl.rcParams.update({'font.family': 'monospace'})
数据导入和预处理
# Bird info: load the seed-preference table (rows are indexed per bird below,
# columns other than 'birds' are the seed types).
# NOTE(review): absolute, machine-specific path kept from the original post;
# point it at your local copy of birds_seeds.csv.
df_bird = pd.read_csv('D:/programming/python/data_visualization_master/birds/data/birds_seeds.csv')

# Keep the bird names aside for the subplot titles, then drop that column so
# that only the seed columns remain in the frame.
birds = df_bird['birds']
df_bird = df_bird.drop(columns='birds')
seeds = df_bird.columns.tolist()  # seed names, in column order

# DataFrame.apply returns a new frame rather than modifying the caller, so the
# result must be assigned back for the numeric conversion to take effect.
df_bird = df_bird.apply(pd.to_numeric)

# Extremes over the whole table (column-wise max/min, then max/min of those).
max_bird = df_bird.max().max()
min_bird = df_bird.min().min()
可以简单预览一下数据:
# Quick look at the loaded data. The two bare expressions below only show
# output in an interactive session / notebook cell; in a plain script they
# are evaluated and discarded.
df_bird.head()
# List of seed column names.
seeds
# Largest and smallest value found in the table.
print(max_bird, min_bird)
设置颜色
# Neutral grey used for gridlines, tick labels and annotation text.
color_grey = '#5F5F5F'

# Saturated palette: indexed per seed when colouring bars and legend labels.
color_sat = [
    '#a6cee3',
    '#1f78b4',
    '#b2df8a',
    '#fdbf6f',
    '#33a02c',
    '#fb9a99',
    '#ff1a62',
    '#ff7f00',
    '#cab2d6',
]

# Pale palette of the same length; not referenced by the code shown here.
color_pale = [
    '#8dd3c7',
    '#ffffb3',
    '#bebada',
    '#fb8072',
    '#80b1d3',
    '#fdb462',
    '#b3de69',
    '#fccde5',
    '#d9d9d9',
]
绘制极坐标图
绘制空图
# Create a rows x cols grid of empty polar subplots (4 x 4 = 16 axes).
rows = 4
cols = 4
# Radial offset passed as `bottom` to every bar call.
bottom = 0
# Shared text styling for the subplot titles.
title_dict = dict(fontsize='small', color='#5F5F5F')
# NOTE(review): `handles` is never read by the code shown here.
handles = dict()
fig, axes = plt.subplots(
    nrows=rows,
    ncols=cols,
    figsize=(8, 8),
    dpi=300,
    subplot_kw=dict(projection='polar'),
)
根据数据画图
# Draw one polar bar chart per bird; any axes left over after the last bird
# are titled 'Legend' and left without bars.

# Derive the counts from the data instead of hard-coding them.
n_birds = len(birds)
n_seeds = len(seeds)

# Bar geometry is identical for every subplot, so compute it once.
step = 2 * np.pi / n_seeds  # angular slot per seed, in radians
# linspace returns exactly n_seeds angles; arange with a float step can yield
# one element too many because of rounding.
theta = np.linspace(0.0, 2 * np.pi, n_seeds, endpoint=False)

# axes.flat walks the 2-D grid row by row, matching the original nested
# row/col loops with a running counter.
for i, ax in enumerate(axes.flat):
    if i < n_birds:
        # Bar heights: this bird's value for every seed column.
        radii = df_bird.iloc[i, :].tolist()
        bars = ax.bar(theta, radii, width=0.7 * step, bottom=bottom)
        # One colour per seed.
        for bar, seed_color in zip(bars, color_sat):
            bar.set_color(seed_color)
        # Bird name as the subplot title (positional lookup, like df_bird.iloc).
        ax.set_title(birds.iloc[i].title(), fontdict=title_dict)
        alpha = 0.4
    else:
        ax.set_title('Legend', fontdict=title_dict)
        alpha = 0  # gridlines fully transparent on the legend subplot

    # Same radial limits on every subplot.
    ax.set_ylim(min_bird, max_bird)
    ax.set_xticklabels([])
    if i == 0:
        # Only the first subplot gets labelled radial ticks.
        ax.tick_params(axis='y', direction='out', which='both')
        ax.set_yticks([0, 1, 2, 3, 4])
        ax.set_yticklabels(['', 1, 2, 3, 4], fontdict={'fontsize': 'xx-small', 'color': color_grey})
        ax.set_rlabel_position(220)
    else:
        ax.set_yticks(np.arange(min_bird, max_bird + 1, 1))
        ax.set_yticklabels([])

    # Radial (y) gridlines in grey; angular (x) gridlines get zero width.
    ax.grid(color=color_grey, linestyle='-', linewidth=1, axis='y', alpha=alpha)
    ax.grid(linewidth=0, axis='x')
    ax.spines['polar'].set_visible(False)  # hide the outer frame
    ax.set_axisbelow(True)  # keep ticks and gridlines underneath the bars
调整和设置
# Adjust the vertical spacing between subplot rows.
fig.subplots_adjust(hspace=0.3)

# Legend: write each seed name in its own bar colour.
# NOTE(review): plt.text targets the current axes - presumably the last
# ('Legend') subplot; confirm.
palette_size = len(color_sat)
midpoint = palette_size / 2  # true division: non-integer for odd-length palettes
full_span = max_bird * 2
for idx, seed_color in enumerate(color_sat):
    text_style = dict(color=seed_color, fontsize='xx-small', ha='center')
    if idx == midpoint:
        # Exact middle entry (only possible for an even-length palette): origin.
        angle, radius = 0, 0
    elif idx < midpoint:
        # First half: along angle pi/2, stepping down from max_bird.
        angle = np.pi / 2
        radius = max_bird - (idx * full_span / palette_size)
    else:
        # Second half: along angle 3*pi/2, stepping up from min_bird.
        angle = 3 * np.pi / 2
        radius = min_bird + ((idx - midpoint) * full_span / palette_size)
    plt.text(angle, radius, seeds[idx].title(), fontdict=text_style)
# Scoring key, positioned in figure coordinates.
figure_coords = plt.gcf().transFigure
label_dict = dict(color=color_grey, fontsize='x-small', ha='left')
plt.text(0.12, 0.95, 'High=4, Med=2, Low=1', fontdict=label_dict, transform=figure_coords)

# Figure title, left-aligned at x=0.12.
plt.suptitle(
    'Seed Preferences of Birds\nNear Chagrin Falls, Ohio',
    x=0.12,
    y=1.01,
    ha='left',
    va='top',
)

# Data/code credits, right-aligned at x=0.88 in figure coordinates.
credits = (
    'Data: DataIsBeautiful Contest July 2018\n'
    'Code: www.github.com/aaronpenne\n'
    '@aaronpenne © 2018'
)
plt.text(
    0.88,
    1.01,
    credits,
    fontsize='xx-small',
    color=color_grey,
    ha='right',
    va='top',
    multialignment='right',
    transform=figure_coords,
)
存储图片
# Save the figure as a PNG at the figure's own dpi, with a tight bounding box.
fig.savefig(
    'birdseed.png',
    bbox_inches='tight',
    dpi=fig.dpi,
)
效果展示
附录
[1] aaronpenne/data_visualization
[2] matplotlib.axes.Axes.bar
[3] matplotlib命令与格式:tick_params参数刻度线样式设置
[4] matplotlib.axes.Axes.set_axisbelow
[5] Python enumerate() 函数