Python 数据可视化案例赏析——Bird Seed Preferences

DataSci Felix ⋅ 于 2020-04-23 18:26:13 ⋅ 236 阅读

Python 数据可视化案例赏析——Bird Seed Preferences

本文选取自 GitHub 上作者 aaronpenne 的案例,研究各种鸟类对不同种子的喜好程度,链接如下:

https://github.com/aaronpenne/data_visualization/tree/master/birds

数据见附录 [1]

导入模块

import numpy as np
import pandas as pd
from statsmodels.graphics.mosaicplot import mosaic
import matplotlib.pyplot as plt
import matplotlib as mpl
# Use a monospace font family as the matplotlib default for this script.
mpl.rcParams['font.family'] = 'monospace'

数据导入和预处理

# Bird info: load the bird-by-seed preference table.
df_bird = pd.read_csv('D:/programming/python/data_visualization_master/birds/data/birds_seeds.csv')
birds = df_bird['birds']  # bird names, kept separately (used as subplot titles)
df_bird.drop(columns='birds', inplace=True)  # inplace=True modifies df_bird itself; only seed columns remain
seeds = df_bird.columns.tolist()  # seed names as a plain list, in column order
# DataFrame.apply returns a NEW frame. The original call threw the result away,
# so the numeric conversion never took effect; assign it back.
df_bird = df_bird.apply(pd.to_numeric)
# Extremes over every bird and every seed (max/min per column, then overall).
max_bird = df_bird.max().max()
min_bird = df_bird.min().min()

可以简单预览一下数据:

df_bird.head()

file

seeds

file

print(max_bird, min_bird)

file

设置颜色

# Saturated palette: nine colours, indexed per seed for the bars and legend labels.
color_sat = [
    '#a6cee3',
    '#1f78b4',
    '#b2df8a',
    '#fdbf6f',
    '#33a02c',
    '#fb9a99',
    '#ff1a62',
    '#ff7f00',
    '#cab2d6',
]
# Pale palette of the same length; not referenced by the code shown in this article.
color_pale = [
    '#8dd3c7',
    '#ffffb3',
    '#bebada',
    '#fb8072',
    '#80b1d3',
    '#fdb462',
    '#b3de69',
    '#fccde5',
    '#d9d9d9',
]
# Neutral grey for gridlines, tick labels and annotations.
color_grey = '#5F5F5F'

绘制极坐标图

绘制空图

# Create a 4x4 grid of empty polar axes (16 panels in total).
rows, cols = 4, 4
bottom = 0  # passed as `bottom` to every bar call in the plotting loop
handles = {}  # NOTE(review): not referenced by the code shown in this article
title_dict = dict(fontsize='small', color='#5F5F5F')  # shared subplot-title style
fig, axes = plt.subplots(
    rows,
    cols,
    subplot_kw=dict(projection='polar'),
    figsize=(8, 8),
    dpi=300,
)

file

根据数据画图

# Bar geometry is identical for every panel, so it is computed once up front.
N = 9  # number of seed types
step = 2*np.pi/N  # angular slot per seed, in radians
theta = np.arange(0.0, 2*np.pi, step)  # bar angles (x data)

# axes.flat walks the grid row by row, the same order as nested row/col loops.
for i, ax in enumerate(axes.flat):
    if i < 15:  # the first 15 panels each show one bird
        # y data: this bird's preference for every seed, as a plain list
        radii = df_bird.iloc[i, :].tolist()
        # Polar bar chart; each bar is narrower than its slot (0.7*step).
        bars = ax.bar(theta, radii, width=0.7*step, bottom=bottom)
        for bar, seed_color in zip(bars, color_sat):
            bar.set_color(seed_color)  # one fixed colour per seed type
        ax.set_title(birds[i].title(), fontdict=title_dict)  # bird name as panel title
        alpha = 0.4  # gridline transparency
    else:  # the remaining panel is reserved for the legend
        ax.set_title('Legend', fontdict=title_dict)
        alpha = 0  # gridlines fully transparent
    ax.set_ylim(min_bird, max_bird)  # same radial limits on every panel
    ax.set_xticklabels([])
    if i == 0:
        # Only the first panel carries labelled radial ticks.
        ax.tick_params(axis='y', direction='out', which='both')
        ax.set_yticks([0, 1, 2, 3, 4])
        ax.set_yticklabels(['', 1, 2, 3, 4], fontdict={'fontsize':'xx-small', 'color':color_grey})
        ax.set_rlabel_position(220)
    else:
        # Every other panel keeps the tick positions but hides their labels.
        ax.set_yticks(np.arange(min_bird, max_bird+1, 1))
        ax.set_yticklabels([])
    ax.grid(color=color_grey, linestyle='-', linewidth=1, axis='y', alpha=alpha)  # radial (y) gridlines
    ax.grid(linewidth=0, axis='x')  # angular (x) gridlines drawn with zero width
    ax.spines['polar'].set_visible(False)  # hide the outer polar spine
    ax.set_axisbelow(True)  # keep ticks and gridlines below most artists

调整和设置

fig.subplots_adjust(hspace=0.3)  # spacing between subplot rows

# Legend: write each seed name in its own bar colour.
# NOTE(review): plt.text targets the current axes - presumably the last panel
# created (the one titled 'Legend'); confirm.
n_colors = len(color_sat)
half = n_colors/2  # true division: 4.5 for the nine-colour palette
for i, color in enumerate(color_sat):
    label_dict = {'color':color, 'fontsize':'xx-small', 'ha':'center'}

    if i < half:
        # First half: angle pi/2, radius stepping down from max_bird.
        x, y = np.pi/2, max_bird-(i*(max_bird*2)/n_colors)
    elif i == half:
        # Only reachable for an even palette length; with nine colours `half`
        # is 4.5 and never equals an integer index.
        x, y = 0, 0
    else:
        # Second half: opposite angle (3*pi/2), radius stepping up from min_bird.
        x, y = 3*np.pi/2, min_bird+((i-half)*(max_bird*2)/n_colors)

    plt.text(x, y, seeds[i].title(), fontdict=label_dict)

# Everything below is positioned in figure coordinates rather than data coordinates.
figure_coords = plt.gcf().transFigure

# Key explaining the preference scale.
label_dict = {'color':color_grey, 'fontsize':'x-small', 'ha':'left'}
plt.text(
    0.12,
    0.95,
    'High=4, Med=2, Low=1',
    fontdict=label_dict,
    transform=figure_coords,
)

# Fig title
plt.suptitle(
    'Seed Preferences of Birds\nNear Chagrin Falls, Ohio',
    x=0.12,
    y=1.01,
    ha='left',
    va='top',
)

# Fig annotations: data/code credits, right-aligned at the top-right corner.
credit_text = (
    'Data: DataIsBeautiful Contest July 2018\n'
    'Code: www.github.com/aaronpenne\n'
    '@aaronpenne © 2018'
)
plt.text(
    0.88,
    1.01,
    credit_text,
    fontsize='xx-small',
    color=color_grey,
    ha='right',
    va='top',
    multialignment='right',
    transform=figure_coords,
)

存储图片

# Save the figure as a PNG at the figure's own DPI with a tight bounding box.
fig.savefig(
    'birdseed.png',
    dpi=fig.dpi,
    bbox_inches='tight',
)

效果展示

file

附录

[1] aaronpenne/data_visualization

[2] matplotlib.axes.Axes.bar

[3] matplotlib命令与格式:tick_params参数刻度线样式设置

[4] matplotlib.axes.Axes.set_axisbelow

[5] Python enumerate() 函数

成为第一个点赞的人吧 :bowtie:
回复数量: 0
    暂无评论~~
    • 请注意单词拼写,以及中英文排版,参考此页
    • 支持 Markdown 格式, **粗体**、~~删除线~~、`单行代码`, 更多语法请见这里 Markdown 语法
    • 支持表情,使用方法请见 Emoji 自动补全来咯,可用的 Emoji 请见 :metal: :point_right: Emoji 列表 :star: :sparkles:
    • 上传图片, 支持拖拽和剪切板黏贴上传, 格式限制 - jpg, png, gif
    • 发布框支持本地存储功能,会在内容变更时保存,「提交」按钮点击时清空
    Ctrl+Enter