pycirclize python包画circos环形图
很多小伙伴都有画环形图的需求,网上也有很多画环形图的教程,讲解circos软件和circlize R包的比较多,本文介绍一款python包:pyCirclize。适合喜欢python且希望更灵活作图的小伙伴。
pyCirclize包实际上也是以matplotlib模块为基础进行开发,个人使用体验感觉比circos软件灵活很多,例如circos软件没办法为各圈写标题,图注也比较单一,相比之下pycirclize的图注和对扇区的调整更加灵活。详见官方的教程文档:https://moshi4.github.io/pyCirclize/。
1. 安装pycirclize
直接使用pip工具安装即可,要求Python 3.9以上版本
pip install pycirclize
2. 实例图
多说无益,直接上一个样图。
下图是一个甲基化相关环形图,包含了常用环形图的诸多要素,根据实例图代码修改应该可以满足大部分作图需求了。
- 第一圈为染色体:需要显示染色体ID和刻度(大刻度标出刻度值,但起始的大刻度不显示,免得首位的刻度值重叠显得很乱),标出小刻度。
- 第二圈为case组相对于control组的高甲基化位点:CG、CHG、CHH三种颜色均显示,图中由于CHH类型位点太多导致其他两个看不太清了。标注出甲基化水平刻度线,从0到0.9共10条浅灰色的线。
- 第三圈为基因密度热图:用黑白渐变展示。
- 第四圈为case组相对于control组的低甲基化位点:同第二圈,但方向相反。
3. 作图
俗话说得好:Talk is cheap. Show me the code.
3.1 数据准备
- 第一圈的
chromosome.bed
文件:
共三列,染色体名,start,end。
CHR1 0 27139696
CHR2 0 27139696
- 第二圈和第四圈的gDMR.hyper.txt
共4列,染色体ID,Start,End,值。
CHR1 1482 1487 0.167943
- 基因密度
根据gff文件提取
cut -f 1,3 chromosome.bed > test
bedtools makewindows -g test -w 1000000 > 1M
grep -w "gene" my.gff3 |awk '{print $1"\t"$4"\t"$5}'|uniq > gene.pos
bedtools intersect -a 1M -b gene.pos -c >gene.density
3.2 实例代码
python代码见下,细节部分注释了内容。希望能达到抛砖引玉的作用吧,祝大家科研顺利!
from pycirclize import Circos
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
np.random.seed(0)
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
# Initialize Circos from BED chromosomes
circos = Circos.initialize_from_bed("chromosome.bed", space=1,start=5,end=355,endspace=False) # 定义染色体,space设置间距
circos.text("图中标题,可以设置为组名vs组名", size=12, r=25,weight='bold') # 标题文字,默认位于在图中央
circos.text("Gene density", size=10, r=16,weight='bold')
# Plot chromosome name
for sector in circos.sectors:
sector.text(sector.name, size=5)
# Plot outer track
outer_track = sector.add_track((98, 100))
outer_track.axis(fc="lightgrey")
major_interval = 10000000 # 设置大刻度
minor_interval = 1000000 # 设置小刻度
if sector.size > minor_interval:
outer_track.xticks_by_interval(major_interval, label_formatter=lambda v: f"{v / 1000000:.0f} Mb" if v != 0 else None,label_size=4)
outer_track.xticks_by_interval(minor_interval, tick_length=1, show_label=False)
Hyper_CG=pd.read_table("CG_gDMR.hyper.txt",header=None)
x = np.array(Hyper_CG[Hyper_CG[0]==sector.name][1]+(Hyper_CG[Hyper_CG[0]==sector.name][2]-Hyper_CG[Hyper_CG[0]==sector.name][1])/2)
y = np.array(Hyper_CG[Hyper_CG[0]==sector.name][3])
track1 = sector.add_track((75, 95), r_pad_ratio=0.1)
# 注释圈名
if sector.name == circos.sectors[0].name:
circos.text("Hyper", r=track1.r_center, size=8)
for r in range(77, 95, 2):
sector.line(r=r, ec="lightgrey")
track1.scatter(x, y,c="#EECA40",vmin=0,vmax=1,ec="black",lw=0.1,alpha=0.5)
Hyper_CHG=pd.read_table("CHG_gDMR.hyper.txt",header=None)
x = np.array(Hyper_CHG[Hyper_CHG[0]==sector.name][1]+(Hyper_CHG[Hyper_CHG[0]==sector.name][2]-Hyper_CHG[Hyper_CHG[0]==sector.name][1])/2)
y = np.array(Hyper_CHG[Hyper_CHG[0]==sector.name][3])
track1.scatter(x, y,c="#FD763F",vmin=0,vmax=1,ec="black",lw=0.1,alpha=0.5)
Hyper_CHH=pd.read_table("CHH_gDMR.hyper.txt",header=None)
x = np.array(Hyper_CHH[Hyper_CHH[0]==sector.name][1]+(Hyper_CHH[Hyper_CHH[0]==sector.name][2]-Hyper_CHH[Hyper_CHH[0]==sector.name][1])/2)
y = np.array(Hyper_CHH[Hyper_CHH[0]==sector.name][3])
track1.scatter(x, y,c="#23BAC5",vmin=0,vmax=1,ec="black",lw=0.1,alpha=0.5)
# Add cytoband tracks from cytoband file
gene_density=pd.read_table("gene.density",header=None)
track2 = sector.add_track((70, 75), r_pad_ratio=0.1)
# 注释圈名
if sector.name == circos.sectors[0].name:
circos.text("Gene", r=track2.r_center, size=8)
track2.heatmap(gene_density[gene_density[0]==sector.name][3],vmin=0,vmax=max(gene_density[3]),cmap="Greys")
# 内圈的负值
Hyper_CG=pd.read_table("CG_gDMR.hypo.txt",header=None)
x = np.array(Hyper_CG[Hyper_CG[0]==sector.name][1]+(Hyper_CG[Hyper_CG[0]==sector.name][2]-Hyper_CG[Hyper_CG[0]==sector.name][1])/2)
y = np.array(Hyper_CG[Hyper_CG[0]==sector.name][3])
track3 = sector.add_track((50, 70), r_pad_ratio=0.1)
# 注释圈名
if sector.name == circos.sectors[0].name:
circos.text("Hypo", r=track3.r_center, size=8)
for r in range(52, 70, 2):
sector.line(r=r, ec="lightgrey")
track3.scatter(x, y,c="#EECA40",vmin=-1,vmax=0,ec="black",lw=0.1,alpha=0.5)
Hyper_CHG=pd.read_table("CHG_gDMR.hypo.txt",header=None)
x = np.array(Hyper_CHG[Hyper_CHG[0]==sector.name][1]+(Hyper_CHG[Hyper_CHG[0]==sector.name][2]-Hyper_CHG[Hyper_CHG[0]==sector.name][1])/2)
y = np.array(Hyper_CHG[Hyper_CHG[0]==sector.name][3])
track3.scatter(x, y,c="#FD763F",vmin=-1,vmax=0,ec="black",lw=0.1,alpha=0.5)
Hyper_CHH=pd.read_table("CHH_gDMR.hypo.txt",header=None)
x = np.array(Hyper_CHH[Hyper_CHH[0]==sector.name][1]+(Hyper_CHH[Hyper_CHH[0]==sector.name][2]-Hyper_CHH[Hyper_CHH[0]==sector.name][1])/2)
y = np.array(Hyper_CHH[Hyper_CHH[0]==sector.name][3])
track3.scatter(x, y,c="#23BAC5",vmin=-1,vmax=0,ec="black",lw=0.1,alpha=0.5)
circos.colorbar(bounds=(0.35, 0.55, 0.3, 0.01),vmin=0,vmax=max(gene_density[3]),orientation="horizontal",cmap="Greys")
fig = circos.plotfig()
# 图注画在圈中间
_ = circos.ax.legend(
handles=[
Line2D([], [], color="#EECA40", marker="o", label="CG", ms=6, ls="None"),
Line2D([], [], color="#FD763F", marker="o", label="CHG", ms=6, ls="None"),
Line2D([], [], color="#23BAC5", marker="o", label="CHH", ms=6, ls="None"),
],
bbox_to_anchor=(0.5, 0.45),
loc="center",
ncols=1,
)
fig.savefig("Circos.pdf") # 保存
fig.savefig("Circos.png",dpi=300)
更多内容敬请关注微信公众号: