ChestX-Det-Dataset数据集网址:https://github.com/Deepwise-AILab/ChestX-Det-Dataset/tree/main
数据集 JSON 内容示例(节选,仅展示前两条记录):
[
{
"file_name": "36199.png",
"syms": [],
"boxes": [],
"polygons": []
},
{
"file_name": "36302.png",
"syms": [
"Effusion"
],
"boxes": [
[
799,
666,
937,
761
]
],
"polygons": [
[
[
799,
678
],
[
799,
678
],
[
799,
680
],
[
801,
681
],
[
805,
684
],
[
807,
684
],
[
809,
685
],
[
811,
686
],
[
813,
686
],
[
814,
686
],
[
817,
687
],
[
820,
687
],
[
824,
690
],
[
827,
690
],
[
830,
691
],
[
832,
691
],
[
833,
691
],
[
836,
693
],
[
837,
693
],
[
840,
695
],
[
844,
696
],
[
848,
696
],
[
851,
697
],
[
854,
697
],
[
855,
697
],
[
856,
698
],
[
861,
699
],
[
864,
699
],
[
870,
701
],
[
872,
703
],
[
875,
704
],
[
878,
705
],
[
881,
705
],
[
886,
707
],
[
890,
709
],
[
894,
711
],
[
896,
713
],
[
897,
714
],
[
899,
714
],
[
902,
716
],
[
903,
717
],
[
906,
720
],
[
908,
721
],
[
910,
725
],
[
912,
726
],
[
914,
728
],
[
916,
731
],
[
916,
732
],
[
917,
733
],
[
918,
734
],
[
921,
738
],
[
922,
740
],
[
924,
741
],
[
925,
744
],
[
927,
745
],
[
929,
747
],
[
930,
751
],
[
931,
752
],
[
934,
753
],
[
935,
755
],
[
935,
756
],
[
935,
757
],
[
936,
758
],
[
937,
759
],
[
937,
761
],
[
937,
759
],
[
937,
757
],
[
937,
756
],
[
937,
752
],
[
937,
750
],
[
937,
747
],
[
937,
745
],
[
937,
744
],
[
937,
743
],
[
937,
741
],
[
937,
740
],
[
937,
739
],
[
937,
738
],
[
937,
737
],
[
937,
735
],
[
937,
733
],
[
937,
731
],
[
937,
729
],
[
937,
728
],
[
937,
726
],
[
937,
723
],
[
937,
720
],
[
937,
717
],
[
937,
716
],
[
936,
714
],
[
935,
710
],
[
935,
709
],
[
935,
708
],
[
934,
705
],
[
934,
704
],
[
934,
703
],
[
934,
702
],
[
933,
701
],
[
933,
698
],
[
933,
696
],
[
931,
695
],
[
931,
692
],
[
931,
691
],
[
930,
690
],
[
930,
686
],
[
930,
685
],
[
929,
681
],
[
929,
680
],
[
929,
679
],
[
929,
677
],
[
928,
674
],
[
928,
673
],
[
927,
672
],
[
927,
671
],
[
925,
671
],
[
924,
668
],
[
924,
666
],
[
924,
667
],
[
924,
669
],
[
924,
672
],
[
924,
674
],
[
923,
677
],
[
923,
678
],
[
923,
679
],
[
923,
680
],
[
922,
681
],
[
921,
683
],
[
921,
684
],
[
920,
685
],
[
918,
685
],
[
918,
686
],
[
915,
689
],
[
912,
690
],
[
910,
691
],
[
909,
692
],
[
908,
692
],
[
906,
692
],
[
905,
693
],
[
904,
693
],
[
902,
695
],
[
900,
695
],
[
900,
695
],
[
899,
695
],
[
898,
695
],
[
896,
695
],
[
894,
693
],
[
891,
693
],
[
887,
693
],
[
886,
693
],
[
884,
693
],
[
881,
692
],
[
879,
692
],
[
876,
691
],
[
874,
691
],
[
870,
690
],
[
867,
690
],
[
866,
690
],
[
863,
690
],
[
861,
689
],
[
860,
689
],
[
857,
689
],
[
856,
687
],
[
854,
687
],
[
851,
687
],
[
848,
686
],
[
845,
686
],
[
842,
686
],
[
840,
686
],
[
839,
685
],
[
837,
685
],
[
834,
684
],
[
828,
683
],
[
825,
683
],
[
822,
681
],
[
819,
680
],
[
815,
679
],
[
814,
679
],
[
812,
679
],
[
811,
679
],
[
811,
678
],
[
808,
677
],
[
806,
675
],
[
803,
675
],
[
802,
675
],
[
801,
675
]
]
]
},
转换后的 COCO 格式样本 JSON:
使用的 Python 代码如下:
import json
import os
import sys
import cv2
from tqdm import tqdm
import math
# Paths used by main() -- currently set up for the training split.
che_json = "./chetrain.json"        # ChestX-Det annotation JSON that is read
dst_json = "./chestrain_coco.json"  # COCO-style JSON that is written
test_img = "./train_data/train"     # directory the images are loaded from

# To convert the test split instead, swap in these values:
# che_json = './chetest.json'
# dst_json = './chetest_coco.json'
# test_img = './test_data/test'
def polygon_area(vertices):
    """Return the area enclosed by a polygon, using the shoelace formula.

    Args:
        vertices: Sequence of ``(x, y)`` pairs listing the polygon's corners
            in order. The last vertex is joined back to the first, so the
            ring does not need to be closed explicitly. Either winding
            direction is accepted.

    Returns:
        The non-negative area as a float. Inputs with fewer than three
        vertices enclose nothing and yield ``0.0``.
    """
    n = len(vertices)
    twice_signed_area = 0.0
    for i, (x1, y1) in enumerate(vertices):
        # Wrap around so the final edge connects the last vertex to the first.
        x2, y2 = vertices[(i + 1) % n]
        twice_signed_area += x1 * y2 - x2 * y1
    # The sign only encodes winding direction; the magnitude is twice the area.
    return abs(twice_signed_area) / 2.0
def main():
    """Convert the ChestX-Det annotation JSON into a COCO-style JSON file.

    Reads the record list from the module-level ``che_json`` path, loads each
    referenced image from the ``test_img`` directory to obtain its height and
    width, and writes the assembled COCO dictionary to ``dst_json``.

    Numbering:
        * Image ids and annotation ids both start at 1.
        * Category ids start at 1 and are assigned in order of first
          appearance in the records' ``syms`` lists, so the mapping depends
          on the content and order of the input file.

    Each record's ``syms``, ``boxes`` and ``polygons`` lists are matched by
    index. Boxes are stored in the input as ``[x_min, y_min, x_max, y_max]``
    and are written as COCO ``[x, y, width, height]``.

    Raises:
        FileNotFoundError: If an image named in the JSON cannot be read from
            the image directory.
    """
    coco_data = {
        "info": {},
        "licenses": [],
        "categories": [],
        "images": [],
        "annotations": []
    }
    category_mapping = {}
    annotation_id = 0

    with open(che_json, 'r', encoding='utf-8') as js:
        json_info = json.load(js)

    for image_id, jsfo in enumerate(tqdm(json_info), start=1):
        file_name = jsfo['file_name']
        img_path = os.path.join(test_img, file_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than
            # raising, so fail here with a message that names the file.
            raise FileNotFoundError(f"Cannot read image: {img_path}")
        coco_data["images"].append({
            'file_name': file_name,
            'height': img.shape[0],
            'width': img.shape[1],
            'id': image_id,
        })

        # Register any category not seen before; ids follow discovery order.
        category_names = jsfo['syms']
        for name in category_names:
            if name not in category_mapping:
                new_id = len(category_mapping) + 1
                category_mapping[name] = new_id
                coco_data["categories"].append({
                    "supercategory": name,
                    "id": new_id,
                    "name": name
                })

        # The i-th box, polygon and symptom name describe the same finding.
        for i, (x_left, y_left, x_br, y_br) in enumerate(jsfo['boxes']):
            annotation_id += 1
            polygon = jsfo['polygons'][i]
            # COCO stores a polygon as one flat [x1, y1, x2, y2, ...] list.
            flat_points = []
            for point in polygon:
                flat_points.extend(point[:2])
            coco_data["annotations"].append({
                'segmentation': [flat_points],
                'image_id': image_id,
                'area': polygon_area(polygon),
                'bbox': [x_left, y_left, x_br - x_left, y_br - y_left],
                'category_id': category_mapping[category_names[i]],
                'id': annotation_id,
                # Polygon annotations describe single instances, not crowds.
                'iscrowd': 0,
            })

    with open(dst_json, 'w', encoding='utf-8') as jsout:
        json.dump(coco_data, jsout)
if __name__ == '__main__':
    # Run the conversion only when executed as a script, not when imported.
    main()
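注意:category_mapping 是按类别在 JSON 中首次出现的顺序动态生成的,因此训练集和验证集分别运行时,得到的类别 ID 可能不一致。建议先对其中一个数据集(训练集或验证集)运行一次,保存生成的 category_mapping 字典内容,之后两个数据集统一使用同一份映射,这样就可以保证训练集和验证集的标签一致。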