参考了大神统计 VOC 数据集标签框数量的代码后,我针对自己标注的数据集做了灵活修改,在此表示感谢!
看代码吧~
# Count labelled bounding boxes per class in VOC-style XML annotations.
#
# For every class in CLASS_NAMES the script prints
#     <class> <images containing the class> <bounding boxes of the class>
# followed by "total <number of XML files> <number of counted boxes>".
import re
import os
import xml.etree.ElementTree as ET
from collections import Counter

# Classes to count.  Objects whose <name> is not listed here are ignored,
# so for a full VOC run list all twenty VOC class names instead.
CLASS_NAMES = ('answer', 'hand', 'write', 'music', 'phone')

annotation_folder = '/home/.../train/'  # change to your own annotation folder
# annotation_folder = '/home/.../VOC2007/Annotations/'


def file_name(file_dir):
    """Return the paths of all ``.xml`` files below *file_dir* (recursive).

    Args:
        file_dir: Directory that is walked with ``os.walk``.

    Returns:
        A list of file paths in ``os.walk`` order; empty when the directory
        does not exist or holds no ``.xml`` files.
    """
    return [
        os.path.join(root, entry)
        for root, _dirs, files in os.walk(file_dir)
        for entry in files
        if os.path.splitext(entry)[1] == '.xml'
    ]


def count_labels(xml_paths, class_names, verbose=False):
    """Count images and bounding boxes per class.

    Args:
        xml_paths: Iterable of annotation file paths.
        class_names: Class names to count; every other label is ignored.
        verbose: When true, print each path before it is parsed so a
            malformed file is easy to identify.

    Returns:
        ``(image_counts, box_counts)``: two ``Counter`` objects keyed by
        class name.  ``image_counts[c]`` is the number of files containing
        at least one object of class ``c``; ``box_counts[c]`` is the number
        of objects of class ``c``.  Classes that never occur read as 0.

    Raises:
        xml.etree.ElementTree.ParseError: If a file is not well-formed XML.
        OSError: If a file cannot be read.
    """
    wanted = set(class_names)
    image_counts = Counter()
    box_counts = Counter()
    for path in xml_paths:
        if verbose:
            print(path)
        # ET.parse reads bytes and honours the encoding declared in the XML,
        # and it closes the file it opened.
        root = ET.parse(path).getroot()
        # findtext() yields None for an <object> without <name>; None is
        # never in `wanted`, so such objects are skipped instead of crashing.
        labels = [obj.findtext('name') for obj in root.findall('object')]
        labels = [label for label in labels if label in wanted]
        box_counts.update(labels)
        # A class counts once per image, however many boxes it has there.
        image_counts.update(set(labels))
    return image_counts, box_counts


def main():
    """Print the per-class statistics for ``annotation_folder``."""
    xml_paths = file_name(annotation_folder)
    image_counts, box_counts = count_labels(xml_paths, CLASS_NAMES,
                                            verbose=True)
    for name in CLASS_NAMES:
        print(name, image_counts[name], box_counts[name])
    print("total", len(xml_paths), sum(box_counts.values()))


if __name__ == '__main__':
    main()
补充:【数据集处理】Python对目标检测数据集xml文件操作(统计目标种类、数量、面积、比例等&修改目标名字)
1. 根据xml文件统计目标种类以及数量
# -*- coding:utf-8 -*-
# Count the object classes, and the number of objects per class, found in a
# directory of VOC-style XML annotation files.
import os
import sys
import xml.etree.ElementTree as ET
from collections import Counter

import numpy as np

try:
    import matplotlib  # noqa: F401 -- imported by the original script, never used
except ImportError:  # plotting is not needed for the counts below
    matplotlib = None

# threshold=np.nan is rejected by current NumPy ("threshold must be non-NAN");
# sys.maxsize is the documented way to always print arrays in full.
np.set_printoptions(suppress=True, threshold=sys.maxsize)


def parse_obj(xml_path, filename):
    """Return the objects listed in one annotation file.

    Args:
        xml_path: Directory holding the annotation file (with or without a
            trailing path separator).
        filename: File name of the annotation, including ``.xml``.

    Returns:
        A list with one ``{'name': <label>}`` dict per ``<object>`` element,
        in document order.  Objects without a non-empty ``<name>`` are
        skipped.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the file cannot be read.
    """
    tree = ET.parse(os.path.join(xml_path, filename))
    objects = []
    for obj in tree.findall('object'):
        label = obj.findtext('name')
        if not label:
            continue
        objects.append({'name': label})
    return objects


def read_image(image_path, filename):
    """Return ``[width, height, width * height]`` of an image file.

    Args:
        image_path: Directory holding the image.
        filename: File name of the image.
    """
    # Imported here so the counting part of the script, which never reads
    # images, also runs where Pillow is not installed.
    from PIL import Image

    with Image.open(os.path.join(image_path, filename)) as im:
        W, H = im.size
    return [W, H, W * H]


def list_xml_stems(xml_path):
    """Return the sorted base names (without ``.xml``) of the XML files in *xml_path*.

    Other directory entries are ignored so that a stray non-XML file does
    not break parsing.
    """
    stems = []
    for entry in os.listdir(xml_path):
        stem, ext = os.path.splitext(entry)
        if ext == '.xml':
            stems.append(stem)
    return sorted(stems)


def count_objects(xml_path, stems):
    """Return a ``Counter`` mapping class name -> number of objects.

    Keys appear in the order in which the classes are first encountered.
    """
    counts = Counter()
    for stem in stems:
        counts.update(rec['name'] for rec in parse_obj(xml_path, stem + '.xml'))
    return counts


if __name__ == '__main__':
    xml_path = '/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/Annotations/'
    num_objs = count_objects(xml_path, list_xml_stems(xml_path))
    for name, number in num_objs.items():
        print('{}:{}个'.format(name, number))
    print('信息统计算完毕。')
2. 根据xml文件统计目标的平均长度、宽度、面积以及每一个目标在原图中的占比
# -*- coding:utf-8 -*-
# Statistics over VOC-style annotations.  For every class this script prints
# the mean box width, height and area, and the mean ratio of each of those
# to the width, height and area of the image the box belongs to.
import os
import xml.etree.ElementTree as ET

import numpy as np

try:
    import matplotlib  # noqa: F401 -- imported by the original script, never used
except ImportError:  # plotting is not needed for the statistics below
    matplotlib = None

np.set_printoptions(suppress=True, threshold=10000000)  # 10,000,000

_BBOX_TAGS = ('xmin', 'ymin', 'xmax', 'ymax')


def parse_obj(xml_path, filename):
    """Return the objects listed in one annotation file.

    Args:
        xml_path: Directory holding the annotation file (with or without a
            trailing path separator).
        filename: File name of the annotation, including ``.xml``.

    Returns:
        A list with one dict per usable ``<object>`` element, in document
        order: ``{'name': <label>, 'bbox': [xmin, ymin, xmax, ymax]}`` with
        integer coordinates.  Objects lacking a name, a ``<bndbox>`` or a
        numeric coordinate are skipped.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the file cannot be read.
    """
    tree = ET.parse(os.path.join(xml_path, filename))
    objects = []
    for obj in tree.findall('object'):
        label = obj.findtext('name')
        bbox = obj.find('bndbox')
        if not label or bbox is None:
            continue
        try:
            # Some labelling tools write "12.0"; int() alone rejects that.
            coords = [int(float(bbox.findtext(tag))) for tag in _BBOX_TAGS]
        except (TypeError, ValueError):
            continue  # coordinate missing or not a number
        objects.append({'name': label, 'bbox': coords})
    return objects


def read_image(image_path, filename):
    """Return ``[width, height, width * height]`` of an image file.

    Args:
        image_path: Directory holding the image.
        filename: File name of the image.
    """
    # Imported here so the XML helpers stay usable without Pillow.
    from PIL import Image

    # The context manager closes the file; only the header is needed.
    with Image.open(os.path.join(image_path, filename)) as im:
        W, H = im.size
    return [W, H, W * H]


def list_xml_stems(xml_path):
    """Return the sorted base names (without ``.xml``) of the XML files in *xml_path*.

    Other directory entries are ignored so that a stray non-XML file does
    not break parsing.
    """
    stems = []
    for entry in os.listdir(xml_path):
        stem, ext = os.path.splitext(entry)
        if ext == '.xml':
            stems.append(stem)
    return sorted(stems)


def average_shapes(recs, ims_info):
    """Average the box measurements of every class.

    Args:
        recs: Mapping image name -> list of objects as returned by
            ``parse_obj``.
        ims_info: Mapping image name -> ``[width, height, area]`` as
            returned by ``read_image``.

    Returns:
        A dict mapping class name -> NumPy array of six means:
        ``[width, height, area, width / image width,
        height / image height, area / image area]``.  Keys appear in the
        order in which the classes are first encountered.
    """
    rows_by_class = {}
    for image_name, objects in recs.items():
        im_w, im_h, im_area = ims_info[image_name]
        for ob in objects:
            xmin, ymin, xmax, ymax = ob['bbox']
            ob_w = xmax - xmin
            ob_h = ymax - ymin
            ob_area = ob_w * ob_h
            rows_by_class.setdefault(ob['name'], []).append(
                [ob_w, ob_h, ob_area,
                 ob_w / im_w, ob_h / im_h, ob_area / im_area])
    return {name: np.array(rows).mean(axis=0)
            for name, rows in rows_by_class.items()}


if __name__ == '__main__':
    image_path = '/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/JPEGImages/'
    xml_path = '/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/Annotations/'
    filenames = list_xml_stems(xml_path)
    print(filenames)

    recs = {}
    ims_info = {}
    for name in filenames:
        print('正在处理 {}.xml '.format(name))
        recs[name] = parse_obj(xml_path, name + '.xml')
        print('正在处理 {}.jpg '.format(name))
        ims_info[name] = read_image(image_path, name + '.jpg')
    print('所有信息收集完毕。')
    print('正在处理信息......')

    # Per-class averages.
    obj_avg = average_shapes(recs, ims_info)
    for name, avg in obj_avg.items():
        print('{}的情况如下:*******\n'.format(name))
        print(' 目标平均W={}'.format(avg[0]))
        print(' 目标平均H={}'.format(avg[1]))
        print(' 目标平均area={}'.format(avg[2]))
        print(' 目标平均与原图的W比例={}'.format(avg[3]))
        print(' 目标平均与原图的H比例={}'.format(avg[4]))
        print(' 目标平均原图面积占比={}\n'.format(avg[5]))
    print('信息统计计算完毕。')
3. 修改xml文件中某个目标的名字为另一个名字
# Rename object labels in VOC-style XML annotation files.
import os
import sys
import glob
from xml.etree import ElementTree as ET

# Annotation directory whose *.xml files are processed in bulk.
# Windows example: r'C:\Users\rockhuang\Desktop\Annotations'
xml_dir = r'/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/Annotations'

# old label -> new label
RENAMES = {
    'PinNormal': 'normal bolt',
    'PinDefect': 'defect bolt-1',
}


def rename_labels(xml_file, renames):
    """Rename object labels in one annotation file, in place.

    Every ``<object>/<name>`` whose text is a key of *renames* is replaced
    by the mapped value.  The tag and resulting text of each name element
    are printed.  The file is rewritten only when at least one label was
    changed.

    Args:
        xml_file: Path of the annotation file.
        renames: Mapping of old label -> new label.

    Returns:
        The number of labels that were changed.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the file cannot be read or written.
    """
    tree = ET.parse(xml_file)
    changed = 0
    # A relative path is used: ElementTree deprecates absolute paths such as
    # '/object' on a tree and warns about them.
    for obj in tree.findall('object'):
        # Element.getchildren() was removed in Python 3.9; looking the element
        # up by tag also does not depend on <name> being the first child.
        label = obj.find('name')
        if label is None:
            continue
        replacement = renames.get(label.text)
        if replacement is not None:
            label.text = replacement
            changed += 1
        print(label.tag, ':', label.text)
    if changed:
        # Written once per file.  UTF-8 keeps non-ASCII text (e.g. Chinese
        # folder names) readable instead of turning it into character
        # references, which the default us-ascii output would do.
        tree.write(xml_file, encoding='utf-8')
    return changed


if __name__ == '__main__':
    for xml in glob.glob(xml_dir + '/*.xml'):
        print(xml)
        rename_labels(xml, RENAMES)
修改为:
以上为个人经验,希望能给大家一个参考,也希望大家多多支持自学编程网。