使用Hog特征进行字母和数字的分类

news2025/7/7 18:04:59

目的：对字母和数字的二值图像进行识别。

整体思路：

1）对图像进行预处理；

对收集到的单个字符图像进行二值化，并做数据均衡，然后将所有字符图片直接 resize 为 20*20（也尝试过先等比例缩放、再 padding 到 20*20，但最终算法效果较差）。

2）提取hog特征

3）使用 SVM（或 lightGBM）进行分类

查阅的资料：

使用python代码，提取相关的hog特征：

参数设置：Python scikit-image 库 HOG 特征提取（参数解释） - 百度文库

本人使用的hog特征提取代码：

def get_hog():
    """Build the HOG descriptor used for feature extraction.

    Prints the descriptor length and returns the ``cv2.HOGDescriptor``.
    """
    # The values below are passed positionally, in the order:
    # winSize, blockSize, blockStride, cellSize, nbins, derivAperture,
    # winSigma, histogramNormType, L2HysThreshold, gammaCorrection,
    # nlevels, signedGradient.
    win_size = (SZ, SZ)
    block_size = (8, 8)
    block_stride = (4, 4)
    cell_size = (8, 8)
    nbins = 9
    deriv_aperture = 1
    win_sigma = -1
    histogram_norm_type = 0
    l2_hys_threshold = 0.2
    gamma_correction = 1
    nlevels = 64
    signed_gradient = True

    descriptor = cv2.HOGDescriptor(
        win_size, block_size, block_stride, cell_size, nbins,
        deriv_aperture, win_sigma, histogram_norm_type,
        l2_hys_threshold, gamma_correction, nlevels, signed_gradient,
    )
    print("get descriptor size: {}".format(descriptor.getDescriptorSize()))
    return descriptor

def get_data(train_data_path, train_dir, result_num):
    """Load the training images and compute one HOG vector per image.

    :param train_data_path: first location handed to ``get_file_list``.
    :param train_dir: second location handed to ``get_file_list``; its
        files are processed after those of ``train_data_path``.
    :param result_num: mapping from the name of an image's parent
        directory to its numeric class label.
    :return: ``(chars_train, chars_label)`` -- a float32 array with one
        row per successfully loaded image, and an array with the matching
        class labels.
    :raises KeyError: if an image's parent directory is not a key of
        ``result_num``.
    """
    import os  # local import keeps this block self-contained

    hog = get_hog()

    # Build a new list instead of extending the helper's return value in
    # place, so whatever get_file_list returned is left untouched.
    files = list(get_file_list(train_data_path)) + list(get_file_list(train_dir))

    chars_train = []
    chars_train_label = []
    for filepath in files:
        digit_img = cv2.imread(filepath)
        if digit_img is None:
            # cv2.imread signals an unreadable/non-image file by returning
            # None; skip it so cvtColor does not fail and features and
            # labels stay aligned.
            print("skip unreadable image: {}".format(filepath))
            continue
        digit_img = cv2.cvtColor(digit_img, cv2.COLOR_BGR2GRAY)

        # The class name is the image's parent directory; os.path handles
        # the platform's path separator.
        class_dir = os.path.basename(os.path.dirname(filepath))
        class_tag = result_num[class_dir]

        chars_train.append(hog.compute(deskew(digit_img)))
        chars_train_label.append(class_tag)

    features = np.float32(chars_train)
    if chars_train:
        # Flatten each descriptor to a row. Unlike np.squeeze this keeps
        # the result 2-D even when only one image was loaded.
        features = features.reshape(len(chars_train), -1)
    chars_label = np.array(chars_train_label)
    return features, chars_label

使用SVM进行分类的代码：

class SVM(StatModel):
    """C-SVC classifier with an RBF kernel, wrapping ``cv2.ml.SVM``.

    NOTE(review): ``load`` and ``save`` are not defined in this class and
    are presumably inherited from ``StatModel`` -- confirm against its
    definition.
    """

    def __init__(self, C=1, gamma=0.5):
        """Create the underlying OpenCV SVM with the given ``C`` and ``gamma``."""
        self.model = cv2.ml.SVM_create()
        self.model.setGamma(gamma)
        self.model.setC(C)
        self.model.setKernel(cv2.ml.SVM_RBF)
        self.model.setType(cv2.ml.SVM_C_SVC)
        # Termination criteria are left at the OpenCV defaults.

    def train(self, samples, responses):
        """Fit the wrapped model; every row of ``samples`` is one sample."""
        self.model.train(samples, cv2.ml.ROW_SAMPLE, responses)

    def predict(self, chars_test):
        """Return the raw result of the wrapped model's ``predict`` call."""
        return self.model.predict(chars_test)

    def predict_svm(self, test_imgList, tcName, model_path, c, g):
        """Evaluate a saved model and return the mean per-class accuracy.

        :param test_imgList: iterable of ``(features, labels, index)``
            triples, where ``index`` is the expected class label of every
            sample in ``features``.
        :param tcName: iterable of class names to report; each name is
            looked up in the module-level ``result_num`` mapping.
        :param model_path: file to load the model from, if it exists.
        :param c: ``C`` used to construct the wrapper before loading.
        :param g: ``gamma`` used to construct the wrapper before loading.
        :return: accuracy averaged over the classes in ``tcName`` that
            have at least one test sample (``0.0`` if none has).
        """
        # self.model becomes a nested SVM wrapper; self.predict() then
        # delegates through it to the OpenCV model.
        self.model = SVM(C=c, gamma=g)
        if os.path.exists(model_path):
            self.model.load(model_path)
        else:
            # Evaluation continues with the freshly created model.
            print('model_path is missing!')

        all_err_count = 0
        # Keyed by class index, so any number of classes is supported and
        # several batches of the same class add up.
        err_counts = {}
        true_counts = {}

        for chars_test, chars_test_label, index in test_imgList:
            sample_num = len(chars_test)
            if sample_num == 0:
                # Nothing to predict; also avoids dividing by zero below.
                continue

            first = time.time()
            r = self.predict(chars_test)
            end = time.time()
            print("Testing one pic spent {:.6f}s.".format((end - first) / sample_num))
            result = r[1].ravel()

            err_count = int(np.count_nonzero(result != index))
            true_count = len(chars_test_label) - err_count
            err_counts[index] = err_counts.get(index, 0) + err_count
            true_counts[index] = true_counts.get(index, 0) + true_count
            print("errorCount: {}.".format(err_count), "trueCount: {}.".format(true_count))
            all_err_count += err_count

        print("All error Count is: {}.".format(all_err_count))

        print("number", " TrueCount", " ErrCount")
        acc_sum = 0.0
        num_div = 0  # number of classes that actually had test samples
        for tcn in tcName:
            index = result_num[tcn]
            n_true = true_counts.get(index, 0)
            n_err = err_counts.get(index, 0)
            all_num = n_true + n_err
            if all_num == 0:
                # No samples for this class: accuracy is undefined, so it
                # is reported but excluded from the mean.
                print(tcn, "     ", n_true, "      ", n_err, 'acc：', 'n/a')
                continue
            num_div += 1
            acc = n_true / all_num
            acc_sum += acc
            print(tcn, "     ", n_true, "      ", n_err, 'acc：', acc)

        # Average over the evaluated classes, not a fixed class count.
        return acc_sum / num_div if num_div else 0.0

    def train_svm(self, chars_train, chars_label, c, g):
        """Train a fresh SVM wrapper on letters/digits and return it.

        The new wrapper replaces ``self.model``.
        """
        self.model = SVM(C=c, gamma=g)
        self.model.train(chars_train, chars_label)
        return self.model

    def save_trainmodel(self, path):
        """Save the current model to ``path`` unless that file already exists."""
        if not os.path.exists(path):
            self.model.save(path)

调用svm分类，并且进行最优参数查找：

    # Exhaustive grid search over (gamma, C): train, save and evaluate one
    # SVM per pair and remember the pair with the highest mean accuracy.
    best_score = 0
    best_parameters = {'gamma': 0.001, 'C': 0.001}
    gamma_grid = [0.001, 0.01, 0.1, 1, 10, 100]
    c_grid = [0.001, 0.01, 0.1, 1, 10, 100]
    for g, c in [(gamma, cost) for gamma in gamma_grid for cost in c_grid]:
        # This single combination is deliberately left out of the search.
        if (g, c) == (0.01, 0.001):
            continue

        model_name = 'svm_hog20221123_' + '_g_' + str(g) + '_c_' + str(c) + '.dat'
        model_path = os.path.join(model_path_save, model_name)

        svm_model = SVM(C=c, gamma=g)
        svm_model.train_svm(chars_train, chars_label, c, g)
        svm_model.save_trainmodel(model_path)

        acc = svm_model.predict_svm(test_imgList, tcName, model_path, c, g)
        print('c：', c, 'gamma：', g, 'MEANacc：', acc)
        print('============================================')
        # Keep the best-scoring parameters seen so far.
        if acc > best_score:
            best_parameters = {'gamma': g, 'C': c}
            best_score = acc
    print('best_score：', best_score, 'best_parameters:', best_parameters)

使用lightGBM分类

import lightgbm as lgb
    import datetime
    import sklearn
    # import warnings
    # warnings.filterwarnings('ignore')
    # folds = KFold(n_splits=5, shuffle=True, random_state=1996)

    # Model configuration.
    model = lgb.LGBMClassifier(
        boosting_type='gbdt',  # learner type ('dart' and 'goss' also exist); gbdt is the usual choice
        class_weight=None,
        colsample_bytree=1.0,
        importance_type='split',
        learning_rate=0.1,
        max_depth=5,  # upper bound on the depth of each tree
        min_child_samples=20,
        min_child_weight=0.001,
        min_split_gain=0.0,
        n_estimators=200,  # number of trees to fit, i.e. boosting rounds
        n_jobs=2,  # number of parallel threads
        num_leaves=31,  # leaves per tree; LightGBM grows leaf-wise, keep this below 2^max_depth
        # 'multi:softmax' is XGBoost naming; LightGBM calls the softmax
        # multi-class objective 'multiclass'.
        objective='multiclass',
        random_state=None,
        reg_alpha=0.0,
        reg_lambda=0.0,
        silent=True,
        subsample=1.0,
        subsample_for_bin=255,
    )

    # Evaluate on both the training and the test set while fitting, and
    # stop early when the metric has not improved for 20 rounds.
    # NOTE(review): the `early_stopping_rounds` / `verbose` fit arguments
    # and the `silent` constructor argument depend on the installed
    # LightGBM version -- confirm before upgrading.
    model.fit(chars_train, chars_label, eval_set=[(chars_train, chars_label), (test_imgs, test_labels)],
              eval_metric=['logloss'], early_stopping_rounds=20, verbose=True)

    model_path = "/home/fuxueping/4tdisk/data/certificate_reader/传统算法处理mrz/GBM_model/model.txt"
    # The whole estimator is serialized with joblib.
    joblib.dump(model, model_path)

    start = time.time()
    pred_y_test = model.predict(test_imgs)

数据样例：