集成学习模型对比优化—银行业务

news2024/10/6 14:36:01

1.Data Understanding¶

2.Data Exploration

3.Data Preparation

4.Training Models

5.Optimization Model

集成学习模型对比优化—银行业务

1.Data Understanding¶

import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
df = pd.read_csv("D:\\课程学习\\机器学习\\银行客户开设定期存款账户情况预测\\banking.csv")
#Print the shape of the DataFrame
print("1.the shape of the DataFrame")
print(df.shape)

在这里插入图片描述

# Print the head of the DataFrame
print("2.the head of the DataFrame")
print(df.head())

在这里插入图片描述

# Print info of the DataFrame
print("3.the info of the DataFrame")
print(df.info())

在这里插入图片描述

# Print statistical description of the DataFrame
print("4.the statistical description of the DataFrame")
print(df.describe())

在这里插入图片描述

# Check for any null values in the DataFrame
print("5.Check for any null values in the DataFrame")
datacheck = df.isnull().any()
print(datacheck)

在这里插入图片描述

# Check for duplicates
print("6.Check for duplicates")
duplicates = df.duplicated()
print(f"Number of duplicated rows: {duplicates.sum()}")

在这里插入图片描述

print("7.See the duplicated rows")
# See the duplicated rows:
if duplicates.sum() > 0:
    print("\nDuplicated Rows:")
    print(df[duplicates])

在这里插入图片描述

#pick out the non_numeric_columns
non_numeric_columns = df.select_dtypes(exclude=['number']).columns.to_list()
numeric_columns = df.select_dtypes(include=['number']).columns
print(non_numeric_columns)

2.Data Exploration

#statistics description of continous variable
print("1.statistics description of continous variable")
df.describe()

在这里插入图片描述

print("2.Check the distribution of labels")
print(df['y'].value_counts())

在这里插入图片描述

# histograms of continuous variables
print("3.histograms of continuous variables")
fig, axes = plt.subplots(nrows=len(df.select_dtypes(include=['int64', 'float64']).columns), figsize=(6, 3*len(df.select_dtypes(include=['int64', 'float64']).columns)))
df.select_dtypes(include=['int64', 'float64']).hist(ax=axes,grid=False)
plt.tight_layout()
plt.show()

在这里插入图片描述

# histogram of categorical variables
print("4.plt the histogram of categorical variables")
#categorical_features = non_numeric_columns = df.select_dtypes(exclude=['number']).columns.to_list()
categorical_features = ["marital", "default", "housing","loan","contact","poutcome","y"]
fig, ax = plt.subplots(1, len(categorical_features), figsize=(25,3))

for i, categorical_feature in enumerate(df[categorical_features]):
    df[categorical_feature].value_counts().plot(kind="bar", ax=ax[i], rot=0).set_title(categorical_feature)

plt.tight_layout()
plt.show()

在这里插入图片描述

print("5.Check for high correlations")
import numpy as np

# Check for high correlations
correlation_matrix = df.corr().abs()

# Get pairs of highly correlated features
high_corr_var = np.where(correlation_matrix > 0.6)
high_corr_var = [(correlation_matrix.columns[x], correlation_matrix.columns[y])
                 for x, y in zip(*high_corr_var) if x != y and x < y]
print("Highly correlated pairs:", high_corr_var)

# Filter pairs by correlation threshold
threshold = 0.7
high_corr_pairs = {}
for column in correlation_matrix.columns:
    for index in correlation_matrix.index:
        # We'll only consider pairs of different columns and correlations above the threshold
        if column != index and abs(correlation_matrix[column][index]) > threshold:
            # We'll also ensure we don't duplicate pairs (i.e., A-B and B-A)
            sorted_pair = tuple(sorted([column, index]))
            if sorted_pair not in high_corr_pairs:
                high_corr_pairs[sorted_pair] = correlation_matrix[column][index]

# Display the high correlation pairs
for pair, corr_value in high_corr_pairs.items():
    print(f"Correlation between {pair[0]} and {pair[1]}: {corr_value:.2f}")

在这里插入图片描述

#Correlation Heatmap
print("6.Correlation Heatmap")
import seaborn as sns
df_duplicate = df.copy()
df_duplicate['y'] = df['y'].map({'no': 0, 'yes': 1})
correlation_matrix2 = df_duplicate.corr().abs()
plt.figure(figsize=(12, 10))
sns.heatmap(correlation_matrix2, cmap='Greens', annot=True)
plt.show()

在这里插入图片描述

# Plotting count plots for each categorical variable with respect to the 'y' column
print("Plotting count plots for each categorical variable with respect to the 'y' column")
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
print(categorical_cols)
#categorical_cols.remove('y')

# Setting up the figure size
plt.figure(figsize=(20, 20))

for i, col in enumerate(categorical_cols, 1):
    plt.subplot(4, 3, i)
    sns.countplot(data=df, x=col, hue='y', order=df[col].value_counts().index)
    plt.title(f'Distribution of {col} by y')
    plt.xticks(rotation=45)
    plt.tight_layout()

plt.show()

在这里插入图片描述

# Distribution of y by Month
print("Distribution of y by Month")
plt.figure(figsize=(11, 5))
sns.countplot(data=df, x='month', hue='y', order=['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'])
plt.title('Distribution of y by Month')
plt.ylabel('Count')
plt.xlabel('Month')
plt.tight_layout()
plt.show()

在这里插入图片描述

3.Data Preparation

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the dataset and drop duplicates
df = pd.read_csv("D:\\课程学习\\机器学习\\银行客户开设定期存款账户情况预测\\banking.csv").drop_duplicates()
# Convert the 'y' column to dummy variables
#y = pd.get_dummies(df['y'], drop_first=True)
y = df.iloc[:, -1]


# Process client-related data
bank_client = df.iloc[:, 0:7]
labelencoder_X = LabelEncoder()
columns_to_encode = ['job', 'marital', 'education', 'default', 'housing', 'loan']
for col in columns_to_encode:
    bank_client[col] = labelencoder_X.fit_transform(bank_client[col])

# Process bank-related data
bank_related = df.iloc[:, 7:11]
#columns_to_encode=df.select_dtypes(include=['object']).columns.tolist()
columns_to_encode = ['contact', 'month', 'day_of_week']
for col in columns_to_encode:
    bank_related[col] = labelencoder_X.fit_transform(bank_related[col])

# Process sociol & economic data (se) and other bank data (others)
bank_se = df.loc[:, ['emp_var_rate', 'cons_price_idx', 'cons_conf_idx', 'euribor3m', 'nr_employed']]
bank_others = df.loc[:, ['campaign', 'pdays', 'previous', 'poutcome']]
bank_others['poutcome'].replace(['nonexistent', 'failure', 'success'], [1, 2, 3], inplace=True)

# Concatenate all the processed parts, reorder columns, and save to CSV
bank_final = pd.concat([bank_client, bank_related,bank_others, bank_se,y], axis=1)
columns_order = ['age', 'job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month',
                 'day_of_week', 'duration', 'campaign', 'pdays', 'previous', 'poutcome', 'emp_var_rate',
                 'cons_price_idx', 'cons_conf_idx', 'euribor3m', 'nr_employed', 'y']

bank_final = bank_final[columns_order]

bank_final.to_csv('bank_final.csv', index=False)

# Display basic information about the final dataframe
print(bank_final.shape)
print(bank_final.head())
print(bank_final.info())
print(bank_final.describe())

# Check for any null values in the DataFrame
datacheck = bank_final.isnull().any()
print(datacheck)

count = bank_final['y'].value_counts()
print(count)

在这里插入图片描述

4.Training Models

import warnings
warnings.filterwarnings("ignore")
import pandas as pd
from sklearn.model_selection import train_test_split  


x = bank_final.drop('y', axis=1)
y = bank_final['y']

# Split bank_final into training and testing sets (80%-20%)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

print(X_train.shape)
print(X_test.shape)

import numpy as np 
import pandas as pd 
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import cohen_kappa_score

#计算评价指标:用df_eval数据框装起来计算的评价指标数值
def evaluation(y_test, y_predict):
    accuracy=classification_report(y_test, y_predict,output_dict=True)['accuracy']
    s=classification_report(y_test, y_predict,output_dict=True)['weighted avg']
    precision=s['precision']
    recall=s['recall']
    f1_score=s['f1-score']
    #kappa=cohen_kappa_score(y_test, y_predict)
    return accuracy,precision,recall,f1_score


import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve,auc

def roc(y_test,y_proba):
    gbm_auc = roc_auc_score(y_test, y_proba[:, 1])  # 计算auc
    gbm_fpr, gbm_tpr, gbm_threasholds = roc_curve(y_test, y_proba[:, 1])  # 计算ROC的值
    return gbm_auc,gbm_fpr, gbm_tpr
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_val_score
import time
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

model0 = SVC(kernel="rbf", random_state=77,probability=True)
model1 = MLPClassifier(hidden_layer_sizes=(16,8), random_state=77, max_iter=10000)
model2 = LogisticRegression()
model3 = RandomForestClassifier()
model4 = AdaBoostClassifier()
model5 = GradientBoostingClassifier()
model6 = XGBClassifier()
model7 = LGBMClassifier()


model_list=[model1,model2,model3,model4,model5,model6,model7]
model_name=['BP','Logistic Regression', 'Random Forest', 'AdaBoost', 'GBDT', 'XGBoost','LightGBM']

df_eval=pd.DataFrame(columns=['Accuracy','Precision','Recall','F1_score','Running_time'])
plt.figure(figsize=(9, 7))
plt.title("ROC")
plt.plot([0,1],[0,1],'r--')
plt.xlim([0,1])
plt.ylim([0,1])
plt.xlabel('false positive rate')  # specificity = 1 - np.array(gbm_fpr))
plt.ylabel('true positive rate')  # sensitivity = gbm_tpr
for i in range(7):
    model_C=model_list[i]
    name=model_name[i]
    
    start = time.time()   
    model_C.fit(X_train, y_train)
    end = time.time()
    running_time = end - start
    
    pred=model_C.predict(X_test)
    pred_proba = model_C.predict_proba(X_test) 
    #s=classification_report(y_test, pred)
    s=evaluation(y_test,pred)
    s=list(s)
    s.append(running_time)
    df_eval.loc[name,:]=s
    
    gbm_auc,gbm_fpr, gbm_tpr=roc(y_test,pred_proba)
    plt.plot(list(np.array(gbm_fpr)), gbm_tpr, label="%s(AUC=%.4f)"% (name, gbm_auc))
    
print(df_eval)

plt.legend(loc='upper right')
plt.show()


在这里插入图片描述

# Confusion matrix graph
from lightgbm import LGBMClassifier
from sklearn.metrics import confusion_matrix

model=LGBMClassifier()
model.fit(X_train, y_train)   
pred=model.predict(X_test)
Confusion_Matrixn = confusion_matrix(y_test, pred)
Confusion_Matrix = confusion_matrix(y_test, pred,normalize='true')
print(Confusion_Matrixn)

from sklearn.metrics import ConfusionMatrixDisplay

#one way
disp = ConfusionMatrixDisplay(confusion_matrix=Confusion_Matrix)#display_labels=LR_model.classes_
disp.plot(cmap='Greens')#supported values are 'Accent', 'Accent_r', 'Blues', 'Blues_r', 'BrBG', 'BrBG_r', 'BuGn', 'BuGn_r', 'BuPu', 'BuPu_r', 'CMRmap', 'CMRmap_r', 'Dark2', 'Dark2_r', 'GnBu', 'GnBu_r', 'Greens', 'Greens_r', 'Greys', 'Greys_r', 'OrRd', 'OrRd_r', 'Oranges', 'Oranges_r', 'PRGn', 'PRGn_r', 'Paired', 'Paired_r', 'Pastel1', 'Pastel1_r', 'Pastel2', 'Pastel2_r', 'PiYG', 'PiYG_r', 'PuBu', 'PuBuGn', 'PuBuGn_r', 'PuBu_r', 'PuOr', 'PuOr_r', 'PuRd', 'PuRd_r', 'Purples', 'Purples_r', 'RdBu', 'RdBu_r', 'RdGy', 'RdGy_r', 'RdPu', 'RdPu_r', 'RdYlBu', 'RdYlBu_r', 'RdYlGn', 'RdYlGn_r', 'Reds', 'Reds_r', 'Set1', 'Set1_r', 'Set2', 'Set2_r', 'Set3', 'Set3_r', 'Spectral', 'Spectral_r', 'Wistia', 'Wistia_r', 'YlGn', 'YlGnBu', 'YlGnBu_r', 'YlGn_r', 'YlOrBr', 'YlOrBr_r', 'YlOrRd', 'YlOrRd_r', 'afmhot', 'afmhot_r', 'autumn', 'autumn_r', 'binary', 'binary_r', 'bone', 'bone_r', 'brg', 'brg_r', 'bwr', 'bwr_r', 'cividis', 'cividis_r', 'cool', 'cool_r', 'coolwarm', 'coolwarm_r', 'copper', 'copper_r', 'crest', 'crest_r', 'cubehelix', 'cubehelix_r', 'flag', 'flag_r', 'flare', 'flare_r', 'gist_earth', 'gist_earth_r', 'gist_gray', 'gist_gray_r', 'gist_heat', 'gist_heat_r', 'gist_ncar', 'gist_ncar_r', 'gist_rainbow', 'gist_rainbow_r', 'gist_stern', 'gist_stern_r', 'gist_yarg', 'gist_yarg_r', 'gnuplot', 'gnuplot2', 'gnuplot2_r', 'gnuplot_r', 'gray', 'gray_r', 'hot', 'hot_r', 'hsv', 'hsv_r', 'icefire', 'icefire_r', 'inferno', 'inferno_r', 'jet', 'jet_r', 'magma', 'magma_r', 'mako', 'mako_r', 'nipy_spectral', 'nipy_spectral_r', 'ocean', 'ocean_r', 'pink', 'pink_r', 'plasma', 'plasma_r', 'prism', 'prism_r', 'rainbow', 'rainbow_r', 'rocket', 'rocket_r', 'seismic', 'seismic_r', 'spring', 'spring_r', 'summer', 'summer_r', 'tab10', 'tab10_r', 'tab20', 'tab20_r', 'tab20b', 'tab20b_r', 'tab20c', 'tab20c_r', 'terrain', 'terrain_r', 'turbo', 'turbo_r', 'twilight', 'twilight_r', 'twilight_shifted', 'twilight_shifted_r', 'viridis', 'viridis_r', 'vlag', 'vlag_r', 'winter', 'winter_r'

#disp.color='blue'
plt.show()

在这里插入图片描述

5.Optimization Model

#Train Test split with Undersampling to bring 0 to 10,000 and Oversampling to bring 1 to 10,000
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.preprocessing import StandardScaler
# Apply SMOTE to oversample class 1 to 10,000 samples
smote_strategy = {1: 10000}
smote = SMOTE(sampling_strategy=smote_strategy, random_state=42)
x_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Apply RandomUnderSampler to undersample class 0 to 10,000 samples
undersample_strategy = {0: 10000}
undersampler = RandomUnderSampler(sampling_strategy=undersample_strategy, random_state=42)
x_train_resampled, y_train_resampled = undersampler.fit_resample(x_train_resampled, y_train_resampled)


# Check the new class distribution
print(y_train_resampled.value_counts())

在这里插入图片描述

# Check for NaN values in x_train_resampled after resampling
nan_values = x_train_resampled.isnull().sum()
print("\nNumber of NaN values in each column:")
print(nan_values)

在这里插入图片描述

# If there are any NaN values, print the columns that contain them
nan_columns = nan_values[nan_values > 0].index.tolist()
if nan_columns:
    print("\nColumns with NaN values:", nan_columns)
else:
    print("\nThere are no NaN values in the resampled data.")

在这里插入图片描述

# Before resampling
print(y_train.value_counts())

# After resampling
print(y_train_resampled.value_counts())

在这里插入图片描述

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_val_score
import time
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

model0 = SVC(kernel="rbf", random_state=77,probability=True)
model1 = MLPClassifier(hidden_layer_sizes=(16,8), random_state=77, max_iter=10000)
model2 = LogisticRegression()
model3 = RandomForestClassifier()
model4 = AdaBoostClassifier()
model5 = GradientBoostingClassifier()
model6 = XGBClassifier()
model7 = LGBMClassifier()


model_list=[model1,model2,model3,model4,model5,model6,model7]
model_name=['BP','Logistic Regression', 'Random Forest', 'AdaBoost', 'GBDT', 'XGBoost','LightGBM']

df_eval=pd.DataFrame(columns=['Accuracy','Precision','Recall','F1_score','Running_time'])
plt.figure(figsize=(9, 7))
plt.title("ROC")
plt.plot([0,1],[0,1],'r--')
plt.xlim([0,1])
plt.ylim([0,1])
plt.xlabel('false positive rate')  # specificity = 1 - np.array(gbm_fpr))
plt.ylabel('true positive rate')  # sensitivity = gbm_tpr
for i in range(7):
    model_C=model_list[i]
    name=model_name[i]
    
    start = time.time()   
    model_C.fit(x_train_resampled, y_train_resampled)
    end = time.time()
    running_time = end - start
    
    pred=model_C.predict(X_test)
    pred_proba = model_C.predict_proba(X_test) 
    #s=classification_report(y_test, pred)
    s=evaluation(y_test,pred)
    s=list(s)
    s.append(running_time)
    df_eval.loc[name,:]=s
    
    gbm_auc,gbm_fpr, gbm_tpr=roc(y_test,pred_proba)
    plt.plot(list(np.array(gbm_fpr)), gbm_tpr, label="%s(AUC=%.4f)"% (name, gbm_auc))
    
print(df_eval)
plt.legend(loc='upper right')
plt.show()

在这里插入图片描述
在这里插入图片描述

# Confusion matrix graph
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix

model=XGBClassifier()
model.fit(x_train_resampled, y_train_resampled)   
pred=model.predict(X_test)

Confusion_Matrixn = confusion_matrix(y_test, pred)
Confusion_Matrix = confusion_matrix(y_test, pred,normalize='true')
print(Confusion_Matrixn)

from sklearn.metrics import ConfusionMatrixDisplay

#one way
disp = ConfusionMatrixDisplay(confusion_matrix=Confusion_Matrix)#display_labels=LR_model.classes_
disp.plot(cmap='Greens')#supported values are 'Accent', 'Accent_r', 'Blues', 'Blues_r', 'BrBG', 'BrBG_r', 'BuGn', 'BuGn_r', 'BuPu', 'BuPu_r', 'CMRmap', 'CMRmap_r', 'Dark2', 'Dark2_r', 'GnBu', 'GnBu_r', 'Greens', 'Greens_r', 'Greys', 'Greys_r', 'OrRd', 'OrRd_r', 'Oranges', 'Oranges_r', 'PRGn', 'PRGn_r', 'Paired', 'Paired_r', 'Pastel1', 'Pastel1_r', 'Pastel2', 'Pastel2_r', 'PiYG', 'PiYG_r', 'PuBu', 'PuBuGn', 'PuBuGn_r', 'PuBu_r', 'PuOr', 'PuOr_r', 'PuRd', 'PuRd_r', 'Purples', 'Purples_r', 'RdBu', 'RdBu_r', 'RdGy', 'RdGy_r', 'RdPu', 'RdPu_r', 'RdYlBu', 'RdYlBu_r', 'RdYlGn', 'RdYlGn_r', 'Reds', 'Reds_r', 'Set1', 'Set1_r', 'Set2', 'Set2_r', 'Set3', 'Set3_r', 'Spectral', 'Spectral_r', 'Wistia', 'Wistia_r', 'YlGn', 'YlGnBu', 'YlGnBu_r', 'YlGn_r', 'YlOrBr', 'YlOrBr_r', 'YlOrRd', 'YlOrRd_r', 'afmhot', 'afmhot_r', 'autumn', 'autumn_r', 'binary', 'binary_r', 'bone', 'bone_r', 'brg', 'brg_r', 'bwr', 'bwr_r', 'cividis', 'cividis_r', 'cool', 'cool_r', 'coolwarm', 'coolwarm_r', 'copper', 'copper_r', 'crest', 'crest_r', 'cubehelix', 'cubehelix_r', 'flag', 'flag_r', 'flare', 'flare_r', 'gist_earth', 'gist_earth_r', 'gist_gray', 'gist_gray_r', 'gist_heat', 'gist_heat_r', 'gist_ncar', 'gist_ncar_r', 'gist_rainbow', 'gist_rainbow_r', 'gist_stern', 'gist_stern_r', 'gist_yarg', 'gist_yarg_r', 'gnuplot', 'gnuplot2', 'gnuplot2_r', 'gnuplot_r', 'gray', 'gray_r', 'hot', 'hot_r', 'hsv', 'hsv_r', 'icefire', 'icefire_r', 'inferno', 'inferno_r', 'jet', 'jet_r', 'magma', 'magma_r', 'mako', 'mako_r', 'nipy_spectral', 'nipy_spectral_r', 'ocean', 'ocean_r', 'pink', 'pink_r', 'plasma', 'plasma_r', 'prism', 'prism_r', 'rainbow', 'rainbow_r', 'rocket', 'rocket_r', 'seismic', 'seismic_r', 'spring', 'spring_r', 'summer', 'summer_r', 'tab10', 'tab10_r', 'tab20', 'tab20_r', 'tab20b', 'tab20b_r', 'tab20c', 'tab20c_r', 'terrain', 'terrain_r', 'turbo', 'turbo_r', 'twilight', 'twilight_r', 'twilight_shifted', 'twilight_shifted_r', 'viridis', 'viridis_r', 'vlag', 'vlag_r', 'winter', 'winter_r'

#disp.color='blue'
plt.show()

在这里插入图片描述
数据集来源:https://www.heywhale.com/home/user/profile/6535165d9217caa11b5ee5b3/overview

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/1809506.html

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!

相关文章

表的设计与查询

目录 一、表的设计 1.第一范式&#xff08;一对一&#xff09; 定义&#xff1a; 示例&#xff1a; 2.第二范式&#xff08;一对多&#xff09; 定义&#xff1a; 要求&#xff1a; 示例&#xff1a; 3.第三范式&#xff08;多对多&#xff09; 定义&#xff1a; 要求…

Bio-Info每日一题:Rosalind-06-Counting Point Mutations

&#x1f389; 进入生物信息学的世界&#xff0c;与Rosalind一起探索吧&#xff01;&#x1f9ec; Rosalind是一个在线平台&#xff0c;专为学习和实践生物信息学而设计。该平台提供了一系列循序渐进的编程挑战&#xff0c;帮助用户从基础到高级掌握生物信息学知识。无论你是初…

每日算法——归并排序

什么是归并排序 归并排序是一种分治算法。它将数组不断地分成两半&#xff0c;对每一半进行排序&#xff0c;然后再将排序好的两半合并起来。通过不断重复这个过程&#xff0c;最终得到完全排序的数组。 归并排序的注意点&#xff1a; 空间复杂度&#xff1a;归并排序需要额…

javascript动态绑定

介绍 先来看看ai的解释 动态绑定机制是面向对象编程中的一个核心概念&#xff0c;特别是在Java这样的语言中。它允许在运行时根据对象的实际类型来决定调用哪个方法&#xff0c;而不是在编译时。这是多态性的关键特性之一。 在Java中&#xff0c;动态绑定是通过方法调用和方法…

C#——枚举类型详情

枚举类型 枚举类型&#xff08;也可以称为“枚举器”&#xff09;由一组具有独立标识符&#xff08;名称&#xff09;的整数类型常量构成&#xff0c;在 C# 中枚举类型不仅可以在类或结构体的内部声明&#xff0c;也可以在类或结构体的外部声明&#xff0c;默认情况下枚举类型…

ViT:2 理解CLIP

大模型技术论文不断&#xff0c;每个月总会新增上千篇。本专栏精选论文重点解读&#xff0c;主题还是围绕着行业实践和工程量产。若在某个环节出现卡点&#xff0c;可以回到大模型必备腔调或者LLM背后的基础模型新阅读。而最新科技&#xff08;Mamba,xLSTM,KAN&#xff09;则提…

大模型基础——从零实现一个Transformer(2)

大模型基础——从零实现一个Transformer(1) 一、引言 上一章主要实现了一下Transformer里面的BPE算法和 Embedding模块定义 本章主要讲一下 Transformer里面的位置编码以及多头注意力 二、位置编码 2.1正弦位置编码(Sinusoidal Position Encoding) 其中&#xff1a; pos&…

linux中xterm窗口怎么调整字体大小

需求&#xff1a;打开的xterm窗口字体比较小&#xff0c;怎么才能调整字体大小&#xff0c;打开的大写&#xff1a; 解决方法&#xff1a; 在home目录下搞一个设置文件 .Xresource&#xff0c;里面内容如下 然后把设置文件添加到 .tcshrc 文件中生效 这样重新打开的xterm字…

MySQL数据库(二)和java复习

一.MySQL数据库学习(二) (一).DQL查询数据 DQL&#xff08;Data Query Language&#xff09;是用于从数据库中检索数据的语言。常见的 DQL 语句包括 SELECT、FROM、WHERE、GROUP BY、HAVING 和 ORDER BY 等关键字&#xff0c;用于指定要检索的数据、数据源、过滤条件、分组方…

《编程小白变大神:DjangoBlog带你飞越代码海洋》

还在为你的博客加载速度慢而烦恼&#xff1f;DjangoBlog性能优化大揭秘&#xff0c;让你的网站速度飞跃提升&#xff01;本文将带你深入了解缓存策略、数据库优化、静态文件处理等关键技术&#xff0c;更有Gunicorn和Nginx的黄金搭档&#xff0c;让你的博客部署如虎添翼。无论你…

助力高考,一组彩色的文字

1、获取文本内容 首先&#xff0c;获取每个<div>元素的文本内容&#xff0c;并清空其内部HTML&#xff08;innerHTML ""&#xff09;。 2、创建<span>元素 然后&#xff0c;它遍历文本的每个字符&#xff0c;为每个字符创建一个新的<span>元素…

《python程序语言设计》2018版第5章第36题改造4.17 石头 剪刀 布某一方超过2次就结束。

代码编写记录 2024.05.04 05.36.01version 换一个什么数代替剪子 我先建立一个函数judgement condition 石头3 剪子2 布1 如何构建一个循环进行的架构&#xff0c;是我们最需要的想法 循环以什么条件开始呢 是小于2个还是大于2个。 guess_num random.randint(1, 3) computer…

nginx优化与防盗链【☆☆☆】

目录 一、用户层面的优化 1、隐藏版本号 方法一&#xff1a;修改配置文件 方法二&#xff1a;修改源码文件&#xff0c;重新编译安装 2、修改nginx用户与组 3、配置nginx网页缓存时间 4、nginx的日志切割 5、配置nginx实现连接超时 6、更改nginx运行进程数 7、开启网…

IPv4 子网掩码计算器—python代码实现

今天聊一下&#xff0c;我用python和vscode工具实现一个IPv4计算器的一些思路&#xff0c;以及使用Python编写IPv4计算器一些好处&#xff1f; 首先&#xff0c;一、Python语法简洁易读&#xff0c;便于理解和维护&#xff0c;即使对编程不熟悉的用户也能快速了解代码逻辑。其…

阿里通义千问 Qwen2 大模型开源发布

阿里通义千问 Qwen2 大模型开源发布 Qwen2 系列模型是 Qwen1.5 系列模型的重大升级。该系列包括了五个不同尺寸的预训练和指令微调模型&#xff1a;Qwen2-0.5B、Qwen2-1.5B、Qwen2-7B、Qwen2-57B-A14B 以及 Qwen2-72B。 在中文和英文的基础上&#xff0c;Qwen2 系列的训练数…

已解决Error || RuntimeError: size mismatch, m1: [32 x 100], m2: [500 x 10]

已解决Error || RuntimeError: size mismatch, m1: [32 x 100], m2: [500 x 10] 原创作者&#xff1a; 猫头虎 作者微信号&#xff1a; Libin9iOak 作者公众号&#xff1a; 猫头虎技术团队 更新日期&#xff1a; 2024年6月6日 博主猫头虎的技术世界 &#x1f31f; 欢迎来…

情景题之小明的Linux实习之旅:linux实战练习1(下)【基础命令,权限修改,日志查询,进程管理...】

小明的Linux实习之旅&#xff1a;基础指令练习情景练习题下 前景提要小明是怎么做的场景1&#xff1a;初识Linux&#xff0c;创建目录和文件场景2&#xff1a;权限管理&#xff0c;小明的权限困惑场景3&#xff1a;打包与解压&#xff0c;小明的备份操作场景4&#xff1a;使用G…

分享一个 .NET Core Console 项目中应用 NLog 写日志的详细例子

前言 日志在软件开发中扮演着非常重要的角色&#xff0c;通常我们用它来记录应用程序运行时发生的事件、错误信息、警告以及其他相关信息&#xff0c;帮助在调试和排查问题时更快速地定位和解决 Bug。 通过日志&#xff0c;我们可以做到&#xff1a; 故障排除和调试&#xff…

让GNSSRTK不再难【第一天】

第1讲 GNSS系统组成以及应用 北斗导航科普动画_哔哩哔哩_bilibili 1.1 GNSS系统 1.1.1 基本概念 全球卫星导航系统&#xff08;Global Navigation Satellite System, GNSS&#xff09;&#xff0c;是能在地球表面或近地空间的任何地点为用户提供全天候的三维坐标、速度以及…