一、 切换前预检查
1. dg_precheck_main_v1.4.sh
#!/bin/bash
#**********************************************************************************
# Author: Hehuyi_In
# Date: 2022年06月16日
# FileName: dg_precheck_main_v1.4.sh
#
# For sys user, execute the script directly.
# For other sysdba privileged users, you need to manually synchronize the password file to standby server in versions below 12.2.
# The script cannot be executed on the Linux 5 os.
#
# Example:
# ./dg_precheck_main_v1.4.sh &> dg_precheck.log
# cat dg_precheck.log | grep --color=no -E "Warning|Error" ,or less -R dg_precheck.log
#
#
# Description:
# 2022-06-16 v_1.0 just get and print the information we want
# 2022-06-17 v_1.1 auto check the database role
# 2022-06-22 v_1.2 adjust the output format
# 2022-06-28 v_1.3 add compare function, compare two nodes at a time
# 2022-06-28 split to two scripts dg_precheck_main_v1.0.sh and dg_precheck_func_v1.0.sh
# 2022-07-06 dg_precheck_main_v1.1 check os info
# 2022-07-12 dg_precheck_main_v1.2 adjust the output format
# 2022-07-13 dg_precheck_main_v1.3 adjust os info check function
# 2022-10-19 dg_precheck_main_v1.4 add checking items: dblink,standby_file_management,flashback_on,db_flashback_retention_target,db_recovery_file_dest,db_recovery_file_dest_size
#**********************************************************************************
# Load the shared check functions. The path is relative to the current working
# directory, so start this script from the directory that holds both files.
source ./dg_precheck_func_v1.4.sh || {
  echo "Error! Unable to load ./dg_precheck_func_v1.4.sh" >&2
  exit 1
}
# TNS aliases to inspect.
TNS_LIST=(test_dg test)
# Credentials handed to sqlplus by the helper functions (placeholder password).
DBUSER='sys/"xxxxx"'
# ------------- Detect the database role behind every alias in TNS_LIST -------------
# Sets PRIMARY_TNS and STANDBY_TNS, or exits when either role is missing.
auto_check_db_role
# Per-node result dictionaries, keyed by parameter / check item name.
declare -A PRIMARY_TNS_DIC
declare -A STANDBY_TNS_DIC
echo -e "--------------------------------------------------------------------------------"
echo -e "- 各节点主要参数检查 "
echo -e "--------------------------------------------------------------------------------\n"
# Memory parameters, compatible and standby_file_management: fetch and compare.
PARAMS_LIST=(sga_max_size sga_target pga_aggregate_target shared_pool_size db_cache_size streams_pool_size compatible standby_file_management)
for PARAM in "${PARAMS_LIST[@]}"; do
  PRIMARY_TNS_DIC[$PARAM]=$(get_params_value "$PRIMARY_TNS" "$PARAM")
  STANDBY_TNS_DIC[$PARAM]=$(get_params_value "$STANDBY_TNS" "$PARAM")
  # Quoted so that an empty value cannot shift the remaining arguments.
  compare_value "${PRIMARY_TNS_DIC[$PARAM]}" "${STANDBY_TNS_DIC[$PARAM]}" "$PARAM"
done
# Remaining parameters are only collected here; later sections evaluate them.
PARAMS_LIST=(log_archive_max_processes db_create_file_dest db_file_name_convert log_file_name_convert fal_client fal_server log_archive_config db_recovery_file_dest db_recovery_file_dest_size db_flashback_retention_target)
for PARAM in "${PARAMS_LIST[@]}"; do
  PRIMARY_TNS_DIC[$PARAM]=$(get_params_value "$PRIMARY_TNS" "$PARAM")
  STANDBY_TNS_DIC[$PARAM]=$(get_params_value "$STANDBY_TNS" "$PARAM")
done
echo -e "\n"
echo -e "--------------------------------------------------------------------------------"
echo -e "- 各节点主要文件数检查 "
echo -e "--------------------------------------------------------------------------------\n"
# Collect the row counts of V$LOG, V$STANDBY_LOG and V$TEMPFILE plus the number
# of ONLINE datafiles on both nodes.
PRIMARY_TNS_DIC[redo_count]=$(get_file_count "$PRIMARY_TNS" "LOG")
PRIMARY_TNS_DIC[standby_count]=$(get_file_count "$PRIMARY_TNS" "STANDBY_LOG")
PRIMARY_TNS_DIC[tempfile_count]=$(get_file_count "$PRIMARY_TNS" "TEMPFILE")
STANDBY_TNS_DIC[redo_count]=$(get_file_count "$STANDBY_TNS" "LOG")
STANDBY_TNS_DIC[standby_count]=$(get_file_count "$STANDBY_TNS" "STANDBY_LOG")
STANDBY_TNS_DIC[tempfile_count]=$(get_file_count "$STANDBY_TNS" "TEMPFILE")
PRIMARY_TNS_DIC[datafile_online_count]=$(get_datafile_online_count "$PRIMARY_TNS")
STANDBY_TNS_DIC[datafile_online_count]=$(get_datafile_online_count "$STANDBY_TNS")
# Compare each count between primary and standby.
for ITEM in redo_count standby_count tempfile_count datafile_online_count; do
  compare_value "${PRIMARY_TNS_DIC[$ITEM]}" "${STANDBY_TNS_DIC[$ITEM]}" "$ITEM"
done
echo -e "\n"
echo -e "--------------------------------------------------------------------------------"
echo -e "- 从节点redo日志状态检查 "
echo -e "--------------------------------------------------------------------------------\n"
# Only the standby needs this check.
STANDBY_TNS_DIC[redo_status_count]=$(get_redo_status_count "$STANDBY_TNS")
# The arguments must be quoted: when db_create_file_dest and
# log_file_name_convert are both empty, unquoted expansions vanish and the
# count would be taken for parameter 1, which makes the check always pass.
check_redo_status_count "${STANDBY_TNS_DIC[db_create_file_dest]}" "${STANDBY_TNS_DIC[log_file_name_convert]}" "${STANDBY_TNS_DIC[redo_status_count]}"
echo -e "\n"
echo -e "--------------------------------------------------------------------------------"
echo -e "- 闪回开启及保留时间检查 "
echo -e "--------------------------------------------------------------------------------\n"
# ---------------- flashback_on and db_flashback_retention_target ----------------
# Each node is compared against a fixed expected value (NO / YES for
# flashback_on, 4320 for the retention target).
PRIMARY_TNS_DIC[flashback_on]=$(get_flashback_on "$PRIMARY_TNS")
echo -e "- primary database "
compare_value "${PRIMARY_TNS_DIC[flashback_on]}" "NO" "flashback_on"
compare_value "${PRIMARY_TNS_DIC[db_flashback_retention_target]}" "4320" "db_flashback_retention_target"
STANDBY_TNS_DIC[flashback_on]=$(get_flashback_on "$STANDBY_TNS")
echo -e "- standby database "
compare_value "${STANDBY_TNS_DIC[flashback_on]}" "YES" "flashback_on"
compare_value "${STANDBY_TNS_DIC[db_flashback_retention_target]}" "4320" "db_flashback_retention_target"
echo -e "\n"
echo -e "--------------------------------------------------------------------------------"
echo -e "- FRA区设置检查 "
echo -e "--------------------------------------------------------------------------------\n"
# ---------------- db_recovery_file_dest and db_recovery_file_dest_size ----------------
# Primary and standby should use the same settings. The values are quoted
# because db_recovery_file_dest may be empty, which would otherwise shift the
# arguments seen by compare_value.
compare_value "${PRIMARY_TNS_DIC[db_recovery_file_dest]}" "${STANDBY_TNS_DIC[db_recovery_file_dest]}" "db_recovery_file_dest"
compare_value "${PRIMARY_TNS_DIC[db_recovery_file_dest_size]}" "${STANDBY_TNS_DIC[db_recovery_file_dest_size]}" "db_recovery_file_dest_size"
echo -e "\n"
echo -e "--------------------------------------------------------------------------------"
echo -e "- 各节点主从相关参数检查 "
echo -e "--------------------------------------------------------------------------------\n"
# --------------------------- fal_client and fal_server ---------------------------
# fal_server should mention the peer node. Every value is passed quoted so it
# always reaches the function as exactly one argument, even when it is empty.
# Primary node
include_value "$PRIMARY_TNS" "fal_server" "${PRIMARY_TNS_DIC[fal_server]}" "$STANDBY_TNS"
# Standby node
include_value "$STANDBY_TNS" "fal_server" "${STANDBY_TNS_DIC[fal_server]}" "$PRIMARY_TNS"
# fal_client should mention the node itself.
# Primary node
include_value "$PRIMARY_TNS" "fal_client" "${PRIMARY_TNS_DIC[fal_client]}" "$PRIMARY_TNS"
# Standby node
include_value "$STANDBY_TNS" "fal_client" "${STANDBY_TNS_DIC[fal_client]}" "$STANDBY_TNS"
# --------------------------- log_archive_config -------------------------------
# Each node's log_archive_config should mention the peer node.
include_value "$PRIMARY_TNS" "log_archive_config" "${PRIMARY_TNS_DIC[log_archive_config]}" "$STANDBY_TNS"
include_value "$STANDBY_TNS" "log_archive_config" "${STANDBY_TNS_DIC[log_archive_config]}" "$PRIMARY_TNS"
# --------------------------- log_archive_max_processes -------------------------------
check_log_archive_max_processes "$PRIMARY_TNS" "${PRIMARY_TNS_DIC[log_archive_max_processes]}"
check_log_archive_max_processes "$STANDBY_TNS" "${STANDBY_TNS_DIC[log_archive_max_processes]}"
# --------------------------- log_archive_dest_n and log_archive_dest_state_n ---------
check_archive_dest "$PRIMARY_TNS"
check_archive_dest "$STANDBY_TNS"
check_archive_dest_state "$PRIMARY_TNS"
check_archive_dest_state "$STANDBY_TNS"
# --------------------------- OMF and db_file_name_convert, log_file_name_convert -----
# Quoting matters here: an empty db_create_file_dest must stay in position 2,
# otherwise a convert parameter would be mistaken for it.
check_omf_and_convert_params "$PRIMARY_TNS" "${PRIMARY_TNS_DIC[db_create_file_dest]}" "${PRIMARY_TNS_DIC[db_file_name_convert]}" "${PRIMARY_TNS_DIC[log_file_name_convert]}"
check_omf_and_convert_params "$STANDBY_TNS" "${STANDBY_TNS_DIC[db_create_file_dest]}" "${STANDBY_TNS_DIC[db_file_name_convert]}" "${STANDBY_TNS_DIC[log_file_name_convert]}"
echo -e "\n"
echo -e "--------------------------------------------------------------------------------"
echo -e "- 从节点延迟 MRP进程与触发器检查 "
echo -e "--------------------------------------------------------------------------------\n"
# Lag and MRP process are checked on the standby only; the MRP trigger is
# checked on both nodes.
check_db_lag "$STANDBY_TNS" "transport lag"
check_db_lag "$STANDBY_TNS" "apply lag"
check_mrp_process "$STANDBY_TNS"
check_mrp_trigger "$PRIMARY_TNS"
check_mrp_trigger "$STANDBY_TNS"
echo -e "\n"
echo -e "--------------------------------------------------------------------------------"
echo -e "- dblink检查 "
echo -e "--------------------------------------------------------------------------------\n"
# --------------------------- dblink -------------------------------
# Checking the primary is sufficient.
check_dblink "$PRIMARY_TNS"
echo -e "\n"
2. dg_precheck_func_v1.4.sh
#!/bin/bash
#**********************************************************************************
# Author: Hehuyi_In
# Date: 2022年06月16日
# FileName: dg_precheck_func_v1.4.sh
#
# Description:
# 2022-06-16 v_1.0 just get and print the information we want
# 2022-06-17 v_1.1 auto check the database role
# 2022-06-22 v_1.2 adjust the output format
# 2022-06-28 v_1.3 add compare function, compare two nodes at a time
# 2022-06-28 split to two scripts dg_precheck_main_v1.0.sh and dg_precheck_func_v1.0.sh
# 2022-07-06 dg_precheck_func_v1.1 check os info
# 2022-07-12 dg_precheck_func_v1.2 adjust the output format
# 2022-07-13 dg_precheck_func_v1.3 adjust os info check function
# 2022-10-19 dg_precheck_func_v1.4 add checking items: dblink,standby_file_management,flashback_on,db_flashback_retention_target,db_recovery_file_dest,db_recovery_file_dest_size
#**********************************************************************************
# Colour definitions. The escape sequences are stored as literal text and are
# only turned into terminal codes by `echo -e` / a printf format string.
ERROR_COLOR="\e[1;31m"    # red
NARMAL_COLOR="\e[1;32m"   # green
WARNING_COLOR="\e[1;33m"  # yellow
RESET_COLOR="\e[0m"       # reset
# ------------------------------------------------------------------------
# Detect the database role behind every TNS alias in TNS_LIST.
# Globals:
#   TNS_LIST, DBUSER            (read)
#   ERROR_COLOR, RESET_COLOR    (read, error output only)
#   PRIMARY_TNS  (written) alias whose database_role is PRIMARY
#   STANDBY_TNS  (written) alias whose database_role is PHYSICAL STANDBY
# Exits with status 1 when no primary or no physical standby was found.
# When several aliases report the same role, the last one wins.
auto_check_db_role() {
  local tns db_role
  for tns in "${TNS_LIST[@]}"; do
    # The logon string is quoted so that a password containing blanks or glob
    # characters reaches sqlplus as one argument.
    db_role=$(
      sqlplus -s "$DBUSER@$tns" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select database_role from v\$database;
EOF
    )
    if [[ "$db_role" == "PRIMARY" ]]; then
      PRIMARY_TNS=$tns
    elif [[ "$db_role" == "PHYSICAL STANDBY" ]]; then
      STANDBY_TNS=$tns
    fi
  done
  if [[ -z "$PRIMARY_TNS" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! There is no primary database tns ${RESET_COLOR}"
    exit 1
  elif [[ -z "$STANDBY_TNS" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! There is no standby database tns ${RESET_COLOR}"
    exit 1
  fi
}
# ----------------------------------获取操作系统信息--------------------------------------
# 参数1:TNS连接串
# get_cpu_info() {
# sqlplus -s $DBUSER@$1 as sysdba <<EOF
# !cat /proc/cpuinfo | grep "processor" | wc -l
# EOF
# }
# get_memory_info() {
# sqlplus -s $DBUSER@$1 as sysdba <<EOF
# !free -h | grep 'Mem:' | awk '{print \$2}'
# EOF
# }
# get_disk_info() {
# sqlplus -s $DBUSER@$1 as sysdba <<EOF
# !df -m | grep -v 'Filesystem' | awk '{sum+=\$2} END {print sum}'
# EOF
# }
# Print the value of one Oracle parameter as stored in v$parameter.
# Arguments:
#   $1 - TNS alias to connect to
#   $2 - parameter name (compared case-insensitively)
# Outputs: the raw sqlplus output of the lookup on stdout.
# Exits with status 1 when either argument is empty (inside a command
# substitution this only ends the subshell).
get_params_value() {
  if [[ -z "$1" || -z "$2" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! get_params_value() parameter 1 and 2 can't be null ${RESET_COLOR}"
    exit 1
  fi
  # Quoted logon string: keeps a password with blanks in a single argument.
  sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select value from v\$parameter where upper(name)=upper('$2');
EOF
}
# Check that log_archive_max_processes is at least 4.
# Arguments:
#   $1 - TNS alias (used as the row label)
#   $2 - value of log_archive_max_processes
# Outputs: one Success or Warning line on stdout.
# Exits with status 1 when either argument is empty.
check_log_archive_max_processes() {
  if [[ -z "$1" || -z "$2" ]]; then
    # -e is required so the colour escape sequences are interpreted.
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! check_log_archive_max_processes() parameter 1 and 2 can't be null ${RESET_COLOR}"
    exit 1
  fi
  if [[ "$2" -ge 4 ]]; then
    printf "${NARMAL_COLOR}%-10s | Success | log_archive_max_processes >= 4${RESET_COLOR}\n" "$1"
  else
    printf "${WARNING_COLOR}%-10s | Warning | log_archive_max_processes < 4${RESET_COLOR}\n" "$1"
  fi
}
# Check whether any parameter whose name starts with log_archive_dest carries
# the DELAY keyword (i.e. a delayed standby is configured).
# Arguments:
#   $1 - TNS alias
# Outputs: one Success or Warning line on stdout.
# Exits with status 1 when $1 is empty.
check_archive_dest() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! check_archive_dest() parameter 1 can't be null ${RESET_COLOR}"
    exit 1
  fi
  # Local, so the caller's variables are not overwritten.
  local delay_count
  delay_count=$(
    sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select count(*) FROM v\$parameter where name like 'log_archive_dest%' and upper(value) like '%DELAY%';
EOF
  )
  if [[ "$delay_count" -eq 0 ]]; then
    printf "${NARMAL_COLOR}%-10s | Success | There is no DELAY attribute in log_archive_dest_n${RESET_COLOR}\n" "$1"
  else
    printf "${WARNING_COLOR}%-10s | Warning | There is DELAY attribute in log_archive_dest_n${RESET_COLOR}\n" "$1"
  fi
}
# Check whether any log_archive_dest_state_n parameter is set to DEFER.
# Arguments:
#   $1 - TNS alias
# Outputs: one Success or Warning line on stdout.
# Exits with status 1 when $1 is empty.
check_archive_dest_state() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! check_archive_dest_state() parameter 1 can't be null ${RESET_COLOR}"
    exit 1
  fi
  # Local, so the caller's variables are not overwritten.
  local defer_count
  defer_count=$(
    sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select count(*) FROM v\$parameter where name like 'log_archive_dest_state%' and upper(value)='DEFER';
EOF
  )
  if [[ "$defer_count" -eq 0 ]]; then
    printf "${NARMAL_COLOR}%-10s | Success | There is no DEFER value in log_archive_dest_state_n${RESET_COLOR}\n" "$1"
  else
    printf "${WARNING_COLOR}%-10s | Warning | There is DEFER value in log_archive_dest_state_n${RESET_COLOR}\n" "$1"
  fi
}
# Check the OMF / file-name-convert configuration of one node.
# Arguments:
#   $1 - TNS alias (used as the row label)
#   $2 - value of db_create_file_dest
#   $3 - value of db_file_name_convert
#   $4 - value of log_file_name_convert
# Outputs: Success when db_create_file_dest is set, or when both convert
#          parameters are set; Warning otherwise.
check_omf_and_convert_params() {
  if [[ -n "$2" ]]; then
    printf "${NARMAL_COLOR}%-10s | Success | db_create_file_dest parameter was set${RESET_COLOR}\n" "$1"
  elif [[ -n "$3" && -n "$4" ]]; then
    printf "${NARMAL_COLOR}%-10s | Success | dbfile and logfile convert parameters were set${RESET_COLOR}\n" "$1"
  else
    # This branch is reached when the required parameters are missing, so the
    # message has to say "were not set".
    printf "${WARNING_COLOR}%-10s | Warning | db_create_file_dest,dbfile and logfile convert parameters were not set${RESET_COLOR}\n" "$1"
  fi
}
# Print the number of rows in one V$ view.
# Arguments:
#   $1 - TNS alias
#   $2 - view name without the "V$" prefix (callers pass LOG, STANDBY_LOG, TEMPFILE)
# Outputs: the raw sqlplus output of "SELECT count(*) FROM V$<view>" on stdout.
# Exits with status 1 when either argument is empty.
get_file_count() {
  # Parameter 2 is validated as well: without it the query would read "V$".
  if [[ -z "$1" || -z "$2" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! get_file_count() parameter 1 and 2 can't be null ${RESET_COLOR}"
    exit 1
  fi
  sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
SELECT count(*) FROM V\$$2;
EOF
}
# Print the number of V$LOG rows whose STATUS is not UNUSED, CLEARING or
# CLEARING_CURRENT.
# Arguments:
#   $1 - TNS alias
# Outputs: the raw sqlplus output of the count query on stdout.
# Exits with status 1 when $1 is empty.
get_redo_status_count() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! get_redo_status_count() parameter 1 can't be null ${RESET_COLOR}"
    exit 1
  fi
  # Quoted logon string: keeps a password with blanks in a single argument.
  sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
SELECT count(*) FROM V\$LOG WHERE STATUS NOT IN ('UNUSED', 'CLEARING','CLEARING_CURRENT');
EOF
}
# Evaluate the redo-status count collected on the standby.
# Arguments:
#   $1 - value of db_create_file_dest
#   $2 - value of log_file_name_convert
#   $3 - count returned by get_redo_status_count
# Outputs: one line on stdout labelled "redo_status_count":
#   Success when OMF or LOG_FILE_NAME_CONVERT is set (check not needed),
#   Success when the count is 0, Warning otherwise.
check_redo_status_count() {
  if [[ -n "$1" || -n "$2" ]]; then
    # With OMF or LOG_FILE_NAME_CONVERT in place this item can be skipped.
    printf "${NARMAL_COLOR}%-30s | %-8s | There is no need to check this item${RESET_COLOR}\n" redo_status_count Success
  elif [[ "$3" -eq 0 ]]; then
    # A count of 0 is the expected state, so it is reported as Success.
    printf "${NARMAL_COLOR}%-30s | %-8s | redo_status_count value is 0${RESET_COLOR}\n" redo_status_count Success
  else
    # $1 is empty in this branch, so the fixed item name is used as label.
    printf "${WARNING_COLOR}%-30s | %-8s | redo_status_count value is not 0${RESET_COLOR}\n" redo_status_count Warning
  fi
}
# Print the number of V$DATAFILE rows with STATUS='ONLINE'.
# Arguments:
#   $1 - TNS alias
# Outputs: the raw sqlplus output of the count query on stdout.
# Exits with status 1 when $1 is empty.
get_datafile_online_count() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! get_datafile_online_count() parameter 1 can't be null ${RESET_COLOR}"
    exit 1
  fi
  # Quoted logon string: keeps a password with blanks in a single argument.
  sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
SELECT count(*) FROM V\$DATAFILE WHERE STATUS='ONLINE';
EOF
}
# Print the FLASHBACK_ON column of V$DATABASE.
# Arguments:
#   $1 - TNS alias
# Outputs: the raw sqlplus output of the query on stdout.
# Exits with status 1 when $1 is empty.
get_flashback_on() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! get_flashback_on() parameter 1 can't be null ${RESET_COLOR}"
    exit 1
  fi
  # Quoted logon string: keeps a password with blanks in a single argument.
  sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
SELECT FLASHBACK_ON FROM V\$DATABASE;
EOF
}
# Check that an MRP start-up trigger exists: counts dba_triggers rows named
# AUTO_START_STANDBY_MRP or AUTO_START_STANDBY_MRP_PDB.
# Arguments:
#   $1 - TNS alias
# Outputs: one Error (no trigger) or Success line on stdout.
# Exits with status 1 when $1 is empty.
check_mrp_trigger() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR}$(date "+%Y-%m-%d %H:%M:%S") | Error! check_mrp_trigger() parameter 1 can't be null ${RESET_COLOR}"
    exit 1
  fi
  # Local, so the caller's variables are not overwritten.
  local trigger_count
  trigger_count=$(
    sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select count(*) FROM dba_triggers where trigger_name in ('AUTO_START_STANDBY_MRP','AUTO_START_STANDBY_MRP_PDB');
EOF
  )
  if [[ "$trigger_count" -eq 0 ]]; then
    printf "${ERROR_COLOR}%-10s | %-8s | There is no MRP trigger${RESET_COLOR}\n" "$1" Error
  else
    printf "${NARMAL_COLOR}%-10s | %-8s | MRP trigger was found${RESET_COLOR}\n" "$1" Success
  fi
}
# Check that an MRP process is running: counts v$managed_standby rows whose
# process name starts with MRP.
# Arguments:
#   $1 - TNS alias
# Outputs: one Error (no process) or Success line on stdout.
# Exits with status 1 when $1 is empty.
check_mrp_process() {
  if [[ -z "$1" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! check_mrp_process() parameter 1 can't be null ${RESET_COLOR}"
    exit 1
  fi
  # Local, so the caller's variables are not overwritten.
  local mrp_count
  mrp_count=$(
    sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select count(*) FROM v\$managed_standby where process like 'MRP%';
EOF
  )
  if [[ "$mrp_count" -eq 0 ]]; then
    printf "${ERROR_COLOR}%-10s | %-8s | There is no MRP process${RESET_COLOR}\n" "$1" Error
  else
    printf "${NARMAL_COLOR}%-10s | %-8s | MRP process was found${RESET_COLOR}\n" "$1" Success
  fi
}
# Check a standby lag value (transport lag or apply lag) from
# v$dataguard_stats, converted to seconds.
# Arguments:
#   $1 - TNS alias
#   $2 - lag name as stored in v$dataguard_stats.name
# Outputs: one line on stdout: Success up to 30 seconds, Warning above 30
#          seconds, Warning when the query did not return a plain number.
# Exits with status 1 when either argument is empty.
check_db_lag() {
  if [[ -z "$1" || -z "$2" ]]; then
    echo -e "${ERROR_COLOR} $(date "+%Y-%m-%d %H:%M:%S") | Error! check_db_lag() parameter 1 and 2 can't be null ${RESET_COLOR}"
    exit 1
  fi
  local db_lag
  db_lag=$(
    sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select (extract(second from to_dsinterval(value)) +
extract(minute from to_dsinterval(value)) * 60 +
extract(hour from to_dsinterval(value)) * 60 * 60 +
extract(day from to_dsinterval(value)) * 60 * 60 * 24) as retvalue
from v\$dataguard_stats
where name = '$2';
EOF
  )
  # Trim surrounding whitespace with parameter expansion; running the database
  # output through eval would execute whatever the query returned.
  db_lag="${db_lag#"${db_lag%%[![:space:]]*}"}"
  db_lag="${db_lag%"${db_lag##*[![:space:]]}"}"
  # No row or an error message must not be reported as "lag is fine".
  if [[ ! "$db_lag" =~ ^[0-9]+$ ]]; then
    printf "${WARNING_COLOR}%-10s | %-8s | unable to determine the %s (query returned: %s)${RESET_COLOR}\n" "$1" Warning "$2" "${db_lag:-nothing}"
    return
  fi
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  if (( 10#$db_lag > 30 )); then
    printf "${WARNING_COLOR}%-10s | %-8s | the %s is too large(%s seconds)${RESET_COLOR}\n" "$1" Warning "$2" "$db_lag"
  else
    printf "${NARMAL_COLOR}%-10s | %-8s | the %s is %s second(s)${RESET_COLOR}\n" "$1" Success "$2" "$db_lag"
  fi
}
# Check whether the database contains any database link (rows in dba_db_links).
# Arguments:
#   $1 - TNS alias
# Outputs: Success when there is none, otherwise a Warning asking to review
#          the tns file and firewall.
# Exits with status 1 when $1 is empty.
check_dblink() {
  if [[ -z "$1" ]]; then
    # The message names this function (it used to name check_mrp_trigger()).
    echo -e "${ERROR_COLOR}$(date "+%Y-%m-%d %H:%M:%S") | Error! check_dblink() parameter 1 can't be null ${RESET_COLOR}"
    exit 1
  fi
  # Local, so the caller's variables are not overwritten.
  local link_count
  link_count=$(
    sqlplus -s "$DBUSER@$1" as sysdba <<EOF
set heading off feedback off pagesize 0 verify off echo off
select count(*) FROM dba_db_links;
EOF
  )
  if [[ "$link_count" -eq 0 ]]; then
    printf "${NARMAL_COLOR}%-10s | %-8s | There is no dblink in our database${RESET_COLOR}\n" "$1" Success
  else
    printf "${WARNING_COLOR}%-10s | %-8s | dblink was found,please check the tns file and firewall${RESET_COLOR}\n" "$1" Warning
  fi
}
# Compare two values and report whether they are equal.
# Arguments:
#   $1 - first value
#   $2 - second value
#   $3 - item name used as the row label
# Outputs: a Success line showing the value, or a Warning line showing both.
compare_value() {
  # The values are passed as printf arguments instead of being embedded in the
  # format string, so "%" or "\" inside a value is printed literally.
  if [[ "$1" == "$2" ]]; then
    printf "${NARMAL_COLOR}%-30s | %-8s | The value is %s ${RESET_COLOR}\n" "$3" Success "$1"
  else
    printf "${WARNING_COLOR}%-30s | %-8s | The value 1 is: %s,but value 2 is: %s ${RESET_COLOR}\n" "$3" Warning "$1" "$2"
  fi
}
# Report whether a parameter value contains a given substring.
# Arguments:
#   $1 - TNS alias (row label)
#   $2 - name of the parameter being checked
#   $3 - value of the parameter
#   $4 - substring to look for
# Example: include_value "$PRIMARY_TNS" "fal_server" "${PRIMARY_TNS_DIC[fal_server]}" "$STANDBY_TNS"
# Outputs: a Success line when $3 contains $4, otherwise a Warning line.
include_value() {
  # Literal, case-sensitive substring match; the needle is quoted so that
  # characters such as "." are not treated as pattern metacharacters. An empty
  # needle never counts as included.
  if [[ -n "$4" && "$3" == *"$4"* ]]; then
    printf "${NARMAL_COLOR}%-10s | %-8s| %-20s | The value is %s,include %s ${RESET_COLOR}\n" "$1" Success "$2" "$3" "$4"
  else
    printf "${WARNING_COLOR}%-10s | %-8s| %-20s | The value is %s,doesn't include %s ${RESET_COLOR}\n" "$1" Warning "$2" "$3" "$4"
  fi
}
3. 执行效果
二、 执行切换
1. dg_switchover_v1.4.sh
#!/bin/bash
#**********************************************************************************
# Author: Hehuyi_In
# Date: 2022年06月10日
# FileName: dg_switchover_v1.4.sh
#
# For sys user, execute the script directly.
# For other sysdba privileged users, you need to manually synchronize the password file to standby server and chown file mode to 640 in versions below 12.2.
#
# Example: ./dg_switchover_v1.4.sh or ./dg_switchover_v1.4.sh &>> dg_switchover.log
# Description:
# 2022-06-10 v_1.0 only oracle dataguard switchover
# 2022-06-10 v_1.1 check instance status and db role after switchover.
# 2022-06-10 v_1.2 check transport lag,apply lag,switchover_status before switchover.
# 2022-06-13 v_1.3 add logging for main steps
# 2022-06-14 v_1.4 auto check and select the target primary and standby database.
#**********************************************************************************
# Connection credentials handed to sqlplus (placeholder password).
DBUSER='sys/xxxxx'
# TNS aliases of the Data Guard members. Standbys listed earlier are examined
# first and are therefore more likely to be chosen as the target primary.
TNS_LIST=(mrptest mrptest_dg mrpuat)
# Only one primary with one or two standbys is supported, i.e. 2 or 3 aliases.
if (( ${#TNS_LIST[@]} < 2 || ${#TNS_LIST[@]} > 3 )); then
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! The number of standby db is less than 1 or greater than 2."
  # Stop here: continuing with an unsupported topology would start a switchover.
  exit 1
fi
# ------------------------------------------------------------------------
# Work out the role of every alias in TNS_LIST and set the switchover targets.
# Globals:
#   TNS_LIST (read)
#   TARGET_STANDBY_TNS     (written) the current primary
#   TARGET_PRIMARY_TNS     (written) the first standby whose transport lag and
#                                    apply lag are both 0
#   OUTLOOKING_STANDBY_TNS (written) any other standby
# Exits with status 1 when no target primary or no target standby was found,
# e.g. when every standby has a non-zero or unknown lag.
auto_set_target_db_tns(){
  local tns db_role tran_lag apply_lag
  for tns in "${TNS_LIST[@]}"; do
    db_role=$(check_db_role "$tns")
    if [[ "$db_role" == "PRIMARY" ]]; then
      # The current primary becomes the standby after the switchover.
      TARGET_STANDBY_TNS=$tns
    elif [[ "$db_role" == "PHYSICAL STANDBY" ]]; then
      if [[ -n "$TARGET_PRIMARY_TNS" ]]; then
        # A target primary was already chosen: this one is the bystander.
        OUTLOOKING_STANDBY_TNS=$tns
        continue
      fi
      tran_lag=$(check_db_lag "$tns" "transport lag")
      apply_lag=$(check_db_lag "$tns" "apply lag")
      # sqlplus pads numbers with blanks; strip them before validating.
      tran_lag=${tran_lag//[[:space:]]/}
      apply_lag=${apply_lag//[[:space:]]/}
      # Both lags must be an explicit 0. An empty or non-numeric answer means
      # the lag is unknown and must not qualify the standby for promotion.
      if [[ "$tran_lag" =~ ^0+$ && "$apply_lag" =~ ^0+$ ]]; then
        TARGET_PRIMARY_TNS=$tns
      else
        # A standby that is not promoted is kept as the bystander instead of
        # being dropped silently.
        OUTLOOKING_STANDBY_TNS=$tns
      fi
    fi
  done
  # Both targets are mandatory.
  if [[ -z "$TARGET_PRIMARY_TNS" || -z "$TARGET_STANDBY_TNS" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! TARGET_PRIMARY_TNS and TARGET_STANDBY_TNS can't be null."
    exit 1
  fi
}
# ------------------------------------------------------------------------
# Print a standby lag value (transport lag or apply lag) in seconds.
# Arguments:
#   $1 - TNS alias of the database
#   $2 - lag name as stored in v$dataguard_stats.name
# Outputs: the raw sqlplus output of the query on stdout.
# Exits with status 1 when either argument is empty (inside a command
# substitution this only ends the subshell).
check_db_lag(){
  if [[ -z "$1" || -z "$2" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! parameter 1,2 can't be null."
    exit 1
  fi
  # Quoted logon string: keeps a password with blanks in a single argument.
  sqlplus -s "$DBUSER@$1" as sysdba << EOF
set heading off feedback off pagesize 0 verify off echo off
select (extract(second from to_dsinterval(value)) +
extract(minute from to_dsinterval(value)) * 60 +
extract(hour from to_dsinterval(value)) * 60 * 60 +
extract(day from to_dsinterval(value)) * 60 * 60 * 24) as retvalue
from v\$dataguard_stats
where name = '$2';
EOF
}
# ------------------------------------------------------------------------
# Verify v$database.switchover_status before a switchover step.
# Arguments:
#   $1 - TNS alias of the database
#   $2 - expected status
# Outputs: a Success line when the status equals $2 or "SESSIONS ACTIVE",
#          otherwise an Error line.
# Exits with status 1 on an unexpected status or an empty argument.
check_switchover_status(){
  if [[ -z "$1" || -z "$2" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! parameter 1 and 2 can't be null."
    exit 1
  fi
  # Local, so the caller's variables are not overwritten.
  local status
  status=$(
    sqlplus -s "$DBUSER@$1" as sysdba << EOF
set heading off feedback off pagesize 0 verify off echo off
select switchover_status from v\$database;
EOF
  )
  # "$2" is quoted so it is compared literally rather than as a glob pattern.
  if [[ "$status" == "$2" || "$status" == "SESSIONS ACTIVE" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Success! switchover_status of $1 is $status."
  else
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! switchover_status of $1 is $status (should be $2 or SESSIONS ACTIVE)."
    exit 1
  fi
}
# ------------------------------------------------------------------------
# Print the database_role column of v$database.
# Arguments:
#   $1 - TNS alias of the database
# Outputs: the raw sqlplus output of the query on stdout.
# Exits with status 1 when $1 is empty.
check_db_role(){
  if [[ -z "$1" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! parameter 1 should be tnsname."
    exit 1
  fi
  # Quoted logon string: keeps a password with blanks in a single argument.
  sqlplus -s "$DBUSER@$1" as sysdba << EOF
set heading off feedback off pagesize 0 verify off echo off
select database_role from v\$database;
EOF
}
# ------------------------------------------------------------------------
# Verify that the instance is OPEN after a switchover step.
# Arguments:
#   $1 - TNS alias of the database
# Outputs: a Success line when v$instance.status is OPEN, else an Error line.
# Exits with status 1 when the status is not OPEN or $1 is empty.
check_instance_status(){
  if [[ -z "$1" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! parameter 1 can't be null."
    exit 1
  fi
  # Local, so the caller's variables are not overwritten.
  local status
  status=$(
    sqlplus -s "$DBUSER@$1" as sysdba << EOF
set heading off feedback off pagesize 0 verify off echo off
select status from v\$instance;
EOF
  )
  if [[ "$status" != "OPEN" ]]; then
    echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! instance status of $1 is $status after switchover."
    exit 1
  fi
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Success! instance status of $1 is $status after switchover."
}
# ------------------------------- main() -----------------------------------------
# Step 1: pick the targets and turn the current primary into a standby.
echo "------------------------------------------------------------------------"
echo "$(date "+%Y-%m-%d %H:%M:%S") | Precheck before switchover..."
# Determine the role of every alias in TNS_LIST and set the target variables.
auto_set_target_db_tns
# Show the chosen targets.
echo "$(date "+%Y-%m-%d %H:%M:%S") | TARGET_PRIMARY_TNS: $TARGET_PRIMARY_TNS"
echo "$(date "+%Y-%m-%d %H:%M:%S") | TARGET_STANDBY_TNS: $TARGET_STANDBY_TNS"
echo "$(date "+%Y-%m-%d %H:%M:%S") | OUTLOOKING_STANDBY_TNS: $OUTLOOKING_STANDBY_TNS"
# The current primary must be ready to become a standby.
check_switchover_status "$TARGET_STANDBY_TNS" "TO STANDBY"
# Run on the target standby (the old primary): convert it to a standby.
echo "$(date "+%Y-%m-%d %H:%M:%S") | Begin switchover $TARGET_STANDBY_TNS to standby database..."
sqlplus -s "$DBUSER@$TARGET_STANDBY_TNS" as sysdba << EOF
alter database commit to switchover to standby with session shutdown;
shutdown immediate
EOF
echo "$(date "+%Y-%m-%d %H:%M:%S") | Restarting $TARGET_STANDBY_TNS database..."
# Start the target standby again.
sqlplus -s "$DBUSER@$TARGET_STANDBY_TNS" as sysdba << EOF
startup
EOF
# Role check after the switchover.
DB_ROLE=$(check_db_role "$TARGET_STANDBY_TNS")
if [[ "$DB_ROLE" != "PHYSICAL STANDBY" ]]; then
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! db role of $TARGET_STANDBY_TNS is $DB_ROLE(should be PHYSICAL STANDBY) after switchover."
  exit 1
else
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Success! db role of $TARGET_STANDBY_TNS is $DB_ROLE after switchover."
fi
# Instance status check after the switchover.
check_instance_status "$TARGET_STANDBY_TNS"
echo "$(date "+%Y-%m-%d %H:%M:%S") | Finish switchover $TARGET_STANDBY_TNS to standby database."
# ------------------------------------------------------------------------
# Step 2: turn the chosen standby into the new primary.
echo "$(date "+%Y-%m-%d %H:%M:%S") | Precheck before switchover $TARGET_PRIMARY_TNS to primary database..."
# The chosen standby must be ready to become the primary.
check_switchover_status "$TARGET_PRIMARY_TNS" "TO PRIMARY"
# Run on the target primary (the old standby): convert it to the primary.
echo "$(date "+%Y-%m-%d %H:%M:%S") | Begin switchover $TARGET_PRIMARY_TNS to primary database..."
sqlplus -s "$DBUSER@$TARGET_PRIMARY_TNS" as sysdba << EOF
alter database commit to switchover to primary with session shutdown;
ALTER DATABASE OPEN;
EOF
# Role check after the switchover. The messages name $TARGET_PRIMARY_TNS; at
# script level "$1" is the script's own first argument, not the database alias.
DB_ROLE=$(check_db_role "$TARGET_PRIMARY_TNS")
if [[ "$DB_ROLE" != "PRIMARY" ]]; then
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Error! db role of $TARGET_PRIMARY_TNS is $DB_ROLE(should be PRIMARY) after switchover."
  exit 1
else
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Success! db role of $TARGET_PRIMARY_TNS is $DB_ROLE after switchover."
fi
# Instance status check after the switchover.
check_instance_status "$TARGET_PRIMARY_TNS"
# Lag of the new standby after the switchover.
DB_TRAN_LAG=$(check_db_lag "$TARGET_STANDBY_TNS" "transport lag")
DB_APPLY_LAG=$(check_db_lag "$TARGET_STANDBY_TNS" "apply lag")
echo "$(date "+%Y-%m-%d %H:%M:%S") | After switchover: DB_TRAN_LAG is $DB_TRAN_LAG second(s), DB_APPLY_LAG is $DB_APPLY_LAG second(s)"
echo "$(date "+%Y-%m-%d %H:%M:%S") | Finish switchover $TARGET_PRIMARY_TNS to primary database."
# ------------------------------------------------------------------------
# Step 3: when a bystander standby exists, repoint the archive destinations.
if [[ -n "$OUTLOOKING_STANDBY_TNS" ]]; then
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Begin onlooking standby database setting..."
  # New primary: ship redo to both standbys. The destinations are cleared
  # first, otherwise the new settings may be rejected as conflicting.
  sqlplus -s "$DBUSER@$TARGET_PRIMARY_TNS" as sysdba << EOF
alter system set LOG_ARCHIVE_DEST_2='';
alter system set LOG_ARCHIVE_DEST_3='';
alter system set LOG_ARCHIVE_DEST_2='SERVICE=$TARGET_STANDBY_TNS ASYNC VALID_FOR=(ONLINE_LOGFILES,PRIMARY_ROLE) DB_UNIQUE_NAME=$TARGET_STANDBY_TNS';
alter system set LOG_ARCHIVE_DEST_3='SERVICE=$OUTLOOKING_STANDBY_TNS ASYNC VALID_FOR=(ONLINE_LOGFILES,PRIMARY_ROLE) DB_UNIQUE_NAME=$OUTLOOKING_STANDBY_TNS';
EOF
  # New standby (old primary): drop its destination to the bystander and
  # point destination 2 at the new primary.
  sqlplus -s "$DBUSER@$TARGET_STANDBY_TNS" as sysdba << EOF
alter system set LOG_ARCHIVE_DEST_3='';
alter system set LOG_ARCHIVE_DEST_2='SERVICE=$TARGET_PRIMARY_TNS ASYNC VALID_FOR=(ONLINE_LOGFILES,PRIMARY_ROLE) DB_UNIQUE_NAME=$TARGET_PRIMARY_TNS';
EOF
  # Bystander standby: point destination 2 at the new primary.
  sqlplus -s "$DBUSER@$OUTLOOKING_STANDBY_TNS" as sysdba << EOF
alter system set LOG_ARCHIVE_DEST_3='';
alter system set LOG_ARCHIVE_DEST_2='SERVICE=$TARGET_PRIMARY_TNS ASYNC VALID_FOR=(ONLINE_LOGFILES,PRIMARY_ROLE) DB_UNIQUE_NAME=$TARGET_PRIMARY_TNS';
EOF
  # Lag of the bystander standby after the change.
  DB_TRAN_LAG=$(check_db_lag "$OUTLOOKING_STANDBY_TNS" "transport lag")
  DB_APPLY_LAG=$(check_db_lag "$OUTLOOKING_STANDBY_TNS" "apply lag")
  echo "$(date "+%Y-%m-%d %H:%M:%S") | After setting: DB_TRAN_LAG is $DB_TRAN_LAG second(s), DB_APPLY_LAG is $DB_APPLY_LAG second(s)"
  echo "$(date "+%Y-%m-%d %H:%M:%S") | Finish onlooking standby database setting."
else
  # No bystander standby configured.
  echo "$(date "+%Y-%m-%d %H:%M:%S") | There is no onlooking standby database."
fi