全局事务提交
TM提交全局事务
当业务正常处理完毕后 本地事务全部提交完成,TM会将xid提交给TC,TC会返回当前事务状态,status由TC决定,TM最后会将xid从RootContext中解绑,全局事务结束。
TransactionalTemplate
private void commitTransaction(GlobalTransaction tx) throws TransactionalExecutor.ExecutionException {
try {
triggerBeforeCommit();
tx.commit();
triggerAfterCommit();
} catch (TransactionException txe) {
// 4.1 Failed to commit
throw new TransactionalExecutor.ExecutionException(tx, txe,
TransactionalExecutor.Code.CommitFailure);
}
}
@Override
public void commit() throws TransactionException {
if (role == GlobalTransactionRole.Participant) {
// Participant has no responsibility of committing
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Ignore Commit(): just involved in global transaction [{}]", xid);
}
return;
}
assertXIDNotNull();
// 默认重试5次 client.tm.commitRetryCount
int retry = COMMIT_RETRY_COUNT <= 0 ? DEFAULT_TM_COMMIT_RETRY_COUNT : COMMIT_RETRY_COUNT;
try {
while (retry > 0) {
try {
// GlobalCommitRequest 发送全局事务id给TC,TC会返回全局事务状态给TM
status = transactionManager.commit(xid);
break;
} catch (Throwable ex) {
LOGGER.error("Failed to report global commit [{}],Retry Countdown: {}, reason: {}", this.getXid(), retry, ex.getMessage());
retry--;
if (retry == 0) {
throw new TransactionException("Failed to report global commit", ex);
}
}
}
} finally {
// 将xid从ThreadLocal中移除,代表这个全局事务已经结束
if (xid.equals(RootContext.getXID())) {
suspend();
}
}
if (LOGGER.isInfoEnabled()) {
LOGGER.info("[{}] commit status: {}", xid, status);
}
}
TC处理全局事务提交请求
AbstractTCInboundHandler
@Override
public GlobalCommitResponse handle(GlobalCommitRequest request, final RpcContext rpcContext) {
GlobalCommitResponse response = new GlobalCommitResponse();
response.setGlobalStatus(GlobalStatus.Committing);
exceptionHandleTemplate(new AbstractCallback<GlobalCommitRequest, GlobalCommitResponse>() {
@Override
public void execute(GlobalCommitRequest request, GlobalCommitResponse response)
throws TransactionException {
try {
//全局事务提交
doGlobalCommit(request, response, rpcContext);
} catch (StoreException e) {
throw new TransactionException(TransactionExceptionCode.FailedStore,
String.format("global commit request failed. xid=%s, msg=%s", request.getXid(), e.getMessage()),
e);
}
}
@Override
public void onTransactionException(GlobalCommitRequest request, GlobalCommitResponse response,
TransactionException tex) {
super.onTransactionException(request, response, tex);
// 设置当前全局事务状态
checkTransactionStatus(request, response);
}
@Override
public void onException(GlobalCommitRequest request, GlobalCommitResponse response, Exception rex) {
// 设置当前全局事务状态
super.onException(request, response, rex);
checkTransactionStatus(request, response);
}
}, request, response);
return response;
}
DefaultCoordinator
@Override
protected void doGlobalCommit(GlobalCommitRequest request, GlobalCommitResponse response, RpcContext rpcContext)
throws TransactionException {
MDC.put(RootContext.MDC_KEY_XID, request.getXid());
response.setGlobalStatus(core.commit(request.getXid()));
}
public GlobalStatus commit(String xid) throws TransactionException {
GlobalSession globalSession = SessionHolder.findGlobalSession(xid);
if (globalSession == null) {
return GlobalStatus.Finished;
}
globalSession.addSessionLifecycleListener(SessionHolder.getRootSessionManager());
boolean shouldCommit = SessionHolder.lockAndExecute(globalSession, () -> {
if (globalSession.getStatus() == GlobalStatus.Begin) {
// 如果分支事务都是AT模式,释放全局锁,delete from lock_table where xid = ?
globalSession.closeAndClean();
// 如果分支事务都是AT模式,或分支事务有一阶段提交失败的,则可以执行异步提交
if (globalSession.canBeCommittedAsync()) {
// 执行异步提交,更新全局事务状态为AsyncCommitting,update global_table set status = AsyncCommitting where xid = ?
globalSession.asyncCommit();
MetricsPublisher.postSessionDoneEvent(globalSession, GlobalStatus.Committed, false, false);
return false;
} else {
globalSession.changeGlobalStatus(GlobalStatus.Committing);
return true;
}
}
return false;
});
if (shouldCommit) { // 同步提交
boolean success = doGlobalCommit(globalSession, false);
// ...
} else { // 异步提交
return globalSession.getStatus() == GlobalStatus.AsyncCommitting ? GlobalStatus.Committed : globalSession.getStatus();
}
}
DefaultCoordinator在初始化时启动了一个1秒执行一次的定时任务,通过distributed_lock表获取分布式锁,同一时间只有一个TC能执行异步提交任务。
DefaultCoordinator
public void init() {
// 全局事务二阶段异步提交
asyncCommitting.scheduleAtFixedRate(
() -> SessionHolder.distributedLockAndExecute(ASYNC_COMMITTING, this::handleAsyncCommitting), 0,
1000, TimeUnit.MILLISECONDS);
}
protected void handleAsyncCommitting() {
SessionCondition sessionCondition = new SessionCondition(GlobalStatus.AsyncCommitting);
Collection<GlobalSession> asyncCommittingSessions =
SessionHolder.getAsyncCommittingSessionManager().findGlobalSessions(sessionCondition);
if (CollectionUtils.isEmpty(asyncCommittingSessions)) {
return;
}
SessionHelper.forEach(asyncCommittingSessions, asyncCommittingSession -> {
try {
asyncCommittingSession.addSessionLifecycleListener(SessionHolder.getRootSessionManager());
//提交
core.doGlobalCommit(asyncCommittingSession, true);
} catch (TransactionException ex) {
LOGGER.error("Failed to async committing [{}] {} {}", asyncCommittingSession.getXid(), ex.getCode(), ex.getMessage(), ex);
}
});
}
DefaultCore.doGlobalCommit
1.循环处理每个分支事务,发送BranchCommitRequest给RM,RM删除undo_log;
2.如果RM处理二阶段提交成功,返回分支事务状态PhaseTwo_Committed,TC删除branch_table中对应的分支事务;
3.如果所有分支事务都处理成功,TC删除global_table中的全局事务;
@Override
public boolean doGlobalCommit(GlobalSession globalSession, boolean retrying) throws TransactionException {
boolean success = true;
// start committing event
MetricsPublisher.postSessionDoingEvent(globalSession, retrying);
//如果是saga模式
if (globalSession.isSaga()) {
success = getCore(BranchType.SAGA).doGlobalCommit(globalSession, retrying);
} else {
//循环分支事务
Boolean result = SessionHelper.forEach(globalSession.getSortedBranches(), branchSession -> {
// if not retrying, skip the canBeCommittedAsync branches
if (!retrying && branchSession.canBeCommittedAsync()) {
return CONTINUE;
}
BranchStatus currentStatus = branchSession.getStatus();
if (currentStatus == BranchStatus.PhaseOne_Failed) {
SessionHelper.removeBranch(globalSession, branchSession, !retrying);
return CONTINUE;
}
try {
//发送BranchCommitRequest给RM,RM会删除undo_log
BranchStatus branchStatus = getCore(branchSession.getBranchType()).branchCommit(globalSession, branchSession);
if (isXaerNotaTimeout(globalSession,branchStatus)) {
LOGGER.info("Commit branch XAER_NOTA retry timeout, xid = {} branchId = {}", globalSession.getXid(), branchSession.getBranchId());
branchStatus = BranchStatus.PhaseTwo_Committed;
}
switch (branchStatus) {
case PhaseTwo_Committed:
//删除branch_table中的分支事务记录
SessionHelper.removeBranch(globalSession, branchSession, !retrying);
return CONTINUE;
case PhaseTwo_CommitFailed_Unretryable:
//not at branch
SessionHelper.endCommitFailed(globalSession, retrying);
LOGGER.error("Committing global transaction[{}] finally failed, caused by branch transaction[{}] commit failed.", globalSession.getXid(), branchSession.getBranchId());
return false;
default:
if (!retrying) {
globalSession.queueToRetryCommit();
return false;
}
if (globalSession.canBeCommittedAsync()) {
LOGGER.error("Committing branch transaction[{}], status:{} and will retry later",
branchSession.getBranchId(), branchStatus);
return CONTINUE;
} else {
LOGGER.error(
"Committing global transaction[{}] failed, caused by branch transaction[{}] commit failed, will retry later.", globalSession.getXid(), branchSession.getBranchId());
return false;
}
}
} catch (Exception ex) {
StackTraceLogger.error(LOGGER, ex, "Committing branch transaction exception: {}",
new String[] {branchSession.toString()});
if (!retrying) {
globalSession.queueToRetryCommit();
throw new TransactionException(ex);
}
}
//某个分支事务处理失败,继续处理后续分支事务
return CONTINUE;
});
// Return if the result is not null
if (result != null) {
return result;
}
//If has branch and not all remaining branches can be committed asynchronously,
//do print log and return false
if (globalSession.hasBranch() && !globalSession.canBeCommittedAsync()) {
LOGGER.info("Committing global transaction is NOT done, xid = {}.", globalSession.getXid());
return false;
}
if (!retrying) {
//contains not AT branch
globalSession.setStatus(GlobalStatus.Committed);
}
}
// if it succeeds and there is no branch, retrying=true is the asynchronous state when retrying. EndCommitted is
// executed to improve concurrency performance, and the global transaction ends..
if (success && globalSession.getBranchSessions().isEmpty()) {
//如果所有分支事务被删除,则删除全局事务 delete from global_table where xid = ?
SessionHelper.endCommitted(globalSession, retrying);
LOGGER.info("Committing global transaction is successfully done, xid = {}.", globalSession.getXid());
}
return success;
}
RM处理分支事务的提交
AbstractRMHandler
@Override
public BranchCommitResponse handle(BranchCommitRequest request) {
BranchCommitResponse response = new BranchCommitResponse();
exceptionHandleTemplate(new AbstractCallback<BranchCommitRequest, BranchCommitResponse>() {
@Override
public void execute(BranchCommitRequest request, BranchCommitResponse response)
throws TransactionException {
doBranchCommit(request, response);
}
}, request, response);
return response;
}
AbstractRMHandler实际委托ResourceManager实现分支事务提交。
AsyncWorker异步处理分支事务提交,将xid、branchId、resourceId封装为Phase2Context,放入阻塞队列,等待一个1秒执行一次的定时任务消费队列中的Phase2Context。
// DataSourceManager
private final AsyncWorker asyncWorker = new AsyncWorker(this);
public BranchStatus branchCommit(BranchType branchType, String xid, long branchId, String resourceId,
String applicationData) throws TransactionException {
return asyncWorker.branchCommit(xid, branchId, resourceId);
}
private void addToCommitQueue(Phase2Context context) {
if (commitQueue.offer(context)) {
return;
}
CompletableFuture.runAsync(this::doBranchCommitSafely, scheduledExecutor)
.thenRun(() -> addToCommitQueue(context));
}
最后会由AsyncWorker执行 从阻塞队列里面poll数据 然后执行 实际上就是删除 undo_log数据
private void deleteUndoLog(final Connection conn, UndoLogManager undoLogManager, List<Phase2Context> contexts) {
Set<String> xids = new LinkedHashSet<>(contexts.size());
Set<Long> branchIds = new LinkedHashSet<>(contexts.size());
contexts.forEach(context -> {
xids.add(context.xid);
branchIds.add(context.branchId);
});
try {
undoLogManager.batchDeleteUndoLog(xids, branchIds, conn);
if (!conn.getAutoCommit()) {
conn.commit();
}
} catch (SQLException e) {
LOGGER.error("Failed to batch delete undo log", e);
try {
conn.rollback();
addAllToCommitQueue(contexts);
} catch (SQLException rollbackEx) {
LOGGER.error("Failed to rollback JDBC resource after deleting undo log failed", rollbackEx);
}
}
}
总结
1.当一阶段提交没有任何异常情况下进行二阶段提交 TM会发送提交全局事务请求给TC
2.TC接收到全局事务提交请求之后会循环所有的分支事务 往RM发送分支事务提交的请求
3.RM接收到分支事务提交请求之后会委派给AsyncWorker去处理 从阻塞队列里面拿数据
4.AsyncWorker处理的时候其实就是将undo_log的数据删除
5.当RM处理完之后返回TC 然后会将全局锁删除 就是 lock_table表数据
6.将分支事务状态变为二阶段已提交
7.然后会将分支事务表brach_table数据给清空
8.然后会将global_table数据删除
9.当TC端所有处理完成之后 TM收到响应信息把XID给解绑
TM全局事务回滚
TM处理全局事务回滚向TC发起请求
当一阶段提交出问题 会触发TM的回滚 然后交给TC执行
private void completeTransactionAfterThrowing(TransactionInfo txInfo, GlobalTransaction tx, Throwable originalException) throws TransactionalExecutor.ExecutionException {
//roll back
if (txInfo != null && txInfo.rollbackOn(originalException)) {
try {
// 默认所有异常都会回滚
rollbackTransaction(tx, originalException);
} catch (TransactionException txe) {
// Failed to rollback
throw new TransactionalExecutor.ExecutionException(tx, txe,
TransactionalExecutor.Code.RollbackFailure, originalException);
}
} else {
// 如果GlobalTransactional排除部分异常,可以选择提交
// not roll back on this exception, so commit
commitTransaction(tx);
}
}
private void rollbackTransaction(GlobalTransaction tx, Throwable originalException) throws TransactionException, TransactionalExecutor.ExecutionException {
triggerBeforeRollback();
tx.rollback();
triggerAfterRollback();
// 3.1 Successfully rolled back
throw new TransactionalExecutor.ExecutionException(tx, GlobalStatus.RollbackRetrying.equals(tx.getLocalStatus())
? TransactionalExecutor.Code.RollbackRetrying : TransactionalExecutor.Code.RollbackDone, originalException);
}
TM向TC发送回滚请求,请求包含xid,由TC返回全局事务状态。这和TM发送提交请求类似。
@Override
public void rollback() throws TransactionException {
if (role == GlobalTransactionRole.Participant) {
// Participant has no responsibility of rollback
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Ignore Rollback(): just involved in global transaction [{}]", xid);
}
return;
}
assertXIDNotNull();
// 默认重试5次 client.tm.rollbackRetryCount
int retry = ROLLBACK_RETRY_COUNT <= 0 ? DEFAULT_TM_ROLLBACK_RETRY_COUNT : ROLLBACK_RETRY_COUNT;
try {
while (retry > 0) {
try {
// GlobalRollbackRequest,包含全局事务id给TC,TC会返回全局事务状态给TM
status = transactionManager.rollback(xid);
break;
} catch (Throwable ex) {
LOGGER.error("Failed to report global rollback [{}],Retry Countdown: {}, reason: {}", this.getXid(), retry, ex.getMessage());
retry--;
if (retry == 0) {
throw new TransactionException("Failed to report global rollback", ex);
}
}
}
} finally {
//解绑全局事务ID
if (xid.equals(RootContext.getXID())) {
suspend();
}
}
if (LOGGER.isInfoEnabled()) {
LOGGER.info("[{}] rollback status: {}", xid, status);
}
}
TC处理全局事务回滚
DefaultCore
public GlobalStatus rollback(String xid) throws TransactionException {
GlobalSession globalSession = SessionHolder.findGlobalSession(xid);
if (globalSession == null) {
return GlobalStatus.Finished;
}
globalSession.addSessionLifecycleListener(SessionHolder.getRootSessionManager());
boolean shouldRollBack = SessionHolder.lockAndExecute(globalSession, () -> {
globalSession.close();
if (globalSession.getStatus() == GlobalStatus.Begin) {
// 将全局锁lock_table状态更新为Rollbacking
// 将全局事务global_table状态更新为Rollbacking
globalSession.changeGlobalStatus(GlobalStatus.Rollbacking);
return true;
}
return false;
});
if (!shouldRollBack) {
return globalSession.getStatus();
}
// 执行全局回滚
boolean rollbackSuccess = doGlobalRollback(globalSession, false);
return rollbackSuccess ? GlobalStatus.Rollbacked : globalSession.getStatus();
}
在变更完中间状态后,执行doGlobalRollback方法实际执行二阶段回滚
二阶段回滚逻辑:
1:TC循环向所有RM发送BranchRollbackRequest;
2:如果RM返回PhaseTwo_Rollbacked,则删除对应分支事务;发生异常或返回状态非回滚成功,将全局事务标记为RollbackRetrying,等待后续补偿执行全局回滚;
3:如果所有RM二阶段回滚成功,对于file存储模式,直接删除全局事务(因为file模式在分支注册和二阶段回滚操作上都加了锁);对于db/redis存储模式,需要异步再次执行doGlobalRollback,确保不会有分支事务注册与二阶段回滚同时发生,造成分支事务注册成功,二阶段回滚没有清理干净全局锁和分支事务;
DefaultCore
public boolean doGlobalRollback(GlobalSession globalSession, boolean retrying) throws TransactionException {
boolean success = true;
//循环向RM发送回滚分支事务请求
Boolean result = SessionHelper.forEach(globalSession.getReverseSortedBranches(), branchSession -> {
BranchStatus currentBranchStatus = branchSession.getStatus();
if (currentBranchStatus == BranchStatus.PhaseOne_Failed) {
SessionHelper.removeBranch(globalSession, branchSession, !retrying);
return CONTINUE;
}
try {
// 发送BranchRollbackRequest
BranchStatus branchStatus = branchRollback(globalSession, branchSession);
switch (branchStatus) {
case PhaseTwo_Rollbacked:
// 释放全局锁,删除分支事务
SessionHelper.removeBranch(globalSession, branchSession, !retrying);
return CONTINUE;
case PhaseTwo_RollbackFailed_Unretryable: // 回滚失败且无法重试成功
SessionHelper.endRollbackFailed(globalSession, retrying);
return false;
default:
// 如果RM回滚失败 全局事务状态变为RollbackRetrying 等待重试
if (!retrying) {
globalSession.queueToRetryRollback();
}
return false;
}
} catch (Exception ex) {
if (!retrying) {
// 如果步骤异常 全局事务状态变为RollbackRetrying 等待重试
globalSession.queueToRetryRollback();
}
throw new TransactionException(ex);
}
});
// 如果存在一个分支事务回滚失败,则返回false
if (result != null) {
return result;
}
// 对于file模式,直接删除全局事务
// 对于db/redis模式,异步再次执行doGlobalRollback,这里不做任何处理
// 防止由于各种网络波动造成分支事务注册成功lock_table和branch_table中始终有残留数据
// 导致全局锁一直被占用,无法释放
if (success) {
SessionHelper.endRollbacked(globalSession, retrying);
}
return success;
}
// SessionHelper
public static void endRollbacked(GlobalSession globalSession, boolean retryGlobal) throws TransactionException {
// 如果是重试 或 file模式
if (retryGlobal || !DELAY_HANDLE_SESSION) {
long beginTime = System.currentTimeMillis();
GlobalStatus currentStatus = globalSession.getStatus();
boolean retryBranch =
currentStatus == GlobalStatus.TimeoutRollbackRetrying || currentStatus == GlobalStatus.RollbackRetrying;
if (isTimeoutGlobalStatus(currentStatus)) {
globalSession.changeGlobalStatus(GlobalStatus.TimeoutRollbacked);
} else {
globalSession.changeGlobalStatus(GlobalStatus.Rollbacked);
}
// 删除全局事务global_table
globalSession.end();
}
}
RM处理分支事务回滚
AbstractRMHandler
@Override
public BranchRollbackResponse handle(BranchRollbackRequest request) {
BranchRollbackResponse response = new BranchRollbackResponse();
exceptionHandleTemplate(new AbstractCallback<BranchRollbackRequest, BranchRollbackResponse>() {
@Override
public void execute(BranchRollbackRequest request, BranchRollbackResponse response)
throws TransactionException {
//分支事务回滚
doBranchRollback(request, response);
}
}, request, response);
return response;
}
protected void doBranchRollback(BranchRollbackRequest request, BranchRollbackResponse response)
throws TransactionException {
String xid = request.getXid();
long branchId = request.getBranchId();
String resourceId = request.getResourceId();
String applicationData = request.getApplicationData();
BranchStatus status = getResourceManager().branchRollback(request.getBranchType(), xid, branchId, resourceId,
applicationData);
response.setXid(xid);
response.setBranchId(branchId);
response.setBranchStatus(status);
}
DataSourceManager
public BranchStatus branchRollback(BranchType branchType, String xid, long branchId, String resourceId,
String applicationData) throws TransactionException {
DataSourceProxy dataSourceProxy = get(resourceId);
if (dataSourceProxy == null) {
throw new ShouldNeverHappenException(String.format("resource: %s not found",resourceId));
}
try {
UndoLogManagerFactory.getUndoLogManager(dataSourceProxy.getDbType()).undo(dataSourceProxy, xid, branchId);
} catch (TransactionException te) {
if (te.getCode() == TransactionExceptionCode.BranchRollbackFailed_Unretriable) {
return BranchStatus.PhaseTwo_RollbackFailed_Unretryable;
} else {
return BranchStatus.PhaseTwo_RollbackFailed_Retryable;
}
}
return BranchStatus.PhaseTwo_Rollbacked;
}
RM二阶段回滚就是执行undoLog里的前置镜像,回滚一阶段的本地事务。具体逻辑都在
// AbstractUndoLogManager
public void undo(DataSourceProxy dataSourceProxy, String xid, long branchId) throws TransactionException {
Connection conn = null;
ResultSet rs = null;
PreparedStatement selectPST = null;
boolean originalAutoCommit = true;
for (; ; ) {
try {
conn = dataSourceProxy.getPlainConnection();
if (originalAutoCommit = conn.getAutoCommit()) {
conn.setAutoCommit(false);
}
// Step1 根据xid+branchId查询undo_log
selectPST = conn.prepareStatement(SELECT_UNDO_LOG_SQL);
selectPST.setLong(1, branchId);
selectPST.setString(2, xid);
rs = selectPST.executeQuery();
boolean exists = false;
while (rs.next()) {
exists = true;
int state = rs.getInt(ClientTableColumnsName.UNDO_LOG_LOG_STATUS);
if (!canUndo(state)) {
return;
}
String contextString = rs.getString(ClientTableColumnsName.UNDO_LOG_CONTEXT);
Map<String, String> context = parseContext(contextString);
byte[] rollbackInfo = getRollbackInfo(rs);
String serializer = context == null ? null : context.get(UndoLogConstants.SERIALIZER_KEY);
UndoLogParser parser = serializer == null ? UndoLogParserFactory.getInstance()
: UndoLogParserFactory.getInstance(serializer);
BranchUndoLog branchUndoLog = parser.decode(rollbackInfo);
try {
setCurrentSerializer(parser.getName());
List<SQLUndoLog> sqlUndoLogs = branchUndoLog.getSqlUndoLogs();
if (sqlUndoLogs.size() > 1) {
Collections.reverse(sqlUndoLogs);
}
// Step2 回滚本地事务
for (SQLUndoLog sqlUndoLog : sqlUndoLogs) {
TableMeta tableMeta = TableMetaCacheFactory.getTableMetaCache(dataSourceProxy.getDbType()).getTableMeta(
conn, sqlUndoLog.getTableName(), dataSourceProxy.getResourceId());
sqlUndoLog.setTableMeta(tableMeta);
AbstractUndoExecutor undoExecutor = UndoExecutorFactory.getUndoExecutor(
dataSourceProxy.getDbType(), sqlUndoLog);
undoExecutor.executeOn(conn);
}
} finally {
removeCurrentSerializer();
}
}
// See https://github.com/seata/seata/issues/489
// Step3 undo_log处理
if (exists) {
// RM一阶段正常提交,删除undoLog
deleteUndoLog(xid, branchId, conn);
conn.commit();
} else {
// RM一阶段提交失败,TC由于全局事务超时发起回滚
// 此时RM的本地事务可能提交,这里插入一个undo_log来防止RM提交事务成功导致数据不一致
insertUndoLogWithGlobalFinished(xid, branchId, UndoLogParserFactory.getInstance(), conn);
conn.commit();
}
return;
} catch (SQLIntegrityConstraintViolationException e) {
// insertUndoLogWithGlobalFinished 如果插入undo_log由于唯一约束失败,那么执行重试回滚
// 日志...
} catch (Throwable e) {
if (conn != null) {
try {
conn.rollback();
} catch (SQLException rollbackEx) {
LOGGER.warn("Failed to close JDBC resource while undo ... ", rollbackEx);
}
}
throw new BranchTransactionException(BranchRollbackFailed_Retriable, String
.format("Branch session rollback failed and try again later xid = %s branchId = %s %s", xid,
branchId, e.getMessage()), e);
} finally {
// 关闭资源
}
}
}
二阶段回滚RM本地事务分为三步:
1.查询分支事务对应undoLog,select from undo_log where xid = ? and branch_id = ? for update;
2.AbstractUndoExecutor利用undoLog中的前置镜像回滚本地事务;
3.处理undoLog;
回滚一阶段本地事务
根据undoLog回滚一阶段本地事务。
// AbstractUndoExecutor
public void executeOn(Connection conn) throws SQLException {
// Step1 比较当前数据和前后镜像,如果情况允许才会回滚
if (IS_UNDO_DATA_VALIDATION_ENABLE && !dataValidationAndGoOn(conn)) {
return;
}
PreparedStatement undoPST = null;
try {
// Step2 构建回滚sql 如 UPDATE a SET x = ? WHERE pk1 in (?)
String undoSQL = buildUndoSQL();
undoPST = conn.prepareStatement(undoSQL);
TableRecords undoRows = getUndoRows();
// 根据主键回滚前置镜像中每一行数据
for (Row undoRow : undoRows.getRows()) {
ArrayList<Field> undoValues = new ArrayList<>();
List<Field> pkValueList = getOrderedPkList(undoRows, undoRow, getDbType(conn));
for (Field field : undoRow.getFields()) {
if (field.getKeyType() != KeyType.PRIMARY_KEY) {
undoValues.add(field);
}
}
// Step3 替换占位符
undoPrepare(undoPST, undoValues, pkValueList);
// Step4 根据id回滚一行
undoPST.executeUpdate();
}
} catch (Exception ex) {
if (ex instanceof SQLException) {
throw (SQLException) ex;
} else {
throw new SQLException(ex);
}
}
finally {
//important for oracle
IOUtil.close(undoPST);
}
}
在执行回滚之前,dataValidationAndGoOn方法会比较当前数据和前后镜像,如果情况允许才会回滚
protected boolean dataValidationAndGoOn(Connection conn) throws SQLException {
TableRecords beforeRecords = sqlUndoLog.getBeforeImage();
TableRecords afterRecords = sqlUndoLog.getAfterImage();
// case1 前后镜像一致,不需要回滚,返回false
Result<Boolean> beforeEqualsAfterResult = DataCompareUtils.isRecordsEquals(beforeRecords, afterRecords);
if (beforeEqualsAfterResult.getResult()) {
return false;
}
// select for update by 主键 获取当前快照数据
TableRecords currentRecords = queryCurrentRecords(conn);
// case2 当前快照数据和后置镜像不一致
Result<Boolean> afterEqualsCurrentResult = DataCompareUtils.isRecordsEquals(afterRecords, currentRecords);
if (!afterEqualsCurrentResult.getResult()) {
// case2-1 如果前置镜像与当前快照一致,也不需要回滚,返回false
Result<Boolean> beforeEqualsCurrentResult = DataCompareUtils.isRecordsEquals(beforeRecords, currentRecords);
if (beforeEqualsCurrentResult.getResult()) {
return false;
} else {
// case2-2 当前快照数据和后置镜像不一致,抛出异常,有脏数据
throw new SQLException("Has dirty records when undo.");
}
}
// case3 当前快照数据与后置镜像一致,可以执行回滚
return true;
}
处理undolog
如果RM一阶段commit成功,undo_log存在,即exists=true,只要删除undo_log即可,与回滚事务一起提交;
如果RM一阶段未commit成功,undo_log不存在,可能由于RM一阶段业务处理超时,导致TC判断全局事务超时发起二阶段回滚,此时RM一阶段事务提交和RM二阶段回滚可能同时发生。
为了确保数据一致性,利用undo_log的xid和branch_id的唯一约束,插入一条status=GlobalFinished的数据来解决这个问题。
如果RM一阶段事务提交成功,即一阶段提交插入status=Normal的undo_log成功,那么在二阶段回滚时insertUndoLogWithGlobalFinished会抛出SQLIntegrityConstraintViolationException唯一约束冲突,AbstractUndoLogManager.undo重试二阶段回滚本地事务,最终会回滚成功;
如果二阶段回滚插入status=GlobalFinished的undo_log成功,那么RM一阶段就不会提交成功(发生唯一约束冲突),从而解决数据一致性问题。
AbstractUndoLogManager
public void undo(DataSourceProxy dataSourceProxy, String xid, long branchId) throws TransactionException {
for (; ; ) {
try {
// ...
// See https://github.com/seata/seata/issues/489
// Step3 undo_log处理
if (exists) {
// RM一阶段正常提交,删除undoLog
deleteUndoLog(xid, branchId, conn);
conn.commit();
} else {
// RM一阶段超时,TC由于全局事务超时发起回滚
// 此时RM的本地事务可能同时提交,这里插入一个undo_log来防止RM提交事务成功导致数据不一致
insertUndoLogWithGlobalFinished(xid, branchId, UndoLogParserFactory.getInstance(), conn);
conn.commit();
}
return;
} catch (SQLIntegrityConstraintViolationException e) {
// insertUndoLogWithGlobalFinished 如果插入undo_log由于唯一约束失败,那么执行重试回滚
// 日志...
}
// ...
}
}
// MySQLUndoLogManager
protected void insertUndoLogWithGlobalFinished(String xid, long branchId, UndoLogParser parser, Connection conn) throws SQLException {
insertUndoLog(xid, branchId, buildContext(parser.getName(), CompressorType.NONE), parser.getDefaultContent(), State.GlobalFinished, conn);
}
总结
1.当一阶段业务或者其他发生异常的时候会触发二阶段的回滚
2.TM发送二阶段回滚请求给TC
3.TC接收回滚请求进行处理 会先遍历所有的分支事务 然后往RM去发送回滚的请求
4.RM接收请求之后会基于undolog去进行回滚
4.1会先查询当前undo_log 然后拿到前置和后置镜像 去比较前置和后置镜像和当前需要执行的语句是不是一致的
4.2如果后置镜像和需要回滚的语句是一样的那正常回滚 如果不一样则代表有脏数据的情况抛异常
5.如果一阶段commit undo_log存在 只要删除undo_log即可,与回滚事务一起提交
6.如果RM一阶段未commit成功,undo_log不存在,可能由于RM一阶段业务处理超时,导致TC判断全局事务超时发起二阶段回滚,此时RM一阶段事务提交和RM二阶段回滚可能同时发生。
7.当RM处理完之后响应TC TC会删除分支事务 然后把状态改为二阶段已回滚
8.当所有分支事务全部回滚结束之后 将全局事务删除
二阶段失败重试机制
DefaultCoordinator每秒执行handleRetryRollbacking方法,一方面是处理二阶段回滚的重试工作,另一方面处理正常二阶段回滚的剩余数据处理,比如残留的全局锁、分支事务,以及db/redis存储模式下需要异步清理的全局事务。
重试工作会一直进行下去,除非RM返回PhaseTwo_Rollbacked回滚成功,或PhaseTwo_RollbackFailed_Unretryable(代表回滚失败但是重试也不可能成功,属于不可恢复异常,目前这一状态只有XA模式下会返回,所以在AT模式下会重试到RM返回二阶段回滚成功为止)。
// DefaultCoordinator
protected void handleRetryRollbacking() {
// 查询TimeoutRollbacking,TimeoutRollbackRetrying, RollbackRetrying, Rollbacking 的全局事务
SessionCondition sessionCondition = new SessionCondition(rollbackingStatuses);
sessionCondition.setLazyLoadBranch(true);
Collection<GlobalSession> rollbackingSessions =
SessionHolder.getRetryRollbackingSessionManager().findGlobalSessions(sessionCondition);
if (CollectionUtils.isEmpty(rollbackingSessions)) {
return;
}
long now = System.currentTimeMillis();
SessionHelper.forEach(rollbackingSessions, rollbackingSession -> {
try {
// 如果是正在回滚的事务,需要等待开启全局事务的130秒后变为DeadSession再处理
if (rollbackingSession.getStatus().equals(GlobalStatus.Rollbacking)
&& !rollbackingSession.isDeadSession()) {
return;
}
// 默认MAX_ROLLBACK_RETRY_TIMEOUT=-1,不走这里
if (isRetryTimeout(now, MAX_ROLLBACK_RETRY_TIMEOUT.toMillis(), rollbackingSession.getBeginTime())) {
// ...
return;
}
// 再次执行全局事务回滚逻辑
rollbackingSession.addSessionLifecycleListener(SessionHolder.getRootSessionManager());
core.doGlobalRollback(rollbackingSession, true);
} catch (TransactionException ex) {
LOGGER.info("Failed to retry rollbacking [{}] {} {}", rollbackingSession.getXid(), ex.getCode(), ex.getMessage());
}
});
}
全局事务超时处理
DefaultCoordinator每秒执行timeoutCheck方法,将处于begin状态的超时全局事务更新为TimeoutRollbacking状态,交由二阶段回滚重试定时任务(handleRetryRollbacking),异步执行回滚操作。
// DefaultCoordinator
protected void timeoutCheck() {
// 1. 查询状态处于begin的全局事务
SessionCondition sessionCondition = new SessionCondition(GlobalStatus.Begin);
sessionCondition.setLazyLoadBranch(true);
Collection<GlobalSession> beginGlobalsessions =
SessionHolder.getRootSessionManager().findGlobalSessions(sessionCondition);
if (CollectionUtils.isEmpty(beginGlobalsessions)) {
return;
}
SessionHelper.forEach(beginGlobalsessions, globalSession -> {
SessionHolder.lockAndExecute(globalSession, () -> {
// 2. 校验是否超时
if (globalSession.getStatus() != GlobalStatus.Begin || !globalSession.isTimeout()) {
return false;
}
globalSession.addSessionLifecycleListener(SessionHolder.getRootSessionManager());
globalSession.close();
// 3. 更新全局事务状态为TimeoutRollbacking,通过handleRetryRollbacking异步处理二阶段回滚
globalSession.setStatus(GlobalStatus.TimeoutRollbacking);
globalSession.addSessionLifecycleListener(SessionHolder.getRetryRollbackingSessionManager());
SessionHolder.getRetryRollbackingSessionManager().addGlobalSession(globalSession);
return true;
});
});
}