Python高级算法与数据结构优化实战
在算法竞赛中,掌握高级优化技巧和数据结构实现可以显著提升解题效率和代码性能。本文深入探讨Python中常见算法问题的高效实现方法,通过实际比赛案例展示如何优化时间复杂度和空间复杂度。
一、前缀和与差分数组
前缀和与差分数组是算法竞赛中处理区间查询和区间修改的利器:经过O(n)的预处理后,前缀和可将单次区间求和从O(n)降至O(1),差分数组可将单次区间修改从O(n)降至O(1)。
1.1 前缀和技术
基本实现:
def build_prefix_sum(nums):
    """Build the prefix-sum table of ``nums``.

    Args:
        nums: Sequence of numbers; it must support ``len`` and iteration.

    Returns:
        A list ``prefix`` of length ``len(nums) + 1`` in which ``prefix[i]``
        is the sum of the first ``i`` elements, so ``prefix[0]`` is 0.
    """
    prefix = [0] * (len(nums) + 1)
    for pos, value in enumerate(nums, start=1):
        # Each entry extends the previous running total by one element.
        prefix[pos] = prefix[pos - 1] + value
    return prefix
def range_sum(prefix, left, right):
    """Return the sum of the half-open slice ``nums[left:right]``.

    Args:
        prefix: Prefix-sum table where ``prefix[i]`` is the sum of the
            first ``i`` elements of the underlying array.
        left: Index of the first element included.
        right: Index one past the last element included.

    Returns:
        ``prefix[right] - prefix[left]``.
    """
    through_right = prefix[right]
    before_left = prefix[left]
    return through_right - before_left
实战应用: 矩阵区域和
题目: 计算二维矩阵中任意子矩阵的元素和。
def matrix_region_sum(matrix):
    """Precompute 2-D prefix sums and return a rectangle-sum query function.

    Args:
        matrix: Rectangular list of rows of numbers.

    Returns:
        A function ``query(row1, col1, row2, col2)`` that returns the sum of
        the cells in the rectangle whose corners are ``(row1, col1)`` and
        ``(row2, col2)``, both inclusive. If ``matrix`` is empty or its first
        row is empty, an empty list is returned instead of a function.
    """
    if not matrix or not matrix[0]:
        return []

    rows, cols = len(matrix), len(matrix[0])
    # prefix[r][c] holds the sum of matrix[0:r][0:c]; row 0 and column 0
    # stay zero so queries need no boundary special-casing.
    prefix = [[0] * (cols + 1) for _ in range(rows + 1)]
    for r, row in enumerate(matrix, start=1):
        above = prefix[r - 1]
        current = prefix[r]
        for c in range(1, cols + 1):
            # Inclusion-exclusion: left + above - overlap + this cell.
            current[c] = current[c - 1] + above[c] - above[c - 1] + row[c - 1]

    def query(row1, col1, row2, col2):
        bottom = prefix[row2 + 1]
        top = prefix[row1]
        return bottom[col2 + 1] - bottom[col1] - top[col2 + 1] + top[col1]

    return query
# Example: build the query function once, then sum a sub-rectangle.
matrix = [
[3, 0, 1, 4, 2],
[5, 6, 3, 2, 1],
[1, 2, 0, 1, 5],
[4, 1, 0, 1, 7],
[1, 0, 3, 0, 5]
]
region_sum = matrix_region_sum(matrix)
print(region_sum(1, 1, 3, 3)) # sum of the 3x3 region (1,1)..(3,3): 6+3+2 + 2+0+1 + 1+0+1 = 16
高级应用: 子数组和为k的个数
题目: 给定一个数组和整数k,求数组中和为k的连续子数组个数。
from collections import defaultdict
def subarray_sum_equals_k(nums, k):
    """Count the contiguous subarrays of ``nums`` whose sum equals ``k``.

    Args:
        nums: Iterable of numbers.
        k: Target sum.

    Returns:
        The number of (start, end) ranges whose elements add up to ``k``.
    """
    matches = 0
    running = 0
    # Maps each prefix sum seen so far to how many times it occurred.
    # The empty prefix (sum 0) is seeded so subarrays starting at index 0
    # are counted.
    seen = {0: 1}
    for value in nums:
        running += value
        # Every earlier prefix equal to running - k marks the start of a
        # subarray that ends here and sums to k.
        matches += seen.get(running - k, 0)
        seen[running] = seen.get(running, 0) + 1
    return matches
# Example: the two overlapping runs [1, 1] are the only subarrays summing to 2.
nums = [1, 1, 1]
k = 2
print(subarray_sum_equals_k(nums, k)) # prints: 2
1.2 差分数组技术
差分数组是前缀和的逆运算,常用于区间更新操作。
基本实现:
def build_difference_array(nums):
    """Build the difference array of ``nums``.

    Args:
        nums: Sequence of numbers (must support ``len`` and indexing).

    Returns:
        A list ``diff`` of the same length where ``diff[0] == nums[0]`` and
        ``diff[i] == nums[i] - nums[i - 1]`` for ``i >= 1``. An empty input
        yields an empty list.
    """
    n = len(nums)
    if n == 0:
        # Nothing to difference; avoids indexing nums[0] on empty input.
        return []
    diff = [nums[0]]
    diff.extend(nums[i] - nums[i - 1] for i in range(1, n))
    return diff
def range_add(diff, left, right, val):
    """Add ``val`` to ``nums[left]`` .. ``nums[right]`` via the difference array.

    The update is applied in place to ``diff``; nothing is returned.

    Args:
        diff: Difference array of the underlying ``nums``.
        left: First index of the range (inclusive).
        right: Last index of the range (inclusive).
        val: Amount to add to every element in the range.
    """
    diff[left] += val
    stop = right + 1
    # When the range reaches the last element there is no position after it
    # at which to cancel the increment.
    if stop < len(diff):
        diff[stop] -= val
def reconstruct_array(diff):
    """Rebuild the original array from its difference array.

    Args:
        diff: Difference array, where ``diff[0]`` is the first element and
            each later entry is the step from the previous element.

    Returns:
        A list whose ``i``-th entry is ``diff[0] + ... + diff[i]``. An empty
        ``diff`` yields an empty list.
    """
    nums = []
    for delta in diff:
        # The first element is taken as-is; later ones accumulate on the
        # previously reconstructed value.
        nums.append(nums[-1] + delta if nums else delta)
    return nums
实战应用: 航班预订统计
题目: 有n个航班,航班编号从1到n。有多个预订记录,每个记录包含(first, last, seats),表示从first到last号航班预订了seats个座位。求每个航班预订的座位总数。
def corporate_flight_bookings(bookings, n):
    """Return the total number of seats booked on each of ``n`` flights.

    Args:
        bookings: Iterable of ``(first, last, seats)`` records; each books
            ``seats`` seats on every flight numbered ``first`` through
            ``last`` (1-based, inclusive).
        n: Number of flights.

    Returns:
        A list of length ``n`` whose ``i``-th entry is the seat total for
        flight ``i + 1``. ``n == 0`` yields an empty list.
    """
    # One extra slot so that the cancelling write at index ``last`` is always
    # in range, even when a booking extends to the final flight.
    diff = [0] * (n + 1)
    for first, last, seats in bookings:
        diff[first - 1] += seats  # flights are 1-based, the array is 0-based
        diff[last] -= seats       # stop counting after flight ``last``

    # A running sum over the first n differences restores per-flight totals.
    result = []
    running = 0
    for delta in diff[:n]:
        running += delta
        result.append(running)
    return result
# Example: three overlapping bookings across five flights.
bookings = [[1, 2, 10], [2, 3, 20], [2, 5, 25]]
n = 5
print(corporate_flight_bookings(bookings, n)) # prints: [10, 55, 45, 25, 25]
二、并查集(Union-Find)
并查集是处理元素分组和合并操作的高效数据结构,广泛应用于图论问题。
2.1 并查集的高效实现
class UnionFind:
    """Disjoint-set forest over the elements ``0 .. n - 1``.

    Attributes:
        parent: ``parent[x]`` is the parent of ``x``; a root is its own parent.
        rank: Per-root rank used to decide which root absorbs the other.
        count: Current number of disjoint sets.
    """

    def __init__(self, n):
        """Create ``n`` singleton sets."""
        self.count = n
        self.rank = [0] * n
        self.parent = list(range(n))

    def find(self, x):
        """Return the root of ``x``'s set, compressing the path walked."""
        # First pass: walk up to the root.
        root = x
        while self.parent[root] != root:
            root = self.parent[root]
        # Second pass: point every node on the path directly at the root.
        node = x
        while node != root:
            next_node = self.parent[node]
            self.parent[node] = root
            node = next_node
        return root

    def union(self, x, y):
        """Merge the sets containing ``x`` and ``y``.

        Returns:
            ``True`` if two distinct sets were merged, ``False`` if ``x`` and
            ``y`` were already in the same set.
        """
        root_x = self.find(x)
        root_y = self.find(y)
        if root_x == root_y:
            return False

        # Arrange for root_x to be the root of rank >= the other, so the
        # lower-ranked tree is always attached beneath it.
        if self.rank[root_x] < self.rank[root_y]:
            root_x, root_y = root_y, root_x
        if self.rank[root_x] == self.rank[root_y]:
            self.rank[root_x] += 1
        self.parent[root_y] = root_x

        self.count -= 1
        return True

    def connected(self, x, y):
        """Return whether ``x`` and ``y`` belong to the same set."""
        return self.find(x) == self.find(y)
实战应用: 岛屿数量问题
题目: 给定一个由 '1'(陆地)和 '0'(水)组成的二维网格,计算岛屿的数量。岛屿由水平或竖直方向上相邻的陆地连接而成。
def num_islands(grid):
    """Count the islands in a grid of ``'1'`` (land) and ``'0'`` (water).

    Land cells that touch horizontally or vertically belong to the same
    island.

    Args:
        grid: List of rows; each cell is compared against the string ``'1'``
            and anything else is treated as water.

    Returns:
        The number of islands; 0 if the grid or its first row is empty.
    """
    if not grid or not grid[0]:
        return 0

    rows, cols = len(grid), len(grid[0])
    uf = UnionFind(rows * cols)
    water = 0
    for r in range(rows):
        for c in range(cols):
            if grid[r][c] != '1':
                water += 1
                continue
            cell = r * cols + c
            # Looking only right and down visits every adjacent pair once.
            if c + 1 < cols and grid[r][c + 1] == '1':
                uf.union(cell, cell + 1)
            if r + 1 < rows and grid[r + 1][c] == '1':
                uf.union(cell, cell + cols)

    # Water cells are never merged, so each one is still its own set;
    # the remaining sets are exactly the islands.
    return uf.count - water
# Example: a 2x2 block, a lone cell, and a horizontal pair form three islands.
grid = [
["1","1","0","0","0"],
["1","1","0","0","0"],
["0","0","1","0","0"],
["0","0","0","1","1"]
]
print(num_islands(grid)) # prints: 3
高级应用: 最小生成树的Kruskal算法
def kruskal_mst(n, edges):
    """Build a minimum spanning tree with Kruskal's algorithm.

    Args:
        n: Number of nodes; nodes are the integers ``0 .. n - 1``.
        edges: Iterable of ``(u, v, weight)`` triples. It is not modified.

    Returns:
        A tuple ``(mst_weight, mst_edges)``: the total weight of the selected
        edges and the selected edges themselves, in non-decreasing weight
        order. If the graph is not connected, fewer than ``n - 1`` edges are
        selected.
    """
    uf = UnionFind(n)
    mst_weight = 0
    mst_edges = []
    # sorted() leaves the caller's edge collection in its original order.
    for u, v, weight in sorted(edges, key=lambda edge: edge[2]):
        if not uf.union(u, v):
            # Both endpoints are already connected; this edge would close a cycle.
            continue
        mst_weight += weight
        mst_edges.append((u, v, weight))
        # A spanning tree on n nodes has exactly n - 1 edges.
        if len(mst_edges) == n - 1:
            break
    return mst_weight, mst_edges
# Example: 4 nodes, 5 weighted edges; the tree picks (2,3,4), (0,3,5), (0,1,10).
edges = [
    (0, 1, 10), (0, 2, 6), (0, 3, 5),
    (1, 3, 15), (2, 3, 4)
]
n = 4
weight, mst = kruskal_mst(n, edges)
# Replacement fields are kept on one line: a line break inside ``{...}`` of a
# single-quoted f-string is a SyntaxError before Python 3.12.
print(f"最小生成树权重: {weight}")
print(f"最小生成树边: {mst}")
三、线段树与树状数组
线段树和树状数组是处理区间查询和区间修改的高级数据结构。
3.1 树状数组(Binary Indexed Tree)
树状数组在O(log n)时间内完成单点更新和前缀和查询。
class BinaryIndexedTree:
    """Binary indexed (Fenwick) tree over positions ``1 .. n``.

    Attributes:
        size: Number of positions ``n``.
        tree: Internal array of length ``n + 1``; slot 0 is unused because
            positions are 1-based.
    """

    def __init__(self, n):
        """Create a tree of ``n`` positions, all initially zero."""
        self.size = n
        self.tree = [0] * (n + 1)

    def update(self, index, delta):
        """Add ``delta`` to the element at 1-based position ``index``.

        Raises:
            IndexError: If ``index`` is less than 1. The lowest set bit of 0
                is 0, so without this check the loop below would never
                advance.
        """
        if index < 1:
            raise IndexError(
                f"BinaryIndexedTree index must be >= 1, got {index}"
            )
        while index <= self.size:
            self.tree[index] += delta
            index += index & -index  # step to the next node covering index

    def query(self, index):
        """Return the sum of positions ``1 .. index`` (0 if ``index < 1``)."""
        result = 0
        while index > 0:
            result += self.tree[index]
            index -= index & -index  # drop the lowest set bit
        return result

    def range_query(self, left, right):
        """Return the sum of positions ``left .. right``, both inclusive."""
        return self.query(right) - self.query(left - 1)
实战应用: 逆序对计数
题目: 计算一个数组中的逆序对数量。逆序对是指数组中的两个元素,前面的元素大于后面的元素。
def count_inversions(nums):
# 离散化: 将数组中的元素映射到1到n
sorted_nums = sorted(set(nums))
rank = {
val: idx + 1 for idx, val in enumerate(s