说明
如果需要用到这些知识却没有掌握,则会让人感到沮丧,也可能导致面试被拒。无论是花几天时间“突击”,还是利用零碎的时间持续学习,在数据结构上下点功夫都是值得的。那么Python 中有哪些数据结构呢?列表、字典、集合,还有……栈?Python 有栈吗?本系列文章将给出详细拼图。
13章: Binary Tree
The Binary Tree: 二叉树,每个节点最多只有两个子节点
class _BinTreeNode:
    """Binary tree node: a payload plus links to at most two children."""

    def __init__(self, data):
        self.data = data
        self.left = None
        self.right = None


# Three depth-first traversals.
def preorderTrav(subtree):
    """Print the subtree in pre-order: root, left subtree, right subtree."""
    if subtree is not None:
        print(subtree.data)
        preorderTrav(subtree.left)
        preorderTrav(subtree.right)


def inorderTrav(subtree):
    """Print the subtree in in-order: left subtree, root, right subtree."""
    if subtree is not None:
        # Recurse with inorderTrav itself; delegating to preorderTrav would
        # print the children in pre-order instead.
        inorderTrav(subtree.left)
        print(subtree.data)
        inorderTrav(subtree.right)


def postorderTrav(subtree):
    """Print the subtree in post-order: left subtree, right subtree, root."""
    if subtree is not None:
        # Same remark as inorderTrav: the recursion must stay post-order.
        postorderTrav(subtree.left)
        postorderTrav(subtree.right)
        print(subtree.data)


# Breadth-first traversal: visit the tree level by level using a queue.
def breadthFirstTrav(bintree):
    """Print the tree level by level, left to right.

    An empty tree (``None``) prints nothing.
    """
    from collections import deque

    if bintree is None:
        return
    pending = deque([bintree])
    while pending:
        node = pending.popleft()
        print(node.data)
        if node.left is not None:
            pending.append(node.left)
        if node.right is not None:
            pending.append(node.right)


class _ExpTreeNode:
    """Expression tree node: an operator or operand plus two child links."""

    __slots__ = ('element', 'left', 'right')

    def __init__(self, data):
        self.element = data
        self.left = None
        self.right = None

    def __repr__(self):
        return '<_ExpTreeNode: {} {} {}>'.format(
            self.element, self.left, self.right)


import operator
from queue import Queue


class ExpressionTree:
    r"""Binary tree for a fully parenthesized arithmetic expression.

    Operators are stored in interior nodes and operands in leaves, e.g.
    ``(9+3) * (8-4)`` is represented as::

              *
             / \
            +   -
           / \ / \
          9  3 8  4

    Every token is a single character: a digit, a one-letter variable name,
    one of the binary operators ``+ - * / %``, or a parenthesis. Whitespace
    in the expression string is ignored.

    Expression Tree ADT:
        ExpressionTree(expStr): build the tree from the expression string.
        evaluate(varDict): evaluate the expression and return the number.
        str(tree): rebuild the fully parenthesized expression string.

    Usage:
        vars = {'a': 5, 'b': 12}
        expTree = ExpressionTree("(a/(b-3))")
        print('The result = ', expTree.evaluate(vars))
    """

    # Dispatch table built once for the class instead of on every call.
    _OPERATORS = {
        '+': operator.add,
        '-': operator.sub,
        '*': operator.mul,
        '/': operator.truediv,
        '%': operator.mod,
    }

    def __init__(self, expStr):
        self._expTree = None
        self._buildTree(expStr)

    def evaluate(self, varDict):
        """Return the value of the expression.

        ``varDict`` maps one-letter variable names to numbers. An unknown
        variable triggers an AssertionError ('invalid variable.').
        """
        return self._evalTree(self._expTree, varDict)

    def __str__(self):
        return self._buildString(self._expTree)

    def _buildString(self, treeNode):
        """Return the expression for ``treeNode``.

        A left parenthesis is emitted before a subtree is traversed and a
        right parenthesis after it.
        """
        if treeNode.left is None and treeNode.right is None:
            # A leaf is an operand and is returned as is.
            return str(treeNode.element)
        return ''.join((
            '(',
            self._buildString(treeNode.left),
            str(treeNode.element),
            self._buildString(treeNode.right),
            ')',
        ))

    def _evalTree(self, subtree, varDict):
        """Recursively evaluate ``subtree`` using the values in ``varDict``."""
        if subtree.left is None and subtree.right is None:
            # Leaf: either a single literal digit or a variable name.
            if '0' <= subtree.element <= '9':
                return int(subtree.element)
            assert subtree.element in varDict, 'invalid variable.'
            return varDict[subtree.element]
        # Interior node: evaluate both operands, then apply the operator.
        lvalue = self._evalTree(subtree.left, varDict)
        rvalue = self._evalTree(subtree.right, varDict)
        return self._computeOp(lvalue, subtree.element, rvalue)

    def _computeOp(self, left, op, right):
        """Apply the binary operator ``op`` to ``left`` and ``right``.

        An operator missing from the table raises KeyError.
        """
        assert op
        return self._OPERATORS[op](left, right)

    def _buildTree(self, expStr):
        """Tokenize ``expStr`` character by character and build the tree."""
        expQ = Queue()
        for token in expStr:
            if not token.isspace():
                expQ.put(token)
        self._expTree = _ExpTreeNode(None)  # root node
        self._recBuildTree(self._expTree, expQ)

    @staticmethod
    def _nextToken(expQ):
        """Pop the next token, raising ValueError when none is left.

        Queue.get() would block forever on an empty queue, so a truncated
        expression has to be detected explicitly.
        """
        if expQ.empty():
            raise ValueError('invalid expression: unexpected end of input')
        return expQ.get()

    def _recBuildTree(self, curNode, expQ):
        """Fill ``curNode`` (and its descendants) from the token queue."""
        token = self._nextToken(expQ)
        if token == '(':
            curNode.left = _ExpTreeNode(None)
            self._recBuildTree(curNode.left, expQ)
            # The next token is the operator: + - * / %
            curNode.element = self._nextToken(expQ)
            curNode.right = _ExpTreeNode(None)
            self._recBuildTree(curNode.right, expQ)
            # The sub-expression must be closed by ')'.
            closing = self._nextToken(expQ)
            if closing != ')':
                raise ValueError(
                    "invalid expression: expected ')' but got "
                    + repr(closing))
        else:
            # Operand: kept as a character; _evalTree converts digits to int.
            curNode.element = token


# Demo run at import time.
vars = {'a': 5, 'b': 12}
expTree = ExpressionTree("((2*7)+8)")
print(expTree)
print('The result = ', expTree.evaluate(vars))
Heap(堆):二叉树最直接的一个应用就是实现堆。堆就是一棵完全二叉树,最大堆的非叶子节点的值都比孩子大,最小堆的非叶子节点的值都比孩子小。Python 内置了 heapq 模块帮助我们实现堆操作,比如用内置的 heapq 模块实现一个堆排序:
# Heap sort built on Python's standard heapq module.
def heapsort(iterable):
    """Return a new list with the items of ``iterable`` in ascending order.

    Every item is pushed onto a min-heap, then the heap is drained from the
    smallest item to the largest.
    """
    from heapq import heappop, heappush

    heap = []
    for item in iterable:
        heappush(heap, item)

    ordered = []
    while heap:
        ordered.append(heappop(heap))
    return ordered
但是一般实现堆的时候实际上并不是用树节点来实现的,而是使用数组实现,效率比较高。为什么可以用数组实现呢?因为根据完全二叉树的性质,可以用下标之间的关系表示节点之间的关系,MaxHeap 的 docstring 中已经说明了。
class MaxHeap:
    """Fixed-capacity max-heap stored in an array.

    A heap is a complete binary tree. In a max-heap every non-leaf node is
    at least as large as its children; in a min-heap it is at most as large.
    A heap therefore has two properties that every insertion and extraction
    must keep: the order property and the shape property (a complete binary
    tree).

    add:     append the value at the end, then sift it up.
    extract: return the root, copy the bottom-right node into the root, then
             sift it down.

    Numbering the nodes from the root, top to bottom and left to right, the
    parent and children of node ``i`` are:

        parent = (i - 1) // 2
        left   = 2 * i + 1
        right  = 2 * i + 2

    Storing the heap in an array is more efficient, avoids the memory cost of
    tree nodes, and avoids pointer manipulation, which makes it easier to
    debug.
    """

    def __init__(self, maxSize):
        # Array is the fixed-size Array ADT implemented in chapter 2; it is
        # not defined in this section and must already be in scope.
        self._elements = Array(maxSize)
        self._count = 0  # number of slots currently in use

    def __len__(self):
        return self._count

    def capacity(self):
        """Return the maximum number of values the heap can hold."""
        return len(self._elements)

    def add(self, value):
        """Insert ``value``; asserts that the heap is not full."""
        assert self._count < self.capacity(), 'can not add to full heap'
        self._elements[self._count] = value
        self._count += 1
        self._siftUp(self._count - 1)
        self.assert_keep_heap()  # every add must keep the heap property

    def extract(self):
        """Remove and return the largest value; asserts a non-empty heap."""
        assert self._count > 0, 'can not extract from an empty heap'
        value = self._elements[0]  # save the root value
        self._count -= 1
        # Move the bottom-right node to the root, then restore the order.
        self._elements[0] = self._elements[self._count]
        self._siftDown(0)
        self.assert_keep_heap()
        return value

    def _siftUp(self, ndx):
        """Move the value at ``ndx`` up while it is larger than its parent."""
        while ndx > 0:
            parent = (ndx - 1) // 2
            if not self._elements[ndx] > self._elements[parent]:
                break
            self._elements[ndx], self._elements[parent] = (
                self._elements[parent], self._elements[ndx])
            ndx = parent

    def _siftDown(self, ndx):
        """Move the value at ``ndx`` down until both children are smaller."""
        left = 2 * ndx + 1
        right = 2 * ndx + 2
        # Find the largest of the node and its live children. Each child is
        # bounds-checked before it is read, so a stale slot at or beyond
        # self._count never takes part in a comparison.
        largest = ndx
        if (left < self._count
                and self._elements[left] > self._elements[largest]):
            largest = left
        if (right < self._count
                and self._elements[right] > self._elements[largest]):
            largest = right
        if largest != ndx:
            self._elements[ndx], self._elements[largest] = (
                self._elements[largest], self._elements[ndx])
            self._siftDown(largest)

    def __repr__(self):
        # Only the first self._count slots hold live values; the remaining
        # slots are unused or stale copies left behind by extract().
        return ' '.join(
            str(self._elements[i]) for i in range(self._count))

    def assert_keep_heap(self):
        """Assert that the max-heap order property holds for every node.

        Used to verify that each add or extract keeps the heap valid.
        """
        _len = len(self)
        # Nodes 0 .. _len // 2 - 1 are exactly the nodes with a child.
        for i in range(_len // 2):
            l = 2 * i + 1
            r = 2 * i + 2
            # Check each child on its own: the last internal node may have
            # a left child only.
            if l < _len:
                assert self._elements[i] >= self._elements[l]
            if r < _len:
                assert self._elements[i] >= self._elements[r]


def test_MaxHeap():
    """Unit test for the max-heap implementation."""
    _len = 10
    h = MaxHeap(_len)
    for i in range(_len):
        h.add(i)
    h.assert_keep_heap()
    for i in range(_len):
        # Values were added in ascending order, so each extract must return
        # the largest remaining one.
        assert h.extract() == _len - i - 1


test_MaxHeap()


def simpleHeapSort(theSeq):
    """Sort ``theSeq`` in ascending order using MaxHeap and return it.

    The sequence itself is overwritten with the sorted values; a temporary
    heap of the same length is used as working storage.
    """
    if not theSeq:
        return theSeq
    _len = len(theSeq)
    heap = MaxHeap(_len)
    for item in theSeq:
        heap.add(item)
    # The heap yields the largest value first, so fill from the back.
    for i in reversed(range(_len)):
        theSeq[i] = heap.extract()
    return theSeq


def test_simpleHeapSort():
    """Check the heap sort against sorted() on random inputs."""
    from random import randint

    assert simpleHeapSort([]) == []
    for _ in range(1000):
        _len = randint(1, 100)
        to_sort = [randint(0, 100) for _ in range(_len)]
        expected = sorted(to_sort)
        simpleHeapSort(to_sort)  # sorts the list itself
        # Comparing with sorted() checks both the order and that no value
        # was lost or duplicated.
        assert to_sort == expected


test_simpleHeapSort()
14章: Search Trees
二叉查找树性质:对每个内部节点V, 1. 所有key小于V.key的存储在V的左子树。 2. 所有key大于V.key的存储在V的右子树。对BST进行中序遍历会得到升序的key序列。
class _BSTMapNode:
    """Binary search tree node holding a key, its value and two children."""

    __slots__ = ('key', 'value', 'left', 'right')

    def __init__(self, key, value):
        self.key = key
        self.value = value
        self.left = None
        self.right = None

    def __repr__(self):
        return '<{}:{}> left:{}, right:{}'.format(
            self.key, self.value, self.left, self.right)

    __str__ = __repr__


class BSTMap:
    """Map ADT implemented with a binary search tree (BST).

    Each tree node stores a key and its payload. For every node V:
      1. all keys smaller than V.key are stored in V's left subtree;
      2. all keys larger than V.key are stored in V's right subtree.
    An in-order traversal of the tree yields the keys in ascending order.
    """

    def __init__(self):
        self._root = None
        self._size = 0
        self._rval = None  # value of the entry removed by the last remove()

    def __len__(self):
        return self._size

    def __iter__(self):
        return _BSTMapIterator(self._root, self._size)

    def __contains__(self, key):
        return self._bstSearch(self._root, key) is not None

    def valueOf(self, key):
        """Return the value stored under ``key``; asserts the key exists."""
        node = self._bstSearch(self._root, key)
        assert node is not None, 'Invalid map key.'
        return node.value

    def _bstSearch(self, subtree, target):
        """Return the node holding ``target`` in ``subtree``, or None."""
        node = subtree
        while node is not None:
            if target < node.key:
                node = node.left
            elif target > node.key:
                node = node.right
            else:
                return node  # a reference to the matching node
        return None  # reached the bottom, or the tree is empty

    def _bstMinumum(self, subtree):
        """Return the node with the smallest key in ``subtree``, or None.

        The minimum is found by following left links to the bottom (the
        maximum would be found by following right links).
        """
        if subtree is None:
            return None
        node = subtree
        while node.left is not None:
            node = node.left
        return node

    def add(self, key, value):
        """Add ``key`` or replace its value. O(N) in the worst case.

        Returns True when a new entry was inserted and False when an
        existing key was updated.
        """
        node = self._bstSearch(self._root, key)
        if node is not None:
            # The key already exists: update its value.
            node.value = value
            return False
        self._root = self._bstInsert(self._root, key, value)
        self._size += 1
        return True

    def _bstInsert(self, subtree, key, value):
        """Insert a new entry; new nodes always become leaves."""
        if subtree is None:
            subtree = _BSTMapNode(key, value)
        elif key < subtree.key:
            subtree.left = self._bstInsert(subtree.left, key, value)
        elif key > subtree.key:
            subtree.right = self._bstInsert(subtree.right, key, value)
        # No else branch: add() has already ruled out a duplicate key.
        return subtree

    def remove(self, key):
        """Remove ``key`` and return its value. O(N) in the worst case.

        Asserts that the key exists. The node to delete is one of:
          1. a leaf: its parent's link to it becomes None;
          2. a node with one child: the parent links to that child;
          3. a node with two children:
             (1) find the node N and its in-order successor S,
             (2) copy S's key and value into N,
             (3) remove S (the minimum of N's right subtree) from N's
                 right subtree.
        """
        assert key in self, 'invalid map key'
        self._root = self._bstRemove(self._root, key)
        self._size -= 1
        return self._rval

    def _bstRemove(self, subtree, target):
        """Remove ``target`` from ``subtree`` and return the new subtree root.

        The removed entry's value is left in ``self._rval``.
        """
        if subtree is None:
            return subtree
        if target < subtree.key:
            subtree.left = self._bstRemove(subtree.left, target)
            return subtree
        if target > subtree.key:
            subtree.right = self._bstRemove(subtree.right, target)
            return subtree

        # Found the node holding the target.
        removed = subtree.value
        if subtree.left is None:
            # Leaf (right is None too) or right child only.
            replacement = subtree.right
        elif subtree.right is None:
            # Left child only.
            replacement = subtree.left
        else:
            # Two children: take over the successor's entry, then remove
            # the successor from the right subtree.
            successor = self._bstMinumum(subtree.right)
            subtree.key = successor.key
            subtree.value = successor.value
            subtree.right = self._bstRemove(subtree.right, successor.key)
            replacement = subtree
        # Set last: the nested removal of the successor stores the
        # successor's value in _rval, which must not reach the caller.
        self._rval = removed
        return replacement

    def __repr__(self):
        return '->'.join(str(key) for key in self)

    def assert_keep_bst_property(self, subtree, low=None, high=None):
        """Assert that ``subtree`` satisfies the BST ordering on its keys.

        Written to verify that add and remove keep the BST property.
        ``low`` and ``high`` are exclusive bounds inherited from the
        ancestors (None means unbounded), so the whole subtree is checked
        and not only the direct children.
        """
        if subtree is None:
            return
        if low is not None:
            assert subtree.key > low
        if high is not None:
            assert subtree.key < high
        self.assert_keep_bst_property(subtree.left, low, subtree.key)
        self.assert_keep_bst_property(subtree.right, subtree.key, high)


class _BSTMapIterator:
    """Iterator over a snapshot of the keys of a BST, in ascending order."""

    def __init__(self, root, size):
        # The keys are collected eagerly into a plain list, so the iterator
        # does not depend on the Array ADT, which is not defined in this
        # section. ``size`` is kept only for signature compatibility.
        self._theKeys = []
        self._curItem = 0
        self._bstTraversal(root)

    def __iter__(self):
        return self

    def __next__(self):
        if self._curItem < len(self._theKeys):
            key = self._theKeys[self._curItem]
            self._curItem += 1
            return key
        raise StopIteration

    def _bstTraversal(self, subtree):
        """Append the keys of ``subtree`` in in-order (ascending) order."""
        if subtree is not None:
            self._bstTraversal(subtree.left)
            self._theKeys.append(subtree.key)
            self._bstTraversal(subtree.right)


def test_BSTMap():
    """Exercise insertion, ordering and two-child removal of BSTMap."""
    keys = [60, 25, 100, 35, 17, 80]
    bst = BSTMap()
    for key in keys:
        bst.add(key, key)
    assert len(bst) == len(keys)
    assert list(bst) == sorted(keys)
    bst.assert_keep_bst_property(bst._root)
    # 60 is the root and has two children; its successor 80 is reached by
    # going left inside the right subtree.
    assert bst.remove(60) == 60
    assert list(bst) == [17, 25, 35, 80, 100]
    bst.assert_keep_bst_property(bst._root)


test_BSTMap()


def test_HashMap():
    """Test originally written for the hash-based map, reused for BSTMap."""
    # h = HashMap()
    h = BSTMap()
    assert len(h) == 0
    h.add('a', 'a')
    assert h.valueOf('a') == 'a'
    assert len(h) == 1

    a_v = h.remove('a')
    assert a_v == 'a'
    assert len(h) == 0

    h.add('a', 'a')
    h.add('b', 'b')
    assert len(h) == 2
    assert h.valueOf('b') == 'b'
    b_v = h.remove('b')
    assert b_v == 'b'
    assert len(h) == 1
    h.remove('a')
    assert len(h) == 0

    _len = 10
    for i in range(_len):
        h.add(str(i), i)
    assert len(h) == _len
    for i in range(_len):
        assert str(i) in h
    for i in range(_len):
        print(len(h))
        print('bef', h)
        removed = h.remove(str(i))
        assert removed == i
        print('aft', h)
        print(len(h))
    assert len(h) == 0


test_HashMap()