*说明
如果需要用到这些知识却没有掌握,则会让人感到沮丧,也可能导致面试被拒。无论是花几天时间“突击”,还是利用零碎的时间持续学习,在数据结构上下点功夫都是值得的。那么Python 中有哪些数据结构呢?列表、字典、集合,还有……栈?Python 有栈吗?本系列文章将给出详细拼图。
第5章:Searching 和 Sorting
排序和查找是最基础和频繁的操作,python内置了in操作符和bisect二分操作模块实现查找,内置了sorted方法来实现排序操作。二分和快排也是面试中经常考到的,本章讲的是基本的排序和查找。
def binary_search(sorted_seq, val): """ 实现标准库中的bisect.bisect_left """ low = 0 high = len(sorted_seq) - 1 while low <= high: mid = (high + low) // 2 if sorted_seq[mid] == val: return mid elif val < sorted_seq[mid]: high = mid - 1 else: low = mid + 1 return low def bubble_sort(seq): # O(n^2), n(n-1)/2 = 1/2(n^2 + n) n = len(seq) for i in range(n-1): for j in range(n-1-i): # 这里之所以 n-1 还需要 减去 i 是因为每一轮冒泡最大的元素都会冒泡到最后,无需再比较 if seq[j] > seq[j+1]: seq[j], seq[j+1] = seq[j+1], seq[j] def select_sort(seq): """可以看作是冒泡的改进,每次找一个最小的元素交换,每一轮只需要交换一次""" n = len(seq) for i in range(n-1): min_idx = i # assume the ith element is the smallest for j in range(i+1, n): if seq[j] < seq[min_idx]: # find the minist element index min_idx = j if min_idx != i: # swap seq[i], seq[min_idx] = seq[min_idx], seq[i] def insertion_sort(seq): """ 每次挑选下一个元素插入已经排序的数组中,初始时已排序数组只有一个元素""" n = len(seq) for i in range(1, n): value = seq[i] # save the value to be positioned # find the position where value fits in the ordered part of the list pos = i while pos > 0 and value < seq[pos-1]: # Shift the items to the right during the search seq[pos] = seq[pos-1] pos -= 1 seq[pos] = value def merge_sorted_list(listA, listB): """ 归并两个有序数组 """ new_list = list() a = b = 0 while a < len(listA) and b < len(listB): if listA[a] < listB[b]: new_list.append(listA[a]) a += 1 else: new_list.append(listB[b]) b += 1 while a < len(listA): new_list.append(listA[a]) a += 1 while b < len(listB): new_list.append(listB[b]) b += 1 return new_list
第6章: Linked Structure
list是最常用的数据结构,但是list在中间增减元素的时候效率会很低,这时候linked list会更适合,缺点就是获取元素的平均时间复杂度变成了O(n)
# 单链表实现 class ListNode: def __init__(self, data): self.data = data self.next = None def travsersal(head, callback): curNode = head while curNode is not None: callback(curNode.data) curNode = curNode.next def unorderdSearch(head, target): curNode = head while curNode is not None and curNode.data != target: curNode = curNode.next return curNode is not None # Given the head pointer, prepend an item to an unsorted linked list. def prepend(head, item): newNode = ListNode(item) newNode.next = head head = newNode # Given the head reference, remove a target from a linked list def remove(head, target): predNode = None curNode = head while curNode is not None and curNode.data != target: # 寻找目标 predNode = curNode curNode = curNode.data if curNode is not None: if curNode is head: head = curNode.next else: predNode.next = curNode.next
第7章:Stacks
栈也是计算机里用得比较多的数据结构,栈是一种后进先出的数据结构,可以理解为往一个桶里放盘子,先放进去的会被压在地下,拿盘子的时候,后放的会被先拿出来。
class Stack: """ Stack ADT, using a python list Stack() isEmpty() length() pop(): assert not empty peek(): assert not empty, return top of non-empty stack without removing it push(item) """ def __init__(self): self._items = list() def isEmpty(self): return len(self) == 0 def __len__(self): return len(self._items) def peek(self): assert not self.isEmpty() return self._items[-1] def pop(self): assert not self.isEmpty() return self._items.pop() def push(self, item): self._items.append(item) class Stack: """ Stack ADT, use linked list 使用list实现很简单,但是如果涉及大量push操作,list的空间不够时复杂度退化到O(n) 而linked list可以保证最坏情况下仍是O(1) """ def __init__(self): self._top = None # top节点, _StackNode or None self._size = 0 # int def isEmpty(self): return self._top is None def __len__(self): return self._size def peek(self): assert not self.isEmpty() return self._top.item def pop(self): assert not self.isEmpty() node = self._top self.top = self._top.next self._size -= 1 return node.item def _push(self, item): self._top = _StackNode(item, self._top) self._size += 1 class _StackNode: def __init__(self, item, link): self.item = item self.next = link
第8章:Queues
队列也是经常使用的数据结构,比如发送消息等,celery可以使用redis提供的list实现消息队列。 本章我们用list和linked list来实现队列和优先级队列。
class Queue: """ Queue ADT, use list。list实现,简单但是push和pop效率最差是O(n) Queue() isEmpty() length() enqueue(item) dequeue() """ def __init__(self): self._qList = list() def isEmpty(self): return len(self) == 0 def __len__(self): return len(self._qList) def enquue(self, item): self._qList.append(item) def dequeue(self): assert not self.isEmpty() return self._qList.pop(0) from array import Array # Array那一章实现的Array ADT class Queue: """ circular Array ,通过头尾指针实现。list内置append和pop复杂度会退化,使用 环数组实现可以使得入队出队操作时间复杂度为O(1),缺点是数组长度需要固定。 """ def __init__(self, maxSize): self._count = 0 self._front = 0 self._back = maxSize - 1 self._qArray = Array(maxSize) def isEmpty(self): return self._count == 0 def isFull(self): return self._count == len(self._qArray) def __len__(self): return len(self._count) def enqueue(self, item): assert not self.isFull() maxSize = len(self._qArray) self._back = (self._back + 1) % maxSize # 移动尾指针 self._qArray[self._back] = item self._count += 1 def dequeue(self): assert not self.isFull() item = self._qArray[self._front] maxSize = len(self._qArray) self._front = (self._front + 1) % maxSize self._count -= 1 return item class _QueueNode: def __init__(self, item): self.item = item class Queue: """ Queue ADT, linked list 实现。为了改进环型数组有最大数量的限制,改用 带有头尾节点的linked list实现。 """ def __init__(self): self._qhead = None self._qtail = None self._qsize = 0 def isEmpty(self): return self._qhead is None def __len__(self): return self._count def enqueue(self, item): node = _QueueNode(item) # 创建新的节点并用尾节点指向他 if self.isEmpty(): self._qhead = node else: self._qtail.next = node self._qtail = node self._qcount += 1 def dequeue(self): assert not self.isEmpty(), 'Can not dequeue from an empty queue' node = self._qhead if self._qhead is self._qtail: self._qtail = None self._qhead = self._qhead.next # 前移头节点 self._count -= 1 return node.item class UnboundedPriorityQueue: """ PriorityQueue ADT: 给每个item加上优先级p,高优先级先dequeue 分为两种: - bounded PriorityQueue: 限制优先级在一个区间[0...p) - unbounded PriorityQueue: 不限制优先级 PriorityQueue() BPriorityQueue(numLevels): create a bounded PriorityQueue with priority in range [0, numLevels-1] isEmpty() length() enqueue(item, priority): 如果是bounded PriorityQueue, priority必须在区间内 dequeue(): 最高优先级的出队,同优先级的按照FIFO顺序 - 两种实现方式: 1.入队的时候都是到队尾,出队操作找到最高优先级的出队,出队操作O(n) 2.始终维持队列有序,每次入队都找到该插入的位置,出队操作是O(1) (注意如果用list实现list.append和pop操作复杂度会因内存分配退化) """ from collections import namedtuple _PriorityQEntry = namedtuple('_PriorityQEntry', 'item, priority') # 采用方式1,用内置list实现unbounded PriorityQueue def __init__(self): self._qlist = list() def isEmpty(self): return len(self) == 0 def __len__(self): return len(self._qlist) def enqueue(self, item, priority): entry = UnboundedPriorityQueue._PriorityQEntry(item, priority) self._qlist.append(entry) def deque(self): assert not self.isEmpty(), 'can not deque from an empty queue' highest = self._qlist[0].priority for i in range(len(self)): # 出队操作O(n),遍历找到最高优先级 if self._qlist[i].priority < highest: highest = self._qlist[i].priority entry = self._qlist.pop(highest) return entry.item class BoundedPriorityQueue: """ BoundedPriorityQueue ADT,用linked list实现。上一个地方提到了 BoundedPriorityQueue 但是为什么需要 BoundedPriorityQueue呢? BoundedPriorityQueue 的优先级限制在[0, maxPriority-1] 对于 UnboundedPriorityQueue,出队操作由于要遍历寻找优先级最高的item,所以平均 是O(n)的操作,但是对于 BoundedPriorityQueue,用队列数组实现可以达到常量时间, 用空间换时间。比如要弹出一个元素,直接找到第一个非空队列弹出 元素就可以了。 (小数字代表高优先级,先出队) qlist [0] -> ["white"] [1] [2] -> ["black", "green"] [3] -> ["purple", "yellow"] """ # Implementation of the bounded Priority Queue ADT using an array of # # queues in which the queues are implemented using a linked list. from array import Array # 第二章定义的ADT def __init__(self, numLevels): self._qSize = 0 self._qLevels = Array(numLevels) for i in range(numLevels): self._qLevels[i] = Queue() # 上一节讲到用linked list实现的Queue def isEmpty(self): return len(self) == 0 def __len__(self): return len(self._qSize) def enqueue(self, item, priority): assert priority >= 0 and priority < len(self._qLevels), 'invalid priority' self._qLevel[priority].enquue(item) # 直接找到 priority 对应的槽入队 def deque(self): assert not self.isEmpty(), 'can not deque from an empty queue' i = 0 p = len(self._qLevels) while i < p and not self._qLevels[i].isEmpty(): # 找到第一个非空队列 i += 1 return self._qLevels[i].dequeue()