C语言数据结构【堆】动态模拟实现，堆排序，TOP-K问题

引言

堆的各个接口的实现（以代码注释为主），实现堆排序，解决经典问题：TOP-K问题

一、堆的概念与结构

堆具有以下性质

• 堆中某个结点的值总是不大于或不小于其父结点的值；

• 堆总是一棵完全二叉树。

二、堆的模拟实现

堆底层结构用数组来实现

注意：根结点的下标为0

分三个文件来写：

Heap.h //实现堆需要的头文件，函数的声明
Heap.c //实现堆的各个接口的代码
test.c //测试堆接口的代码

1.在Heap.h中定义堆的结构和声明要实现的函数：

#pragma once
#include<stdio.h>
#include<stdlib.h>
#include<assert.h>
#include<stdbool.h>

// Element type stored in the heap.
typedef int HPDataType;

// Heap backed by a growable array.
struct Heap
{
	HPDataType* arr;  // storage for the elements
	int size;         // number of valid elements
	int capacity;     // number of elements the storage can hold
};
typedef struct Heap HP;

void HPInit(HP* php);  // Initialize an empty heap (NULL array, size and capacity 0)

void Swap(int* x, int* y); // Exchange the two integers pointed to by x and y

void HPDestroy(HP* php);   // Free the heap's array and reset it to the empty state

void HPPrint(HP* php);     // Print the heap's array in storage order

void HPPush(HP* php, HPDataType x); // Insert x into the heap
void HPPop(HP* php);    // Remove the element at the top of the heap
// Return the element at the top of the heap

HPDataType HPTop(HP* php);

// Report whether the heap holds no elements
bool HPEmpty(HP* php);

// Sift-down: a single call is O(log n); building a heap of n elements with it is O(n)
void AdjustDown(HPDataType* arr, int parent, int n);

// Sift-up: a single call is O(log n); building a heap of n elements with it is O(n*log n)
void AdjustUp(HPDataType* arr, int child);

2.堆的初始化和堆的销毁

// Initialize the heap to the empty state: no storage, zero size, zero capacity.
// php must not be NULL.
void HPInit(HP* php)
{
	assert(php);       // same NULL check that HPPush and HPEmpty perform
	php->arr = NULL;   // storage is allocated later, by the first HPPush
	php->size = php->capacity = 0;
}
// Release the heap's array and return the heap to the empty state.
// Safe to call on a heap that never allocated; php must not be NULL.
void HPDestroy(HP* php)
{
	assert(php);
	free(php->arr);   // free(NULL) is a no-op, so no guard is needed
	php->arr = NULL;  // avoid leaving a dangling pointer behind
	php->size = php->capacity = 0;
}

3.向上调整算法和在堆中插入元素

// Exchange the two integers that x and y point to.
void Swap(int* x, int* y)
{
	const int first = *x;
	const int second = *y;
	*x = second;
	*y = first;
}
// Sift-up for a max-heap: moves arr[child] toward the root while it is
// larger than its parent. A single call follows one path to the root, so it
// costs O(log n); building an n-element heap by repeated sift-up is O(n*log n).
void AdjustUp(HPDataType* arr, int child)
{
	// Max-heap compares with '>' below; a min-heap would use '<'.
	for (int cur = child; cur > 0; )
	{
		int up = (cur - 1) / 2; // index of cur's parent
		if (!(arr[cur] > arr[up]))
		{
			return; // the parent is already at least as large
		}
		Swap(&arr[cur], &arr[up]);
		cur = up; // continue from the parent's position
	}
}
// Insert x into the heap, growing the array first when it is full.
// Capacity starts at 4 and doubles afterwards; if realloc fails the
// process prints an error and exits with status 1.
void HPPush(HP* php, HPDataType x)
{
	assert(php);

	const int full = (php->size == php->capacity);
	if (full)
	{
		int grown = 4;
		if (php->capacity != 0)
		{
			grown = 2 * php->capacity;
		}
		HPDataType* bigger = (HPDataType*)realloc(php->arr, grown * sizeof(HPDataType));
		if (!bigger)
		{
			perror("realloc fail!");
			exit(1);
		}
		php->capacity = grown;
		php->arr = bigger;
	}

	// Append at the end, then sift the new element up to its place.
	const int slot = php->size;
	php->arr[slot] = x;
	AdjustUp(php->arr, slot);
	php->size = slot + 1;
}

4.打印堆元素和判空

// Print every element in array order, separated by spaces, followed by a newline.
// php must not be NULL.
void HPPrint(HP* php)
{
	assert(php); // same NULL check that HPPush and HPEmpty perform
	for (int i = 0; i < php->size; i++)
	{
		printf("%d ", php->arr[i]);
	}
	printf("\n");
}

// Report whether the heap holds no elements; php must not be NULL.
bool HPEmpty(HP* php)
{
	assert(php);
	const bool noElements = (0 == php->size);
	return noElements;
}

5.向下调整算法和删除堆顶元素

// Sift-down for a max-heap over arr[0..n-1]: moves arr[parent] down until
// neither child is larger. A single call follows one path toward the leaves,
// so it costs O(log n); building an n-element heap by sifting down from the
// last parent to the root is O(n).
void AdjustDown(HPDataType* arr, int parent, int n)
{
	int cur = parent;
	for (;;)
	{
		int pick = 2 * cur + 1; // left child
		if (pick >= n)
		{
			break; // cur has no children inside the heap
		}
		// Prefer the right child when it exists and is larger
		// (a min-heap would pick the smaller child instead).
		int right = pick + 1;
		if (right < n && arr[pick] < arr[right])
		{
			pick = right;
		}
		// Stop once the larger child does not exceed the parent
		// (a min-heap would compare the other way round).
		if (arr[pick] <= arr[cur])
		{
			break;
		}
		Swap(&arr[pick], &arr[cur]);
		cur = pick;
	}
}

// Remove the element at the top of the heap. The heap must not be empty.
void HPPop(HP* php)
{
	assert(!HPEmpty(php));
	// Move the last element to the root, drop the old root from the
	// valid range, then sift the new root down to restore heap order.
	const int last = php->size - 1;
	Swap(&php->arr[0], &php->arr[last]);
	php->size = last;
	AdjustDown(php->arr, 0, last);
}

6.取堆顶元素

// Return the element at the top of the heap without removing it.
// The heap must not be empty.
HPDataType HPTop(HP* php)
{
	assert(!HPEmpty(php));
	HPDataType* root = php->arr; // the top is stored at index 0
	return *root;
}

三、所有代码：

1.Heap.h

#pragma once
#include<stdio.h>
#include<stdlib.h>
#include<assert.h>
#include<stdbool.h>

// Element type stored in the heap.
typedef int HPDataType;

// Heap backed by a growable array.
struct Heap
{
	HPDataType* arr;  // storage for the elements
	int size;         // number of valid elements
	int capacity;     // number of elements the storage can hold
};
typedef struct Heap HP;

void HPInit(HP* php);  // Initialize an empty heap (NULL array, size and capacity 0)

void Swap(int* x, int* y); // Exchange the two integers pointed to by x and y

void HPDestroy(HP* php);   // Free the heap's array and reset it to the empty state

void HPPrint(HP* php);     // Print the heap's array in storage order

void HPPush(HP* php, HPDataType x); // Insert x into the heap
void HPPop(HP* php);    // Remove the element at the top of the heap
// Return the element at the top of the heap

HPDataType HPTop(HP* php);

// Report whether the heap holds no elements
bool HPEmpty(HP* php);

// Sift-down: a single call is O(log n); building a heap of n elements with it is O(n)
void AdjustDown(HPDataType* arr, int parent, int n);

// Sift-up: a single call is O(log n); building a heap of n elements with it is O(n*log n)
void AdjustUp(HPDataType* arr, int child);

2.Heap.c

#define _CRT_SECURE_NO_WARNINGS
#include"Heap.h"

// Initialize the heap to the empty state: no storage, zero size, zero capacity.
// php must not be NULL.
void HPInit(HP* php)
{
	assert(php);       // same NULL check that HPPush and HPEmpty perform
	php->arr = NULL;   // storage is allocated later, by the first HPPush
	php->size = php->capacity = 0;
}
// Release the heap's array and return the heap to the empty state.
// Safe to call on a heap that never allocated; php must not be NULL.
void HPDestroy(HP* php)
{
	assert(php);
	free(php->arr);   // free(NULL) is a no-op, so no guard is needed
	php->arr = NULL;  // avoid leaving a dangling pointer behind
	php->size = php->capacity = 0;
}

// Print every element in array order, separated by spaces, followed by a newline.
// php must not be NULL.
void HPPrint(HP* php)
{
	assert(php); // same NULL check that HPPush and HPEmpty perform
	for (int i = 0; i < php->size; i++)
	{
		printf("%d ", php->arr[i]);
	}
	printf("\n");
}
// Exchange the two integers that x and y point to.
void Swap(int* x, int* y)
{
	const int first = *x;
	const int second = *y;
	*x = second;
	*y = first;
}
// Sift-up for a max-heap: moves arr[child] toward the root while it is
// larger than its parent. A single call follows one path to the root, so it
// costs O(log n); building an n-element heap by repeated sift-up is O(n*log n).
void AdjustUp(HPDataType* arr, int child)
{
	// Max-heap compares with '>' below; a min-heap would use '<'.
	for (int cur = child; cur > 0; )
	{
		int up = (cur - 1) / 2; // index of cur's parent
		if (!(arr[cur] > arr[up]))
		{
			return; // the parent is already at least as large
		}
		Swap(&arr[cur], &arr[up]);
		cur = up; // continue from the parent's position
	}
}
// Insert x into the heap, growing the array first when it is full.
// Capacity starts at 4 and doubles afterwards; if realloc fails the
// process prints an error and exits with status 1.
void HPPush(HP* php, HPDataType x)
{
	assert(php);

	const int full = (php->size == php->capacity);
	if (full)
	{
		int grown = 4;
		if (php->capacity != 0)
		{
			grown = 2 * php->capacity;
		}
		HPDataType* bigger = (HPDataType*)realloc(php->arr, grown * sizeof(HPDataType));
		if (!bigger)
		{
			perror("realloc fail!");
			exit(1);
		}
		php->capacity = grown;
		php->arr = bigger;
	}

	// Append at the end, then sift the new element up to its place.
	const int slot = php->size;
	php->arr[slot] = x;
	AdjustUp(php->arr, slot);
	php->size = slot + 1;
}
// Report whether the heap holds no elements; php must not be NULL.
bool HPEmpty(HP* php)
{
	assert(php);
	const bool noElements = (0 == php->size);
	return noElements;
}

// Sift-down for a max-heap over arr[0..n-1]: moves arr[parent] down until
// neither child is larger. A single call follows one path toward the leaves,
// so it costs O(log n); building an n-element heap by sifting down from the
// last parent to the root is O(n).
void AdjustDown(HPDataType* arr, int parent, int n)
{
	int cur = parent;
	for (;;)
	{
		int pick = 2 * cur + 1; // left child
		if (pick >= n)
		{
			break; // cur has no children inside the heap
		}
		// Prefer the right child when it exists and is larger
		// (a min-heap would pick the smaller child instead).
		int right = pick + 1;
		if (right < n && arr[pick] < arr[right])
		{
			pick = right;
		}
		// Stop once the larger child does not exceed the parent
		// (a min-heap would compare the other way round).
		if (arr[pick] <= arr[cur])
		{
			break;
		}
		Swap(&arr[pick], &arr[cur]);
		cur = pick;
	}
}

// Remove the element at the top of the heap. The heap must not be empty.
void HPPop(HP* php)
{
	assert(!HPEmpty(php));
	// Move the last element to the root, drop the old root from the
	// valid range, then sift the new root down to restore heap order.
	const int last = php->size - 1;
	Swap(&php->arr[0], &php->arr[last]);
	php->size = last;
	AdjustDown(php->arr, 0, last);
}

// Return the element at the top of the heap without removing it.
// The heap must not be empty.
HPDataType HPTop(HP* php)
{
	assert(!HPEmpty(php));
	HPDataType* root = php->arr; // the top is stored at index 0
	return *root;
}

3.test.c中的代码

#include"Heap.h"
#include<time.h>

// Push six values, print the array, then pop four times and print the
// array after every pop.
void test01()
{
	const int values[] = { 10, 34, 34, 30, 70, 26 };
	const int count = (int)(sizeof(values) / sizeof(values[0]));

	HP hp;
	HPInit(&hp);
	for (int i = 0; i < count; i++)
	{
		HPPush(&hp, values[i]);
	}
	HPPrint(&hp);

	for (int round = 0; round < 4; round++)
	{
		HPPop(&hp);
		HPPrint(&hp);
	}
	HPDestroy(&hp);
}
// Push six values, print the array, then repeatedly print and remove the
// top element until the heap is empty.
void test02()
{
	HP hp;
	HPInit(&hp);
	HPPush(&hp, 10);
	HPPush(&hp, 34);
	HPPush(&hp, 60);
	HPPush(&hp, 30);
	HPPush(&hp, 70);
	HPPush(&hp, 26);
	HPPrint(&hp);
	while (!HPEmpty(&hp))
	{
		int top = HPTop(&hp);
		printf("%d ", top);
		HPPop(&hp);
	}
	// Popping only lowers size; the array is still allocated and must be freed.
	HPDestroy(&hp);
}
// Test driver: uncomment the test you want to run.
int main(void)
{
	//test01();
	//test02();
	return 0;
}

四、堆的应用

（这里只是举例了两个例子）

1、堆排序

顾名思义：堆排序是一种借助堆来实现排序的算法

有一种想法是借用堆这种数据结构实现排序，但是这是假的堆排序。

// "Fake" heap sort: borrows the HP data structure instead of sorting in place.
// Every element is pushed into a heap, then the elements are written back to
// the array in the order they are popped.
// HP is a max-heap here, so the largest element is popped first and the array
// ends up in descending order; a min-heap would give ascending order.
void HeapSort1(int* arr, int n)
{
	HP heap;
	HPInit(&heap);
	for (int k = 0; k < n; k++)
	{
		HPPush(&heap, arr[k]);
	}

	// Drain the heap, overwriting the array from the front.
	for (int out = 0; !HPEmpty(&heap); out++)
	{
		arr[out] = HPTop(&heap);
		HPPop(&heap);
	}
	HPDestroy(&heap);
}

真正的堆排序：

1.先根据数组，建堆
2.将堆顶元素和最后一个元素交换，size--后剩下的元素向下调整建堆，重复该过程
升序：建大堆
降序：建小堆

// In-place heap sort. With the max-heap AdjustDown the result is ascending;
// a min-heap sift-down would give descending order.
// 1. Build a heap from the array.
// 2. Swap the top with the last element of the heap, shrink the heap by one,
//    sift the new top down, and repeat.
void HeapSort(int* arr, int n)
{
	// Build the heap with sift-down, from the parent of the last element back
	// to the root: O(n) overall (repeated sift-up would be O(n*log n)).
	for (int i = (n - 1 - 1) / 2; i >= 0; i--)
	{
		AdjustDown(arr, i, n);
	}
	// arr[0..end] is the heap; everything after end is already sorted.
	for (int end = n - 1; end > 0; end--)
	{
		Swap(&arr[0], &arr[end]); // largest remaining element goes to its final slot
		// The heap now holds exactly `end` elements, arr[0..end-1]. Sifting down
		// over fewer elements would leave the last one out and break the order.
		AdjustDown(arr, 0, end);
	}
}

2、TOP-K问题

TOP-K问题：即求数据集合中前K个最大的元素或者最小的元素，一般情况下数据量都比较大。

比如：专业前10名、世界500强、富豪榜、游戏中前100的活跃玩家等。

对于Top-K问题，能想到的最简单直接的方式就是排序，但是：如果数据量非常大，排序就不太可取了 (可能数据都不能一下全部加载到内存中)。

最佳的方式就是用堆来解决，基本思路如下：

1）用数据集合中前K个元素来建堆

求前k个最大的元素，则建小堆

求前k个最小的元素，则建大堆

2）用剩余的N-K个元素依次与堆顶元素比较：以求前K个最大的元素（建小堆）为例，如果取出的元素比堆顶元素大，就用它替换堆顶元素，然后向下调整，使其重新成为小堆；重复此操作，直到剩余的N-K个元素都比较完。此时堆中的K个元素就是所求的前K个最大（或最小）的元素

2.1创造数据

这时你可能会问：“大量数据不能一下加载到内存中，那么怎么存储数据呢？”

回答：“存到文件中”

// Generate the data set: writes 1000000 pseudo-random integers in
// [0, 999999], one per line, to "data.txt" (the file is overwritten).
// Exits with status 1 if the file cannot be opened or closed cleanly.
// Needs <time.h> for time() in addition to <stdio.h> and <stdlib.h>.
void CreateNDate()
{
	int n = 1000000;
	srand((unsigned int)time(NULL)); // new seed on every run
	const char* file = "data.txt";
	FILE* fin = fopen(file, "w");
	if (fin == NULL)
	{
		perror("fopen error!");
		exit(1);
	}
	for (int i = 0; i < n; i++)
	{
		// Reduce rand() before adding i: rand() + i overflows int when
		// RAND_MAX equals INT_MAX. The resulting value is the same.
		int x = (rand() % 1000000 + i) % 1000000;
		fprintf(fin, "%d\n", x);
	}
	// fclose flushes buffered output, so a failure here means lost data.
	if (fclose(fin) != 0)
	{
		perror("fclose error!");
		exit(1);
	}
}

里面生成1000000个数据

2.2对这些数据进行排序

先从这一大堆数据中取前k个数据建小根堆（这里以找前k大的数据为例），然后依次取其余数据与堆顶比较，比堆顶大的就替换堆顶并向下调整；重复这个步骤，最后堆里面的数据就是前k大的数据了。注意：这里的向下调整必须按小根堆的比较方式进行。

看代码注释为主

// Min-heap sift-down used only by TopK: moves arr[parent] down within
// arr[0..n-1] until neither child is smaller than it.
static void TopKAdjustDownMin(int* arr, int parent, int n)
{
	int child = parent * 2 + 1; // left child
	while (child < n)
	{
		// Pick the smaller of the two children.
		if (child + 1 < n && arr[child + 1] < arr[child])
		{
			child++;
		}
		if (arr[child] < arr[parent])
		{
			int tmp = arr[child];
			arr[child] = arr[parent];
			arr[parent] = tmp;
			parent = child;
			child = parent * 2 + 1;
		}
		else
		{
			break;
		}
	}
}

// Read k from standard input, then print the k largest integers stored in
// "data.txt" (in heap order, not sorted).
// A min-heap of k elements is kept: its top is the smallest of the current
// candidates, so any later value larger than the top replaces it.
// Exits with a non-zero status on invalid k, on file or allocation failure,
// or when the file holds fewer than k integers.
void TopK()
{
	int k = 0;
	printf("请输入k: ");
	if (scanf("%d", &k) != 1 || k <= 0)
	{
		fprintf(stderr, "k must be a positive integer\n");
		exit(1);
	}

	const char* file = "data.txt";
	FILE* fout = fopen(file, "r");
	if (fout == NULL)
	{
		perror("fopen fail!");
		exit(1);
	}
	// Array that holds the k candidates.
	int* minHeap = (int*)malloc(sizeof(int) * (size_t)k);
	if (minHeap == NULL)
	{
		perror("malloc fail!");
		fclose(fout);
		exit(2);
	}
	// The first k values seed the heap.
	for (int i = 0; i < k; i++)
	{
		if (fscanf(fout, "%d", &minHeap[i]) != 1)
		{
			fprintf(stderr, "%s holds fewer than %d integers\n", file, k);
			free(minHeap);
			fclose(fout);
			exit(1);
		}
	}
	// Build the min-heap from the last parent back to the root.
	for (int i = (k - 1 - 1) / 2; i >= 0; i--)
	{
		TopKAdjustDownMin(minHeap, i, k);
	}
	// Scan the remaining values. Comparing against 1 (one item converted)
	// also stops on malformed input, where fscanf returns 0 rather than EOF.
	int x = 0;
	while (fscanf(fout, "%d", &x) == 1)
	{
		if (x > minHeap[0]) // larger than the smallest candidate
		{
			minHeap[0] = x;
			TopKAdjustDownMin(minHeap, 0, k);
		}
	}
	for (int i = 0; i < k; i++)
	{
		printf("%d ", minHeap[i]);
	}
	free(minHeap);
	fclose(fout);
}