外排序介绍
外排序是指能够处理极大量数据的排序算法。通常来说,外排序处理的数据不能一次装入内存,只能放在读写较慢的外存储器(通常是硬盘)上。外排序通常采用的是⼀种“排序-归并”的策略。在排序阶段,先读入能放在内存中的数据量,将其排序输出到⼀个临时文件,依此进行,将待排序数据组织为多个有序的临时文件。然后在归并阶段将这些临时文件组合为⼀个大的有序文件,也即排序结果。
跟外排序对应的就是内排序,之前常见的排序,都是内排序,这些排序思想适应的是数据在内存中,支持随机访问。归并排序的思想不需要随机访问数据,只需要依次按序列读取数据,所以归并排序既是⼀个内排序,也是⼀个外排序。
创建随机数据文件的代码
void CreatData()
{
int n = 100000;
srand((unsigned int)time(NULL));
const char* file = "data.txt";
FILE* fin = fopen(file, "w");
if (fin == NULL)
{
perror("foprn fail!");
return;
}
for (int i = 0; i < n; i++)
{
int x = rand() % n + 1;
fprintf(fin, "%d\n", x);
}
fclose(fin);
}
文件归并排序思路分析
- 读取n个值排序后写到
file1
,再读取n个值排序后写到file2
file1
和file2
利⽤归并排序的思想,依次读取比较,取小的尾插到mfile
,mfile
归并为⼀个有序文件- 将
file1
和file2
删掉,mfile
重命名为file1
- 再次读取n个数据排序后写到
file2
- 继续⾛
file1
和file2
归并,重复步骤2,直到文件中无法读出数据。最后归并出的有序数据放到了file1
中
文件归并排序代码实现
- 创建随机数据文件的代码
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
void CreatData()
{
int n = 100000;
srand((unsigned int)time(NULL));
const char* file = "data.txt";
FILE* fin = fopen(file, "w");
if (fin == NULL)
{
perror("foprn fail!");
return;
}
for (int i = 0; i < n; i++)
{
int x = rand()+i;
fprintf(fin, "%d\n", x);
}
fclose(fin);
}
- 取n个数据放入
file1
中,并进行排序
int compare(const void* a, const void* b)
{
return (*(int*)a - *(int*)b);
}
int ReadNDataSortToFile(FILE* fout, int n, const char* file1)
{
int x = 0;
int* a = (int*)malloc(sizeof(int) * n);
if (a == NULL)
{
perror("malloc fail!");
return 0;
}
int j = 0;
for (int i = 0; i < n; i++)
{
if (fscanf(fout, "%d", &x) == EOF)
break;
a[j++] = x;
}
if (j == 0)
{
free(a);
return 0;
}
//排序
qsort(a, j, sizeof(int), compare);
//写入file1
FILE* fin = fopen(file1, "w");
if (fin == NULL)
{
free(a);
perror("fopen fail!");
return 0;
}
for (int i = 0; i < j; i++)
{
fprintf(fin, "%d\n", a[i]);
}
free(a);
fclose(fin);
return j;
}
- 将
file1
和file2
合并成mfile
void MergeFile(const char* file1, const char* file2, const char* mfile)
{
FILE* fout1 = fopen(file1, "r");
if (fout1 == NULL)
{
perror("fopen fail!");
return;
}
FILE* fout2 = fopen(file2, "r");
if (fout1 == NULL)
{
perror("fopen fail!");
return;
}
FILE* fin = fopen(mfile, "w");
if (fin == NULL)
{
perror("fopen fail!");
return;
}
int x1 = 0, x2 = 0;
int ret1 = fscanf(fout1,"%d",&x1);
int ret2= fscanf(fout2, "%d", &x2);
while (ret1 != EOF && ret2 != EOF)
{
if (x1 < x2)
{
fprintf(fin,"%d\n",x1);
ret1 = fscanf(fout1, "%d", &x1);
}
else
{
fprintf(fin, "%d\n", x2);
ret2 = fscanf(fout2, "%d", &x2);
}
}
while (ret1 != EOF)
{
fprintf(fin, "%d\n", x1);
ret1 = fscanf(fout1, "%d", &x1);
}
while (ret2 != EOF)
{
fprintf(fin, "%d\n", x2);
ret2 = fscanf(fout2, "%d", &x2);
}
fclose(fout1);
fclose(fout2);
fclose(fin);
}
- 文件归并的主思路
int main()
{
CreatData();
const char* file1 = "file1.txt";
const char* file2 = "file2.txt";
const char* mfile = "mfile.txt";
FILE* fout = fopen("data.txt", "r");
if (fout == NULL)
{
perror("fopen fail!");
return 0;
}
int m = 1000;
ReadNDataSortToFile(fout, m, file1);
ReadNDataSortToFile(fout, m, file2);
while (1)
{
MergeFile(file1, file2, mfile);
//删除file1和file2
remove(file1);
remove(file2);
//将mfile改名为file1
rename(mfile,file1);
//若读不到数据则退出
if (ReadNDataSortToFile(fout, m, file2) == 0)
break;
}
return 0;
}