一、string 类的模拟实现

1.1 - string.h

#pragma once

#include <assert.h>
#include <string.h>
#include <iostream>

namespace yzz
{
	/**
	 * A teaching re-implementation of a std::string-like class.
	 *
	 * Invariants:
	 *  - `_str` points to a heap buffer of `_capacity + 1` bytes;
	 *  - the first `_size` bytes are the contents and `_str[_size]` is '\0'.
	 *
	 * The length is tracked by `_size`, not by the terminator, so the contents
	 * may contain embedded '\0' characters. For that reason the implementation
	 * copies and compares by length (memcpy / memmove / memcmp) instead of
	 * using strcpy / strstr / strcmp.
	 */
	class string
	{
	public:
		typedef char* iterator;
		typedef const char* const_iterator;

		/*-------- Constructors and destructor --------*/

		/**
		 * Builds a string from a NUL-terminated C string (default: empty).
		 * @param str  source text; must not be null (checked with assert).
		 */
		string(const char* str = "")
		{
			assert(str);
			_size = strlen(str);
			_capacity = _size;
			_str = new char[_capacity + 1];
			memcpy(_str, str, _size + 1);  // +1 copies the terminator too
		}

		/**
		 * Copy constructor (deep copy): allocates a new buffer with the same
		 * capacity as `s` and copies `_size + 1` bytes. memcpy is used rather
		 * than strcpy because the contents may contain embedded '\0'.
		 */
		string(const string& s)
			: _size(s._size), _capacity(s._capacity), _str(new char[s._capacity + 1])
		{
			memcpy(_str, s._str, _size + 1);
		}

		/** Releases the owned buffer. */
		~string()
		{
			delete[] _str;  // delete[] on a null pointer is a no-op
		}

		/*-------- Copy assignment (deep copy) --------*/

		/** Exchanges buffer, size and capacity with `s`. */
		void swap(string& s) noexcept
		{
			std::swap(_str, s._str);
			std::swap(_size, s._size);
			std::swap(_capacity, s._capacity);
		}

		// Alternative 1, the "traditional" form (kept for comparison):
		/*string& operator=(const string& s)
		{
			if (this != &s)
			{
				delete[] _str;
				_str = new char[s._capacity + 1];
				memcpy(_str, s._str, s._size + 1);

				_size = s._size;
				_capacity = s._capacity;
			}
			return *this;
		}*/

		// Alternative 2, copy-and-swap with an explicit temporary
		// (kept for comparison):
		/*string& operator=(const string& s)
		{
			if (this != &s)
			{
				string tmp(s);  // deep copy through the copy constructor
				swap(tmp);      // tmp now holds the old buffer and frees it on exit
			}
			return *this;
		}*/

		/**
		 * Copy assignment via copy-and-swap. The argument is taken by value,
		 * so the copy constructor has already made the deep copy; swapping
		 * hands the old buffer to `tmp`, whose destructor frees it.
		 */
		string& operator=(string tmp)
		{
			swap(tmp);
			return *this;
		}

		/*-------- Capacity --------*/

		/** Number of characters stored (terminator not counted). */
		size_t size() const
		{
			return _size;
		}

		/** Number of characters the buffer can hold without reallocating. */
		size_t capacity() const
		{
			return _capacity;
		}

		/** Makes the string empty; the capacity is left unchanged. */
		void clear()
		{
			_str[0] = '\0';
			_size = 0;
		}

		/**
		 * Grows the buffer so that it can hold at least `n` characters.
		 * Does nothing when `n <= capacity()`; never shrinks.
		 */
		void reserve(size_t n = 0)
		{
			if (n > _capacity)
			{
				char* tmp = new char[n + 1];
				memcpy(tmp, _str, _size + 1);
				delete[] _str;
				_str = tmp;
				_capacity = n;
			}
		}

		/**
		 * Changes the size to `n`. When growing, the new positions are filled
		 * with `ch` (reallocating if needed); when shrinking, the string is
		 * truncated and the capacity is kept.
		 */
		void resize(size_t n, char ch = '\0')
		{
			if (n > _size)
			{
				if (n > _capacity)
				{
					reserve(n);
				}

				for (size_t i = _size; i < n; ++i)
				{
					_str[i] = ch;
				}
			}

			_size = n;
			_str[_size] = '\0';
		}

		/*-------- Element access and iteration --------*/

		/** Unchecked-in-release element access; asserts `pos < size()`. */
		char& operator[](size_t pos)
		{
			assert(pos < _size);
			return _str[pos];
		}

		const char& operator[](size_t pos) const
		{
			assert(pos < _size);
			return _str[pos];
		}

		/** Iterators are plain pointers into the buffer. */
		iterator begin()
		{
			return _str;
		}

		const_iterator begin() const
		{
			return _str;
		}

		iterator end()
		{
			return _str + _size;
		}

		const_iterator end() const
		{
			return _str + _size;
		}

		/*-------- Modifiers --------*/

		/**
		 * Appends one character. When the buffer is full the capacity grows
		 * to 5 (if it was 0) or doubles.
		 */
		void push_back(char ch)
		{
			if (_size == _capacity)
			{
				reserve(_capacity == 0 ? 5 : 2 * _capacity);
			}
			_str[_size++] = ch;
			_str[_size] = '\0';
		}

		/**
		 * Appends a NUL-terminated C string.
		 * @param str  must not be null, and must not point into this string's
		 *             own buffer (reserve() may free that buffer before the
		 *             copy reads from `str`).
		 */
		void append(const char* str)
		{
			assert(str);
			size_t len = strlen(str);
			if (_size + len > _capacity)
			{
				reserve(_size + len);  // grow to exactly what is needed
			}
			memcpy(_str + _size, str, len + 1);  // +1 copies the terminator too
			_size += len;
		}

		string& operator+=(char ch)
		{
			push_back(ch);
			return *this;
		}

		string& operator+=(const char* str)
		{
			append(str);
			return *this;
		}

		/**
		 * Inserts `n` copies of `ch` before position `pos`.
		 * `pos == size()` is allowed and behaves like an append.
		 */
		void insert(size_t pos, size_t n, char ch)
		{
			assert(pos <= _size);
			if (_size + n > _capacity)
			{
				reserve(_size + n);
			}
			// Shift the tail (terminator included) right by n. memmove copes
			// with the overlap and avoids a counting-down loop over an
			// unsigned index, which is easy to get wrong when pos == 0.
			memmove(_str + pos + n, _str + pos, _size - pos + 1);
			memset(_str + pos, ch, n);
			_size += n;
		}

		/**
		 * Inserts the C string `str` before position `pos`.
		 * @param str  must not be null and must not point into this string's
		 *             own buffer (the buffer may be reallocated or shifted
		 *             before `str` is read).
		 */
		void insert(size_t pos, const char* str)
		{
			assert(pos <= _size && str);
			size_t len = strlen(str);
			if (_size + len > _capacity)
			{
				reserve(_size + len);
			}

			memmove(_str + pos + len, _str + pos, _size - pos + 1);
			memcpy(_str + pos, str, len);
			_size += len;
		}

		/**
		 * Removes up to `len` characters starting at `pos`.
		 * If fewer than `len` characters remain (or `len == npos`) everything
		 * from `pos` to the end is removed. `len == 0` is a no-op.
		 * `pos == size()` is accepted and removes nothing.
		 */
		void erase(size_t pos = 0, size_t len = npos)
		{
			assert(pos <= _size);
			const size_t tail = _size - pos;  // characters available from pos
			// Comparing against the remaining length avoids the unsigned
			// wrap-around of an expression such as `pos + len - 1`.
			if (len >= tail)
			{
				_size = pos;
				_str[_size] = '\0';
			}
			else
			{
				// Move the surviving tail (terminator included) down over the gap.
				memmove(_str + pos, _str + pos + len, tail - len + 1);
				_size -= len;
			}
		}

		/*-------- String operations --------*/

		/**
		 * Returns the internal NUL-terminated buffer. If the contents contain
		 * an embedded '\0', C APIs reading this pointer stop at that character.
		 */
		const char* c_str() const
		{
			return _str;
		}

		/**
		 * Finds the first occurrence of `ch` at or after `pos`.
		 * @return its index, or npos when not found (including pos >= size()).
		 */
		size_t find(char ch, size_t pos = 0) const
		{
			for (size_t i = pos; i < _size; ++i)
			{
				if (_str[i] == ch)
					return i;
			}
			return npos;
		}

		/**
		 * Finds the first occurrence of the C string `str` at or after `pos`.
		 * The whole contents are searched by length, so text located after an
		 * embedded '\0' is found as well. An empty `str` matches at `pos`
		 * whenever `pos <= size()`.
		 * @param str  must not be null (checked with assert).
		 * @return the index of the match, or npos.
		 */
		size_t find(const char* str, size_t pos = 0) const
		{
			assert(str);
			const size_t len = strlen(str);
			if (pos > _size || len > _size - pos)
				return npos;

			const size_t last = _size - len;  // last index where a match can start
			for (size_t i = pos; i <= last; ++i)
			{
				if (memcmp(_str + i, str, len) == 0)
					return i;
			}
			return npos;
		}

		/**
		 * Returns a copy of up to `len` characters starting at `pos`.
		 * If fewer than `len` characters remain (or `len == npos`) the copy
		 * runs to the end. `len == 0` or `pos == size()` yields an empty string.
		 */
		string substr(size_t pos = 0, size_t len = npos) const
		{
			assert(pos <= _size);

			size_t n = _size - pos;  // characters available from pos
			if (len < n)
			{
				n = len;
			}

			string tmp;
			tmp.reserve(n);
			memcpy(tmp._str, _str + pos, n);
			tmp._size = n;
			tmp._str[n] = '\0';
			return tmp;
		}

		/*-------- Relational operators --------*/

		/** Equal when the sizes match and all `_size` bytes match. */
		bool operator==(const string& s) const
		{
			return _size == s._size && memcmp(_str, s._str, _size) == 0;
		}

		bool operator!=(const string& s) const
		{
			return !(*this == s);
		}

		/**
		 * Lexicographic "less than". Bytes are compared as unsigned char
		 * (memcmp), so bytes >= 0x80 sort after ASCII regardless of whether
		 * plain `char` is signed on the platform. When one string is a prefix
		 * of the other, the shorter one is smaller.
		 */
		bool operator<(const string& s) const
		{
			const size_t common = _size < s._size ? _size : s._size;
			const int cmp = memcmp(_str, s._str, common);
			return cmp < 0 || (cmp == 0 && _size < s._size);
		}

		bool operator<=(const string& s) const
		{
			return !(s < *this);
		}

		bool operator>(const string& s) const
		{
			return s < *this;
		}

		bool operator>=(const string& s) const
		{
			return !(*this < s);
		}
	private:
		size_t _size;      // number of characters stored
		size_t _capacity;  // usable characters in the buffer (buffer is _capacity + 1 bytes)
		char* _str;        // owned buffer, always NUL-terminated at _str[_size]
	public:
		/** "Not found" result and "until the end" length: the largest size_t. */
		static const size_t npos;
	};

	// NOTE: this is a non-inline definition of a static data member. If this
	// file is used as a header from more than one translation unit, each one
	// gets its own definition and the program fails to link.
	const size_t string::npos = static_cast<size_t>(-1);

	/*-------- Stream insertion (<<) and extraction (>>) --------*/

	/**
	 * Writes all `size()` characters of `s`, one by one. Unlike printing
	 * `c_str()`, output does not stop at an embedded '\0'.
	 */
	inline std::ostream& operator<<(std::ostream& out, const string& s)
	{
		for (char ch : s)
		{
			out << ch;
		}
		return out;
	}

	/**
	 * Reads one whitespace-delimited token into `s` (previous contents are
	 * discarded). Leading ' ', '\n' and '\t' are skipped; the delimiter that
	 * ends the token is consumed.
	 *
	 * End of input is handled explicitly: if it is reached before any token
	 * character, the stream is left in the failed state set by get(); if it is
	 * reached after a token was read, only eofbit stays set so the extraction
	 * counts as successful.
	 */
	inline std::istream& operator>>(std::istream& in, string& s)
	{
		const int eof = std::istream::traits_type::eof();
		auto is_delim = [](int c) { return c == ' ' || c == '\n' || c == '\t'; };

		s.clear();

		// Skip leading whitespace. eof is not a delimiter, so this loop also
		// stops when the input runs out.
		int c = in.get();
		while (is_delim(c))
		{
			c = in.get();
		}
		if (c == eof)
		{
			return in;
		}

		// Collect characters in a local buffer and append them in chunks to
		// limit the number of reallocations of `s`.
		char buf[128];
		size_t used = 0;
		while (c != eof && !is_delim(c))
		{
			buf[used++] = static_cast<char>(c);
			if (used == sizeof(buf) - 1)  // keep one byte for the terminator
			{
				buf[used] = '\0';
				s += buf;
				used = 0;
			}
			c = in.get();
		}
		if (used != 0)
		{
			buf[used] = '\0';
			s += buf;
		}

		if (c == eof)
		{
			// A token was read, so hitting end of input is not a failure.
			in.clear(in.rdstate() & ~std::ios_base::failbit);
		}
		return in;
	}
}

注意：

'\0' 是字符串的结束标志，因此在输出字符串时，遇到 '\0' 就停止输出了。

而对于 string 类对象，它的成员变量 _size 表示其有效字符个数，因此在输出 string 类对象时，即便遇到 '\0' 也不会停止输出。

示例：

#include <iostream>
#include <string>
using namespace std;

int main()
{
    // A C string: printing stops at the first '\0'.
    const char* text = "Hello\0World";
    std::cout << text << std::endl;  // Hello

    // A std::string: printing the object writes all of its characters,
    // while printing c_str() stops at the embedded '\0'.
    std::string original("Hello");
    original += '\0';
    original += "World";
    std::cout << original << std::endl;  // HelloWorld
    std::cout << original.c_str() << std::endl;  // Hello

    // Copy construction keeps the characters after the '\0'.
    std::string copied(original);
    std::cout << copied << std::endl;  // HelloWorld
    std::cout << copied.c_str() << std::endl;  // Hello

    // So does copy assignment.
    std::string assigned;
    assigned = original;
    std::cout << assigned << std::endl;  // HelloWorld
    std::cout << assigned.c_str() << std::endl;  // Hello
    return 0;
}

1.2 - test.cpp

#define _CRT_SECURE_NO_WARNINGS 1

#include "string.h"
#include <iostream>
using namespace std;

void test_string1()
{
	yzz::string s1;
	cout << s1 << endl;  // 空

	yzz::string s2("Hello");
	s2 += '\0';
	s2 += "World";
	yzz::string s3(s2);
	cout << s2 << endl;  // HelloWorld
	cout << s2.c_str() << endl;  // Hello
	cout << s3 << endl;  // HelloWorld
	cout << s3.c_str() << endl;  // Hello

	yzz::string s4;
	cin >> s4;  // 假设输入 "abcdef"
	cout << s4 << endl;  // abcdef
	cout << s4.c_str() << endl;  // abcdef

	yzz::string s5;
	s5 = s2;
	cout << s5 << endl;  // HelloWorld
	cout << s5.c_str() << endl;  // Hello
}

void test_string2()
{
	yzz::string s("hello world");
	cout << s.size() << endl;  // 11
	cout << s.capacity() << endl;  // 11

	s.clear();
	cout << s.size() << endl;  // 0
	cout << s.capacity() << endl;  // 11

	s = "hello world";
	s.reserve(20);
	cout << s.capacity() << endl;  // 20

	s.resize(5);
	cout << s << endl;  // hello
	cout << s.size() << endl;  // 5
	cout << s.capacity() << endl;  // 20

	s.resize(10, 'x');
	cout << s << endl;  // helloxxxxx
	cout << s.size() << endl;  // 10
	cout << s.capacity() << endl;  // 20
}

void test_string3()
{
	yzz::string s("01234");
	for (size_t i = 0; i < s.size(); ++i)
	{
		s[i] += 5;
		cout << s[i];
	}
	cout << endl;
	// 56789

	for (yzz::string::iterator it = s.begin(); it != s.end(); ++it)
	{
		*it -= 5;
		cout << *it;
	}
	cout << endl;
	// 01234

	for (auto& ch : s)
	{
		ch += 5;
		cout << ch;
	}
	cout << endl;
	// 56789
}

void test_string4()
{
	yzz::string s;
	s.push_back('x');
	s.push_back('y');
	cout << s << endl;  // xy

	s.append("xy");
	cout << s << endl;  // xyxy

	s += 'x';
	s += 'y';
	s += "xy";
	cout << s << endl;  // xyxyxyxy

	s.insert(0, 2, 'M');
	cout << s << endl;  // MMxyxyxyxy
	s.insert(6, "NN");
	cout << s << endl;  // MMxyxyNNxyxy

	s.erase(0, 6);
	cout << s << endl;  // NNxyxy
	s.erase(2, 10);
	cout << s << endl;  // NN
}

// Exercises find() and substr() by splitting a URL into protocol,
// domain name and the remaining path.
void test_string5()
{
	yzz::string url("https://legacy.cplusplus.com/reference/string/string/");

	// Protocol: everything before "://".
	size_t schemeStart = 0;
	size_t schemeEnd = url.find("://", schemeStart);
	yzz::string protocol;
	if (schemeEnd != yzz::string::npos)
	{
		protocol = url.substr(schemeStart, schemeEnd - schemeStart);
		std::cout << protocol << std::endl;  // https
	}

	// Domain: from just after "://" up to the next '/'; the rest is the URI.
	size_t hostStart = schemeEnd + 3;
	size_t hostEnd = url.find('/', hostStart);
	yzz::string domainName, uri;
	if (hostEnd != yzz::string::npos)
	{
		domainName = url.substr(hostStart, hostEnd - hostStart);
		uri = url.substr(hostEnd + 1);
		std::cout << domainName << std::endl;  // legacy.cplusplus.com
		std::cout << uri << std::endl;  // reference/string/string/
	}
}

void test_string6()
{
	yzz::string s1("hello");
	yzz::string s2("hello");
	yzz::string s3("helloxyz");
	cout << (s1 == s2) << endl;  // 1
	cout << (s1 < s3) << endl;  // 1
	cout << (s3 > s1) << endl;  // 1
}

// Test driver: un-comment the demos you want to run.
int main()
{
	// test_string1();  // construction, copy, stream I/O, assignment
	// test_string2();  // capacity operations
	// test_string3();  // element access and iteration
	// test_string4();  // modifiers
	// test_string5();  // find / substr
	test_string6();     // relational operators
	return 0;
}

二、string 类的写时拷贝

Scott Meyers 在《More Effective C++》中举了个例子：在你还在上学的时候，你的父母要你不要看电视，而去复习功课，于是你把自己关在房间里，做出一副正在复习功课的样子，其实你在干着别的诸如给班上的某位女生写情书之类的事，而一旦你的父母进来在你房间要检查你是否在复习时，你才真正捡起课本看书。这就是 "拖延战术"，直到你非要做的时候才去做。

当然，这种事情在现实生活中往往会出事，但在编程世界中，它摇身一变，成为了非常有用的技术——写时拷贝（Copy-On-Write，简称 COW）技术就是 "拖延战术" 的产物。

2.1 - 示例

我们可以通过下面的程序来了解 string 类的写时拷贝技术。

#include <string>
#include <stdio.h>
using namespace std;

// Prints the buffer address of three strings before and after each of them
// is modified, to show whether the buffers are shared.
// NOTE(review): the three addresses are equal before the writes only when the
// standard library implements std::string with copy-on-write; other
// implementations print three different addresses from the start.
int main()
{
    std::string s1("hello world");
    std::string s2(s1);
    std::string s3;
    s3 = s1;
    // %p expects a void pointer, so the char pointers are cast explicitly.
    printf("s1's address:%p\n", static_cast<const void*>(s1.c_str()));
    printf("s2's address:%p\n", static_cast<const void*>(s2.c_str()));
    printf("s3's address:%p\n", static_cast<const void*>(s3.c_str()));

    // Writing through operator[] is what may force a private copy.
    s1[0] = 'x';
    s2[0] = 'y';
    s3[0] = 'z';
    printf("After Copy-On-Write...\n");
    printf("s1's address:%p\n", static_cast<const void*>(s1.c_str()));
    printf("s2's address:%p\n", static_cast<const void*>(s2.c_str()));
    // This line must print s3's buffer; printing s2's again would hide
    // whether s3 kept the original buffer.
    printf("s3's address:%p\n", static_cast<const void*>(s3.c_str()));
    return 0;
}

这个程序的输出结果是（测试平台为 Linux）：

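我们可以看到，当用 s1 构造 s2，以及把 s1 赋值给 s3 时，s1、s2 和 s3 存放数据的地址都是一样的，即共享内存。而共享同一块内存的对象发生内容改变时，触发了 string 类的写时拷贝，s1 和 s2 存放数据的地址发生了改变，s3 存放数据的地址不变。修改内容才触发写时拷贝，不修改内容则不触发，这就是 "拖延战术" 的真谛，非要做的时候才去做。需要注意的是：C++11 标准实际上已不再允许 std::string 采用写时拷贝，GCC 5 及之后版本的 libstdc++ 默认使用不共享缓冲区的新实现，因此只有在较旧的 libstdc++（或定义 _GLIBCXX_USE_CXX11_ABI=0 编译）下才能观察到上述现象。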