C++初阶-string类的模拟实现

string类的模拟实现

一、经典的string类问题
- 1.1 构造函数
- - 1.1.1 全缺省的构造函数
- 2.1 拷贝构造
- 3.1 赋值
- 4.1 析构函数
- 5.1 c_str
- 6.1 operator[]
- 7.1 size
- 8.1 capacity
- 9.1 比较（ASCII）大小
- 10.1 resize
- 11.1 reserve
- 12.1 push_back(尾插字符)
- 13.1 append(尾插字符串)
- 14.1 +=字符/字符串
- 15.1 insert插入字符/字符串
- 16.1 erase
- 17.1 swap
- 18.1 find
- 19.1 clear
- 20.1 流插入<<
- 21.1 流提取>>
二、整体代码实现
- 2.1 string.h
- 2.2 test.cpp

一、经典的string类问题

string底层是一个字符数组，模拟实现string类，最主要是实现string类的构造、拷贝构造、赋值运算符重载以及析构函数。

1.1 构造函数

1.1.1 全缺省的构造函数

若写成两个构造函数，一个设置成无参，一个设置成带参，若调用如上的带参构造函数就会报错，将str传给_str，属于权限放大，为了解决这个问题，可以将_str改为const char*类型，但是无法修改_str所指向的内容，调用operator[]函数就会报错。
因为后续要考虑扩容等问题，所以最好是new一块空间，而无参的构造函数为了保持析构都用delete[]，所以使用new[]。
不可以将缺省值设置为nullptr：strlen(str)会对str指针解引用并一直读到'\0'才终止，对空指针解引用会导致程序崩溃。
将缺省值设置成一个空字符串，结尾默认为’\0’。

		/*string()
			:_str(new char[1])
			, _size(0)
			, _capacity(0)
		{
			_str[0] = '\0';
		}*/

		//string(const char* str = nullptr)  //不可以
		//string(const char* str = '\0')  //类型不匹配
		//string(const char* str = "\0")   //可以，常量字符串
		//string(const char* str)
		// Default/converting constructor: copies the C string `str` into a newly
		// allocated buffer. The default argument is the empty literal "", which
		// still ends in '\0', so strlen/strcpy are safe when no argument is given.
		string(const char* str = "")
			: _size(strlen(str))
		{
			// An empty string still reserves room for 3 characters; otherwise the
			// capacity equals the length.
			if (_size == 0)
			{
				_capacity = 3;
			}
			else
			{
				_capacity = _size;
			}

			// One extra byte for the terminating '\0'.
			_str = new char[_capacity + 1];
			strcpy(_str, str);
		}

	private:
		char* _str;        // heap buffer allocated with new[]; holds _capacity + 1 chars
		size_t _size;      // number of valid characters (terminator not counted)
		size_t _capacity;  // number of characters the buffer can hold besides the '\0'

2.1 拷贝构造

创建一块同样大小的空间，并将原来的数据拷贝下来，这样两个指针就指向自己的空间，一个被修改也不会影响另一个

	//s3(s2)
		string(const string& s)
			:_size(s._size)
			, _capacity(s._capacity)
		{
			_str = new char[s._capacity + 1];
			strcpy(_str, s._str);
		}

3.1 赋值

赋值运算符也是默认成员函数，如果不写会进行浅拷贝/值拷贝
三种情况
（原文此处为示意图：s1与s2空间大小关系的三种情况）
1.若为第一种两者空间大小相同，则进行值拷贝
2.若为第二种，s1的空间远大于s2的空间，直接进行值拷贝会浪费空间，所以按照第三种做法处理
3.若为第三种，s1的空间小于s2的空间，需要new开辟一块新空间，将s2拷贝到新开辟的空间，再将旧空间销毁
实际实现时不区分这三种情况，统一处理：先开辟新空间并将值拷贝过来，成功后再释放旧空间（如果先释放旧空间，new失败时原来的数据就丢失了）

	//s1=s3
		// Copy assignment (deep copy). Returns *this so assignments can be chained.
		//
		// The new buffer is allocated and filled BEFORE the old one is released:
		// if new[] throws, *this is left untouched.
		// memcpy over _size + 1 bytes is used instead of strcpy so that content
		// with embedded '\0' characters is copied completely.
		string& operator=(const string& s)
		{
			if (this != &s)  // self-assignment: nothing to do
			{
				char* tmp = new char[s._capacity + 1];
				memcpy(tmp, s._str, s._size + 1);  // +1 copies the terminating '\0'

				delete[] _str;
				_str = tmp;
				_size = s._size;
				_capacity = s._capacity;
			}

			return *this;
		}

4.1 析构函数

	// Destructor: releases the heap buffer and resets the bookkeeping fields.
	~string()
	{
		delete[] _str;

		_capacity = 0;
		_size = 0;
		_str = nullptr;
	}

释放_str，并置为空指针，将_size和_capacity置为0

5.1 c_str

	const char* c_str()
	{
		return _str;
	}

返回const char*类型的指针相当于返回字符串

6.1 operator[]

		// Mutable element access; `pos` must be a valid index (checked by assert).
		char& operator[](size_t pos)
		{
			assert(pos < _size);
			return *(_str + pos);
		}

		// Read-only element access for const objects; `pos` must be a valid index
		// (checked by assert).
		const char& operator[](size_t pos) const
		{
			assert(pos < _size);
			return *(_str + pos);
		}

由于可能存在string与const string类型所以设置成两个函数构成函数重载

7.1 size

	size_t size() const
	{
		return _size;
	}

返回字符串的大小

8.1 capacity

	size_t capacity() const
	{
		return _capacity;
	}

返回字符串的容量

9.1 比较（ASCII）大小

	//不修改成员变量数据的函数，最好都加上const
		// Lexicographic "greater than", comparing characters as unsigned bytes.
		//
		// Compares the common prefix with memcmp and breaks ties by length rather
		// than calling strcmp: strcmp stops at the first '\0', so two strings that
		// differ only after an embedded '\0' would wrongly compare as equal.
		bool operator>(const string& s) const
		{
			const size_t common = _size < s._size ? _size : s._size;
			const int cmp = memcmp(_str, s._str, common);
			return cmp > 0 || (cmp == 0 && _size > s._size);
		}

		// Equality: same length and the same _size bytes.
		// memcmp over _size bytes is used instead of strcmp so that content after
		// an embedded '\0' takes part in the comparison.
		bool operator==(const string& s)const
		{
			return _size == s._size && memcmp(_str, s._str, _size) == 0;
		}

		// "Greater than or equal", built on operator> and operator==.
		bool operator>=(const string& s)const
		{
			if (*this > s)
			{
				return true;
			}
			return *this == s;
		}

		// "Less than" is the negation of operator>=.
		bool operator<(const string& s)const
		{
			const bool notLess = (*this >= s);
			return !notLess;
		}

		// "Less than or equal" is the negation of operator>.
		bool operator<=(const string& s)const
		{
			const bool greater = (*this > s);
			return !greater;
		}

		// Inequality: the negation of operator==.
		// (The previous version returned `*this == s` without the negation, so
		// `a != b` was true exactly when the strings were equal.)
		bool operator!=(const string& s)const
		{
			return !(*this == s);
		}

借助C语言的比较函数（如strcmp；若内容中可能包含'\0'，用memcmp按长度比较更稳妥），从头开始逐个比较字符的ASCII值，先实现>和==，再通过复用这两个来实现剩下的运算符
如果不小心在复用时将const修饰的传给非const成员就会报错，所以括号外面加上const，修饰this指针

10.1 resize

分为三种情况
n<size 删除数据
size<n<capacity 剩余空间初始化
n>capacity 扩容+初始化

	// Changes the length to `n`.
	//  - n <= size : truncates, keeping the first n characters.
	//  - n >  size : grows (reserving more capacity if needed) and fills the new
	//                positions with `ch`.
	// In both cases the buffer is re-terminated at index n.
	void resize(size_t n, char ch = '\0')
	{
		// Shrinking (or same size): just move the terminator.
		if (n <= _size)
		{
			_size = n;
			_str[n] = '\0';
			return;
		}

		// Growing: make sure the buffer is large enough first.
		if (n > _capacity)
		{
			reserve(n);
		}

		for (size_t idx = _size; idx < n; ++idx)
		{
			_str[idx] = ch;
		}

		_str[n] = '\0';
		_size = n;
	}

11.1 reserve

为了防止new失败，所以使用临时变量tmp指向new出来的空间，若new成功，释放旧空间，并将 _str指向新空间

	void reserve(size_t n)
		{
			if (n > _capacity)
			{
				char* tmp = new char[n + 1];
				strcpy(tmp, _str);
				delete[] _str;
				_str = tmp;

				_capacity = n;
			}
		}

12.1 push_back(尾插字符)

	// Appends a single character by delegating to insert() at the end position.
	// insert() takes care of growing the buffer and keeping the terminator.
	void push_back(char ch)
	{
		const size_t tail = _size;
		insert(tail, ch);
	}

push_back直接复用insert，在_size位置（末尾）插入字符ch；被注释掉的写法则是先通过reserve扩容，再将ch放到末尾并补上'\0'

13.1 append(尾插字符串)

	// Appends the C string `str` by delegating to insert() at the end position.
	//
	// The stray strcat(_str, str) that used to run before insert() has been
	// removed: it wrote past the end of the buffer before any capacity check,
	// overwrote the terminator that insert() relies on, and caused the text to
	// be appended at the first '\0' and then inserted a second time.
	void append(const char* str)
	{
		insert(_size, str);
	}

append复用insert，在_size位置（即原字符串'\0'所在的位置）插入字符串str；被注释掉的写法则是先通过reserve把空间扩大到能容纳str的长度，再从原字符串'\0'的位置开始拷贝str字符串

14.1 +=字符/字符串

	// Appends one character (via push_back) and returns *this for chaining.
	string& operator+=(char ch)
	{
		string& self = *this;
		self.push_back(ch);
		return self;
	}

		// Appends a C string (via append) and returns *this for chaining.
		string& operator+=(const char* str)
		{
			string& self = *this;
			self.append(str);
			return self;
		}

使用上面实现好的push_back和append

15.1 insert插入字符/字符串

在pos位置插入字符

// Inserts the character `ch` before index `pos` (pos == size appends).
// Returns *this. `pos` must not exceed the current size (checked by assert).
string& insert(size_t pos, char ch)
{
	assert(pos <= _size);

	// Double the capacity when there is no room for one more character.
	const bool full = (_size + 1 > _capacity);
	if (full)
	{
		reserve(2 * _capacity);
	}

	// Shift [pos, _size] (terminator included) one slot to the right.
	// `end` starts one past the terminator and the condition is `end > pos`,
	// so the unsigned index never has to go below zero, even when pos == 0.
	for (size_t end = _size + 1; end > pos; --end)
	{
		_str[end] = _str[end - 1];
	}

	_str[pos] = ch;
	++_size;

	return *this;
}

由于pos与end都是size_t类型（无符号整数），不会出现负数
所以当while循环条件设置为end>=pos并且pos=0时，end减到0之后再执行--end不会变成-1，而是回绕成size_t的最大值，条件一直成立，无法结束循环
需要让end从_size的下一个位置开始，并把条件写成end>pos，才能使循环正常结束

在pos位置插入字符串

	// Inserts the C string `str` before index `pos` (pos == size appends).
	// Returns *this. `pos` must not exceed the current size (checked by assert).
	string& insert(size_t pos, const char* str)
	{
		assert(pos <= _size);

		const size_t len = strlen(str);
		const size_t needed = _size + len;

		if (needed > _capacity)
		{
			reserve(needed);
		}

		// Shift [pos, _size] (terminator included) `len` slots to the right,
		// walking from the back so nothing is overwritten before it is moved.
		// The last move happens at end == pos + len.
		for (size_t end = needed; end > pos + len - 1; --end)
		{
			_str[end] = _str[end - len];
		}

		// Copy exactly `len` characters into the gap (no terminator).
		strncpy(_str + pos, str, len);
		_size = needed;

		return *this;
	}

需要把[pos, _size]区间的字符（包含结尾的'\0'）整体向后挪动len个位置：end从_size+len开始递减，最后一次挪动发生在end=pos+len（此时end-len恰好等于pos）
所以循环条件写成end>pos+len-1，end的最后一次取值即为pos+len
使用strncpy函数，不包含'\0'，将str的前len个字符拷贝到_str+pos下标位置开始的空间

16.1 erase

pos位置开始删除len个数据
分两种情况：
1.当pos+len<总长度时，将pos+len之后的字符（含'\0'）整体前移拷贝到pos位置（例如使用strcpy/memmove），从而覆盖要被删除的字符
2.当pos+len大于等于总长度或者len等于npos时，pos之后的剩余字符全部删除

	// Removes up to `len` characters starting at index `pos`; with the default
	// `len` (npos), or when fewer than `len` characters remain, everything from
	// `pos` to the end is removed. Returns *this.
	// `pos` must not exceed the current size (checked by assert); pos == size is
	// allowed and removes nothing.
	string& erase(size_t pos, size_t len = npos)
	{
		assert(pos <= _size);

		// Compare against the remaining length instead of computing pos + len:
		// that sum wraps around for large len values other than npos, which would
		// send the call into the "middle" branch with an out-of-range source.
		const size_t remaining = _size - pos;
		if (len >= remaining)
		{
			_str[pos] = '\0';
			_size = pos;
		}
		else
		{
			// Source and destination overlap, so memmove is required (strcpy on
			// overlapping ranges is undefined behaviour). The byte count includes
			// the terminator and is based on _size, so embedded '\0' survive.
			memmove(_str + pos, _str + pos + len, remaining - len + 1);
			_size -= len;
		}

		return *this;
	}

17.1 swap

	//swap(s1, s2);
	//s1.swap(s2);
	// Exchanges the contents of *this and `s` by swapping the three members;
	// no characters are copied.
	void swap(string& s)
	{
		std::swap(_size, s._size);
		std::swap(_capacity, s._capacity);
		std::swap(_str, s._str);
	}

交换两个字符串

18.1 find

查找字符，返回字符的下标

	size_t find(char ch,size_t pos=0)
		{
			assert(pos < _size);
			for (size_t i = pos; i < _size; ++i)
			{
				if (_str[i] == ch)
				{
					return i;
				}
			}
			return npos;
		}

查找子串，返回子串开始的下标

	size_t find(const char* str, size_t pos = 0)
		{
			assert(pos < _size);

			char* p = strstr(_str + pos, str);
			if (p == nullptr)
			{
				return npos;
			}
			else
			{
				return p - _str;
			}
		}

19.1 clear

	// Drops all characters but keeps the allocated buffer (capacity unchanged).
	void clear()
	{
		_size = 0;
		_str[_size] = '\0';
	}

清除有效字符

20.1 流插入<<

流插入重载必须实现为友元函数么？
不对，使用友元函数是为了在类外面调用类的私有的成员变量，若不需要调用则不用友元函数

	// Stream insertion: writes all size() characters one by one (so embedded
	// '\0' characters are written too). Uses only the public interface, hence no
	// friend declaration is needed.
	ostream& operator<<(ostream& out, const string& s)
	{
		for (string::const_iterator it = s.begin(); it != s.end(); ++it)
		{
			out << *it;
		}
		return out;
	}

实现流插入不可以直接输出c_str()：c_str()返回的是C风格字符串，输出时遇到'\0'就会结束；若string中间包含好几个'\0'，则遇见第一个就会停止输出，不符合预期

21.1 流提取>>

输入多个值，C++规定空格/换行是值与值之间的区分

// Deliberately WRONG version of the extraction operator, kept to illustrate a
// pitfall: formatted `in >> ch` skips whitespace, so `ch` is never ' ' or '\n'
// and the loop below has no way to terminate.
istream& operator>>(istream& in,  string& s)// extraction operator >>
    {// incorrect implementation (do not use)
        char ch;
        in >> ch;
        while (ch != ' ' && ch != '\n')
        {
            s += ch;
            in >> ch;
        }
        return in;
    }

上述代码在循环中永远读不到空格/换行，导致循环无法停止
输入的数据都在缓冲区中，但是用operator>>提取char时，空格/换行会被当作多个值之间的间隔自动跳过，所以ch永远不会等于' '或'\n'
使用get就不会把空格/换行当作间隔跳过，而是把它们当作普通字符从缓冲区中提取出来，这样就能判断何时结束

// Stream extraction: replaces the content of `s` with the characters read from
// `in` up to (and consuming) the next space or newline.
//
// Characters are collected in a 128-byte stack buffer and appended to `s` in
// chunks, so that a long token does not trigger a reallocation per character.
//
// End of input is handled explicitly: the previous version stored the result of
// in.get() in a char and only compared it with ' ' and '\n', so once the stream
// hit EOF (or failed) the loop never terminated.
istream& operator>>(istream& in, string& s)
{
	s.clear();

	char buff[128];
	size_t i = 0;
	bool extracted = false;

	char ch;
	// in.get(ch) does not skip whitespace and becomes false on EOF/failure.
	while (in.get(ch) && ch != ' ' && ch != '\n')
	{
		extracted = true;
		buff[i++] = ch;
		if (i == 127)
		{
			// Buffer full: terminate it, flush it into s and start over.
			buff[i] = '\0';
			s += buff;
			i = 0;
		}
	}

	// Flush whatever is left in the buffer.
	if (i != 0)
	{
		buff[i] = '\0';
		s += buff;
	}

	// A token that ends at EOF is still a successful read: keep eofbit but drop
	// the failbit set by the final get(), so `while (in >> s)` sees the last token.
	if (extracted && in.eof())
	{
		in.clear(in.rdstate() & ~std::ios_base::failbit);
	}

	return in;
}

当需要输入的string对象中有值存在时，需要先使用clear清空，再输入新的数据
为了避免频繁扩容，使用一个128的字符数组接收，若输入的数据比128小，跳出循环将数组中的数据传给string类s，若输入的数据比128大，则将字符数组整体传给string类s，再正常扩容

二、整体代码实现

2.1 string.h

#pragma once
#include <assert.h>

#include <cstring>
#include <iostream>
#include <utility>

namespace zl
{
	// Minimal teaching re-implementation of std::string.
	//
	// The object owns a heap buffer of _capacity + 1 chars and keeps
	// _str[_size] == '\0'. The length is tracked in _size, so the content may
	// contain embedded '\0' characters; copying and comparing therefore work on
	// _size bytes (memcpy/memmove/memcmp) instead of relying on the C-string
	// terminator.
	class string
	{
	public:
		typedef char* iterator;
		typedef const char* const_iterator;

		// "Not found" / "until the end" sentinel: the largest size_t value.
		// Public so that callers can test the result of find(). It is initialised
		// in-class, so no separate out-of-class definition is required for the
		// by-value uses in this file.
		static const size_t npos = static_cast<size_t>(-1);

		// Iterators are plain pointers into the buffer: [begin(), end()) covers
		// the _size valid characters.
		iterator begin()
		{
			return _str;
		}

		iterator end()
		{
			return _str + _size;
		}

		const_iterator begin() const
		{
			return _str;
		}

		const_iterator end() const
		{
			return _str + _size;
		}

		// Default/converting constructor: copies the C string `str`.
		// The default argument is the empty literal "" (which still ends in
		// '\0'); nullptr must not be passed because strlen would dereference it.
		// An empty string still reserves room for 3 characters.
		string(const char* str = "")
			: _str(nullptr)
			, _size(strlen(str))
			, _capacity(0)
		{
			_capacity = _size == 0 ? 3 : _size;
			_str = new char[_capacity + 1];  // +1 for the terminating '\0'
			memcpy(_str, str, _size + 1);
		}

		// Copy constructor (deep copy): same capacity, own buffer.
		// memcpy over _size + 1 bytes keeps content after an embedded '\0'.
		string(const string& s)
			: _str(new char[s._capacity + 1])
			, _size(s._size)
			, _capacity(s._capacity)
		{
			memcpy(_str, s._str, _size + 1);
		}

		// Copy assignment (deep copy). The new buffer is allocated and filled
		// before the old one is released, so a failing new[] leaves *this intact.
		string& operator=(const string& s)
		{
			if (this != &s)  // self-assignment: nothing to do
			{
				char* tmp = new char[s._capacity + 1];
				memcpy(tmp, s._str, s._size + 1);

				delete[] _str;
				_str = tmp;
				_size = s._size;
				_capacity = s._capacity;
			}

			return *this;
		}

		// Releases the buffer and resets the bookkeeping fields.
		~string()
		{
			delete[] _str;
			_str = nullptr;
			_size = _capacity = 0;
		}

		// Read-only pointer to the '\0'-terminated buffer. const so it can be
		// called on const strings as well.
		const char* c_str() const
		{
			return _str;
		}

		// Element access; `pos` must be a valid index (checked by assert).
		char& operator[](size_t pos)
		{
			assert(pos < _size);
			return _str[pos];
		}

		const char& operator[](size_t pos) const
		{
			assert(pos < _size);
			return _str[pos];
		}

		// Number of valid characters (terminator not counted).
		size_t size() const
		{
			return _size;
		}

		// Number of characters the buffer can hold without reallocating.
		size_t capacity() const
		{
			return _capacity;
		}

		// Comparisons. Member functions that do not modify the object are const.
		// operator> and operator== compare _size bytes as unsigned chars (so
		// content after an embedded '\0' counts); the others are built on them.
		bool operator>(const string& s) const
		{
			const size_t common = _size < s._size ? _size : s._size;
			const int cmp = memcmp(_str, s._str, common);
			return cmp > 0 || (cmp == 0 && _size > s._size);
		}

		bool operator==(const string& s) const
		{
			return _size == s._size && memcmp(_str, s._str, _size) == 0;
		}

		bool operator>=(const string& s) const
		{
			return *this > s || *this == s;
		}

		bool operator<(const string& s) const
		{
			return !(*this >= s);
		}

		bool operator<=(const string& s) const
		{
			return !(*this > s);
		}

		// Negation of operator== (the negation was missing before, which made
		// `a != b` true exactly when the strings were equal).
		bool operator!=(const string& s) const
		{
			return !(*this == s);
		}

		// Changes the length to `n`: truncates when n <= size, otherwise grows
		// (reserving if needed) and fills the new positions with `ch`.
		void resize(size_t n, char ch = '\0')
		{
			if (n > _size)
			{
				if (n > _capacity)
				{
					reserve(n);
				}

				for (size_t i = _size; i < n; ++i)
				{
					_str[i] = ch;
				}
			}

			// Both cases end with the terminator at index n.
			_size = n;
			_str[_size] = '\0';
		}

		// Ensures room for at least `n` characters; never shrinks.
		// The new buffer goes into a temporary first so a failing new[] leaves
		// the object unchanged; memcpy over _size + 1 bytes keeps content after
		// an embedded '\0' (strcpy would drop it).
		void reserve(size_t n)
		{
			if (n > _capacity)
			{
				char* tmp = new char[n + 1];
				memcpy(tmp, _str, _size + 1);

				delete[] _str;
				_str = tmp;
				_capacity = n;
			}
		}

		// Appends one character (insert at the end).
		void push_back(char ch)
		{
			insert(_size, ch);
		}

		// Appends a C string (insert at the end). The stray strcat that used to
		// run before insert() wrote past the buffer and duplicated the text.
		void append(const char* str)
		{
			insert(_size, str);
		}

		string& operator+=(char ch)
		{
			push_back(ch);
			return *this;
		}

		string& operator+=(const char* str)
		{
			append(str);
			return *this;
		}

		// Inserts `ch` before index `pos` (pos == size appends). Returns *this.
		string& insert(size_t pos, char ch)
		{
			assert(pos <= _size);

			if (_size + 1 > _capacity)
			{
				// Double the capacity; the guard keeps growth working even if the
				// capacity were ever 0 (2 * 0 would not grow at all).
				reserve(_capacity == 0 ? 4 : 2 * _capacity);
			}

			// Shift [pos, _size] (terminator included) one slot to the right.
			memmove(_str + pos + 1, _str + pos, _size - pos + 1);
			_str[pos] = ch;
			++_size;

			return *this;
		}

		// Inserts the C string `str` before index `pos` (pos == size appends).
		// Returns *this. `str` must not point into this string's own buffer.
		string& insert(size_t pos, const char* str)
		{
			assert(pos <= _size);

			const size_t len = strlen(str);
			if (len == 0)
			{
				return *this;  // nothing to insert
			}

			if (_size + len > _capacity)
			{
				reserve(_size + len);
			}

			// Shift [pos, _size] (terminator included) `len` slots to the right,
			// then copy exactly `len` characters into the gap.
			memmove(_str + pos + len, _str + pos, _size - pos + 1);
			memcpy(_str + pos, str, len);
			_size += len;

			return *this;
		}

		// Removes up to `len` characters starting at `pos`; with the default
		// (npos), or when fewer than `len` characters remain, everything from
		// `pos` on is removed. pos == size is allowed and removes nothing.
		string& erase(size_t pos, size_t len = npos)
		{
			assert(pos <= _size);

			// Compare with the remaining length: pos + len could wrap around.
			const size_t remaining = _size - pos;
			if (len >= remaining)
			{
				_str[pos] = '\0';
				_size = pos;
			}
			else
			{
				// Overlapping ranges: memmove, not strcpy. Count includes the '\0'.
				memmove(_str + pos, _str + pos + len, remaining - len + 1);
				_size -= len;
			}

			return *this;
		}

		// Exchanges contents with `s` by swapping the three members.
		// Usage: s1.swap(s2);
		void swap(string& s)
		{
			std::swap(_str, s._str);
			std::swap(_capacity, s._capacity);
			std::swap(_size, s._size);
		}

		// Index of the first `ch` at or after `pos`, or npos. A start position at
		// or past the end yields npos (no assert), so empty strings are fine.
		size_t find(char ch, size_t pos = 0) const
		{
			for (size_t i = pos; i < _size; ++i)
			{
				if (_str[i] == ch)
				{
					return i;
				}
			}
			return npos;
		}

		// Index where the C string `str` first occurs at or after `pos`, or npos.
		// Uses strstr, so the search stops at the first '\0' in the buffer.
		size_t find(const char* str, size_t pos = 0) const
		{
			if (pos > _size)
			{
				return npos;
			}

			const char* p = strstr(_str + pos, str);
			if (p == nullptr)
			{
				return npos;
			}
			return static_cast<size_t>(p - _str);
		}

		// Drops all characters but keeps the allocated buffer.
		void clear()
		{
			_str[0] = '\0';
			_size = 0;
		}

	private:
		char* _str;        // heap buffer of _capacity + 1 chars
		size_t _size;      // number of valid characters
		size_t _capacity;  // characters the buffer can hold besides the '\0'

		// Note: only static const members of integral type may be initialised
		// in-class like npos above; e.g. `static const double dpos = 1.1;` is
		// not allowed.
	};

	// Stream insertion: writes all size() characters one by one (so embedded
	// '\0' characters are written too). Uses only the public interface, hence no
	// friend declaration is needed.
	ostream& operator<<(ostream& out, const string& s)
	{
		for (string::const_iterator it = s.begin(); it != s.end(); ++it)
		{
			out << *it;
		}
		return out;
	}

	// Stream extraction: replaces the content of `s` with the characters read
	// from `in` up to (and consuming) the next space or newline.
	//
	// Characters are collected in a 128-byte stack buffer and appended to `s` in
	// chunks, so that a long token does not trigger a reallocation per character.
	//
	// End of input is handled explicitly: the previous version stored the result
	// of in.get() in a char and only compared it with ' ' and '\n', so once the
	// stream hit EOF (or failed) the loop never terminated.
	istream& operator>>(istream& in, string& s)
	{
		s.clear();

		char buff[128];
		size_t i = 0;
		bool extracted = false;

		char ch;
		// in.get(ch) does not skip whitespace and becomes false on EOF/failure.
		while (in.get(ch) && ch != ' ' && ch != '\n')
		{
			extracted = true;
			buff[i++] = ch;
			if (i == 127)
			{
				// Buffer full: terminate it, flush it into s and start over.
				buff[i] = '\0';
				s += buff;
				i = 0;
			}
		}

		// Flush whatever is left in the buffer.
		if (i != 0)
		{
			buff[i] = '\0';
			s += buff;
		}

		// A token that ends at EOF is still a successful read: keep eofbit but
		// drop the failbit set by the final get(), so `while (in >> s)` sees the
		// last token.
		if (extracted && in.eof())
		{
			in.clear(in.rdstate() & ~std::ios_base::failbit);
		}

		return in;
	}

	void test_string1()
	{
		string s1;
		string s2("hello world");

		cout << s1.c_str() << endl;
		cout << s2.c_str() << endl;

		s2[0]++;

		cout << s1.c_str() << endl;
		cout << s2.c_str() << endl;

	}

	void test_string2()
	{
		string s1;
		string s2("hello world");

		string s3(s2);
		cout << s1.c_str() << endl;
		cout << s2.c_str() << endl;
		cout << s3.c_str() << endl;

		s2[0]++;
		cout << s2.c_str() << endl;
		cout << s3.c_str() << endl;

		s1 = s3;
		cout << s1.c_str() << endl;
		cout << s3.c_str() << endl;
	}

	// Demo: three ways of walking a const string (index, const_iterator,
	// range-for). Only the first and the last pass print the characters.
	void Print(const string& s)
	{
		// 1) index access through the const operator[]
		size_t idx = 0;
		while (idx < s.size())
		{
			cout << s[idx] << " ";
			++idx;
		}
		cout << endl;

		// 2) const_iterator: elements are read-only (`*it = 'x'` would not
		//    compile); this pass only advances the iterator.
		for (string::const_iterator it = s.begin(); it != s.end(); ++it)
		{
		}
		cout << endl;

		// 3) range-for, which uses the const begin()/end()
		for (auto ch : s)
		{
			cout << ch << " ";
		}
	}

	void test_string3()
	{
		string s1("hello world");
		for (size_t i = 0; i < s1.size(); ++i)
		{
			s1[i]++;
		}
		cout << endl;

		for (size_t i = 0; i < s1.size(); ++i)
		{
			cout << s1[i] << " ";
		}
		cout << endl;

		Print(s1);

		string::iterator it = s1.begin();
		while (it != s1.end())
		{
			(*it)--;
			++it;
		}
		cout << endl;

		it = s1.begin();
		while (it != s1.end())
		{
			cout << *it << " ";
			++it;
		}
		cout << endl;

		for (auto ch : s1)
		{
			cout << ch << " ";
		}
		cout << endl;
	}

	void test_string4()
	{
		string s1("hello world");
		string s2("hello world");
		string s3("xx");

		cout << (s1 < s2) << endl;
		cout << (s1 == s2) << endl;
		cout << (s1 >= s2) << endl;
	}

	void test_string5()
	{
		string s1("hello world");
		s1.push_back(' ');
		s1.append("xxxxxxxxxxxxxxxxxxx");

		cout << s1.c_str() << endl;

		string s2;
		s2 += 'a';
		s2 += 'b';
		s2 += 'c';
		cout << s2.c_str() << endl;

		s1.insert(5, 'x');
		cout << s1.c_str() << endl;

		s1.insert(0, 'x');
		cout << s1.c_str() << endl;
	}

	void test_string6()
	{
		string s1("hello world1111111111111111111111111111");
		cout << s1.capacity() << endl;
		s1.reserve(10);
		cout << s1.capacity() << endl;
	}

	void test_string7()
	{
		string s1;
		s1.resize(20, 'x');
		cout << s1.c_str() << endl;
		s1.resize(30, 'y');
		cout << s1.c_str() << endl;

		s1.resize(10);
		cout << s1.c_str() << endl;
	}

	void test_string8()
	{
		string s1("11111111111");
		s1.insert(0, 'x');
		cout << s1.c_str() << endl;

		s1.insert(3, 'x');
		cout << s1.c_str() << endl;

		s1.insert(3, "yyy");
		cout << s1.c_str() << endl;

		s1.insert(0, "yyy");
		cout << s1.c_str() << endl;
	}

	void test_string9()
	{
		string s1("0123456789");
		cout << s1.c_str() << endl;

		s1.erase(4, 3);
		cout << s1.c_str() << endl;

		s1.erase(4, 30);
		cout << s1.c_str() << endl;

		s1.erase(2);
		cout << s1.c_str() << endl;
	}

	//流插入重载必须实现成友元函数？不对
	void test_string10()
	{
		string s1("0123456789");
		s1 += '\0';
		s1 += "xxxxxxxx";

		cout << s1 << endl;
		cout << s1.c_str() << endl;

		string s2;
		cin >> s2;
		cout << s2 << endl;

		cin >> s1;
		cout << s1 << endl;
	}

}

2.2 test.cpp

#define _CRT_SECURE_NO_WARNINGS 1

#define _CRT_SECURE_NO_WARNINGS 1
#include <iostream>
using namespace std;

#include "string.h"


int main()
{
	try
	{
		//zl::test_string1();
		//zl::test_string2();
		//zl::test_string3();
		//zl::test_string4();
		//zl::test_string5();
		//zl::test_string6();
		//zl::test_string7();
		//zl::test_string8();
		//zl::test_string9();
		zl::test_string10();
	}
	catch (const exception& e)
	{
		cout << e.what() << endl;
	}

	//zl::string s1;
	//std::string s2;
	//cout << sizeof(s1) << endl;//24    //12
	//cout << sizeof(s2) << endl;//40    //28

	//zl::string s3("11111");
	//std::string s4("111111");
	//cout << sizeof(s3) << endl;//24      //12
	//cout << sizeof(s4) << endl;//40       //28

	return 0;
}