【C++模拟实现】string的模拟实现

【C++模拟实现】string的模拟实现
- string模拟实现的标准代码
- string模拟实现中的要点
  - string构造函数的实现
  - 赋值运算符重载
  - 迭代器的实现
  - 对流插入和流提取运算符的重载
  - find函数的实现
  - insert函数的实现

作者：爱写代码的刚子
时间：2023.7.22
前言：本篇博客主要介绍string的模拟实现，之后会更新一系列STL中一些重要容器的模拟实现。

string模拟实现的标准代码

#pragma once

#include<assert.h>
#include<string.h>

#include<iostream>
#include<utility>

// Teaching re-implementation of a std::string-like class.
// The buffer always holds _size characters followed by a terminating '\0',
// and data is copied with memcpy (not strcpy) so that '\0' characters inside
// the string survive copies.
namespace string_imitate
{
	class string
	{
	public:
		// Storage is one contiguous char array, so raw pointers work as iterators.
		typedef char* iterator;
		typedef const char* const_iterator;

		// Mutable iterator to the first character.
		iterator begin()
		{
			return _str;
		}

		// Mutable iterator one past the last character (points at the '\0').
		iterator end()
		{
			return _str + _size;
		}

		// Read-only iterator to the first character.
		const_iterator begin() const
		{
			return _str;
		}

		// Read-only iterator one past the last character.
		const_iterator end() const
		{
			return _str + _size;
		}

		// Builds a string from a C string. The default argument is "" (not
		// nullptr and not '\0'), so strlen() is always handed a valid pointer
		// and the default-constructed string is empty.
		string(const char* str = "")
		{
			assert(str != nullptr); // strlen(nullptr) would be undefined behaviour

			_size = strlen(str);
			_capacity = _size;
			_str = new char[_capacity + 1]; // +1 for the terminating '\0'
			memcpy(_str, str, _size + 1);
		}

		// Deep copy: allocates a separate buffer of the same capacity.
		string(const string& s)
		{
			_str = new char[s._capacity + 1];
			memcpy(_str, s._str, s._size + 1);
			_size = s._size;
			_capacity = s._capacity;
		}

		// Exchanges the contents of two strings by swapping the three members;
		// no characters are copied.
		void swap(string& s)
		{
			std::swap(_str, s._str);
			std::swap(_size, s._size);
			std::swap(_capacity, s._capacity);
		}

		// Copy-and-swap assignment ("modern" style): the by-value parameter is
		// already a copy of the right-hand side, so we take over its buffer and
		// let its destructor release our old one. Self-assignment needs no
		// special case.
		string& operator=(string tmp)
		{
			swap(tmp);

			return *this;
		}

		~string()
		{
			delete[] _str;
			_str = nullptr;
			_size = _capacity = 0;
		}

		// Pointer to the '\0'-terminated character data.
		const char* c_str() const
		{
			return _str;
		}

		// Number of characters, not counting the terminating '\0'.
		size_t size() const
		{
			return _size;
		}

		// Element access; pos must be < size().
		char& operator[](size_t pos)
		{
			assert(pos < _size);

			return _str[pos];
		}

		const char& operator[](size_t pos) const
		{
			assert(pos < _size);

			return _str[pos];
		}

		// Grows the capacity to at least n characters. Never shrinks.
		void reserve(size_t n)
		{
			if (n > _capacity)
			{
				char* tmp = new char[n + 1];
				memcpy(tmp, _str, _size + 1);

				delete[] _str;
				_str = tmp;
				_capacity = n;
			}
		}

		// Changes the length to n: truncates when n < size(), otherwise pads
		// with ch.
		void resize(size_t n, char ch = '\0')
		{
			if (n < _size)
			{
				_size = n;
				_str[_size] = '\0';
			}
			else
			{
				reserve(n);

				for (size_t i = _size; i < n; i++)
				{
					_str[i] = ch;
				}

				_size = n;
				_str[_size] = '\0';
			}
		}

		// Appends one character, doubling the capacity when the buffer is full.
		void push_back(char ch)
		{
			if (_size == _capacity)
			{
				reserve(_capacity == 0 ? 4 : _capacity * 2);
			}

			_str[_size] = ch;

			++_size;
			_str[_size] = '\0';
		}

		// Appends a C string, growing to exactly _size + len when needed.
		void append(const char* str)
		{
			size_t len = strlen(str);
			if (_size + len > _capacity)
			{
				reserve(_size + len);
			}

			// len + 1 also copies the terminating '\0'.
			memcpy(_str + _size, str, len + 1);

			_size += len;
		}

		string& operator+=(char ch)
		{
			push_back(ch);
			return *this;
		}

		string& operator+=(const char* str)
		{
			append(str);
			return *this;
		}

		// Inserts n copies of ch before position pos (pos <= size()).
		void insert(size_t pos, size_t n, char ch)
		{
			assert(pos <= _size);

			open_gap(pos, n);

			for (size_t i = 0; i < n; i++)
			{
				_str[pos + i] = ch;
			}

			_size += n;
		}

		// Inserts the C string str before position pos (pos <= size()).
		void insert(size_t pos, const char* str)
		{
			assert(pos <= _size);

			size_t len = strlen(str);
			open_gap(pos, len);

			for (size_t i = 0; i < len; i++)
			{
				_str[pos + i] = str[i];
			}

			_size += len;
		}

		// Removes up to len characters starting at pos (pos <= size()).
		// With the default len, or when len reaches past the end, everything
		// from pos onwards is removed.
		void erase(size_t pos, size_t len = npos)
		{
			assert(pos <= _size);

			// Compare against the remaining length instead of computing
			// pos + len, which can wrap around for very large len values.
			if (len >= _size - pos)
			{
				_size = pos;
				_str[_size] = '\0';
			}
			else
			{
				// Slide the tail, including the '\0', down over the hole.
				memmove(_str + pos, _str + pos + len, _size - pos - len + 1);
				_size -= len;
			}
		}

		// Index of the first ch at or after pos, or npos when there is none.
		// A start position at or beyond the end simply finds nothing.
		size_t find(char ch, size_t pos = 0) const
		{
			for (size_t i = pos; i < _size; i++)
			{
				if (_str[i] == ch)
				{
					return i;
				}
			}

			return npos;
		}

		// Index of the first occurrence of str at or after pos, or npos.
		// Uses strstr, so the search stops at the first '\0' stored in this
		// string.
		size_t find(const char* str, size_t pos = 0) const
		{
			if (pos > _size)
			{
				return npos;
			}

			const char* ptr = strstr(_str + pos, str);
			if (ptr)
			{
				// Pointer difference == index of the match.
				return ptr - _str;
			}
			else
			{
				return npos;
			}
		}

		// Returns a copy of up to len characters starting at pos
		// (pos <= size()).
		string substr(size_t pos = 0, size_t len = npos) const
		{
			assert(pos <= _size);

			// Clamp to the characters that actually exist; written without
			// pos + len so a huge len cannot wrap around.
			size_t n = len;
			if (len > _size - pos)
			{
				n = _size - pos;
			}

			string tmp;
			tmp.reserve(n);
			memcpy(tmp._str, _str + pos, n);
			tmp._size = n;
			tmp._str[n] = '\0';

			return tmp;
		}

		// Makes the string empty; the capacity is kept.
		void clear()
		{
			_str[0] = '\0';
			_size = 0;
		}

		// Lexicographic comparison over the common prefix; on a tie the
		// shorter string is the smaller one:
		//   "hello"   < "hello"   -> false
		//   "helloxx" < "hello"   -> false
		//   "hello"   < "helloxx" -> true
		bool operator<(const string& s) const
		{
			int ret = memcmp(_str, s._str, _size < s._size ? _size : s._size);

			return ret == 0 ? _size < s._size : ret < 0;
		}

		bool operator==(const string& s) const
		{
			return _size == s._size
				&& memcmp(_str, s._str, _size) == 0;
		}

		// The remaining comparisons are derived from < and ==.
		bool operator<=(const string& s) const
		{
			return *this < s || *this == s;
		}

		bool operator>(const string& s) const
		{
			return !(*this <= s);
		}

		bool operator>=(const string& s) const
		{
			return !(*this < s);
		}

		bool operator!=(const string& s) const
		{
			return !(*this == s);
		}

	private:
		// Ensures room for n more characters and moves [pos, _size] (the tail
		// plus its '\0') n slots to the right. Does not change _size.
		void open_gap(size_t pos, size_t n)
		{
			if (_size + n > _capacity)
			{
				reserve(_size + n);
			}

			// end is unsigned: when pos == 0, decrementing it past 0 wraps to
			// npos instead of going negative, so "end >= pos" alone would never
			// become false. The extra "end != npos" test stops the loop.
			size_t end = _size;
			while (end >= pos && end != npos)
			{
				_str[end + n] = _str[end];
				--end;
			}
		}

		size_t _size;      // number of characters in use
		size_t _capacity;  // characters the buffer can hold, excluding the '\0'
		char* _str;        // heap buffer of _capacity + 1 bytes

	public:
		// "Not found" / "until the end" marker: the largest size_t value.
		// An in-class initializer (const static size_t npos = -1;) is also
		// legal for integral constants, but the member is declared here and
		// defined below.
		const static size_t npos;
	};

	// NOTE: this is an ordinary (non-inline) definition, so it must end up in
	// exactly one translation unit of a program.
	const size_t string::npos = -1;

	// Writes every character of s, including any embedded '\0', to out.
	// Returning the stream by reference is what allows chaining: out << a << b.
	inline std::ostream& operator<<(std::ostream& out, const string& s)
	{
		for (auto ch : s)
		{
			out << ch;
		}

		return out;
	}

	// Reads one word into s, replacing its previous contents. Leading spaces
	// and newlines are skipped, and reading stops at the next space or newline
	// (which is consumed) or at end of input. If no character can be read, the
	// stream's failbit is set.
	inline std::istream& operator>>(std::istream& in, string& s)
	{
		typedef std::istream::traits_type traits;

		s.clear();

		// peek()/get() are used instead of "in >> ch" because formatted
		// extraction skips whitespace and would never show the delimiter.
		int c = in.peek();
		while (c == ' ' || c == '\n')
		{
			in.get();
			c = in.peek();
		}

		if (c == traits::eof())
		{
			// Nothing to extract: report failure instead of looping forever.
			in.setstate(std::ios_base::failbit);
			return in;
		}

		// Collect characters in a local buffer and append them in chunks, so a
		// long word does not grow the string one character at a time.
		char buff[128];
		size_t i = 0;

		while (c != traits::eof() && c != ' ' && c != '\n')
		{
			in.get();
			buff[i++] = traits::to_char_type(c);
			if (i == 127)
			{
				buff[i] = '\0';
				s += buff;
				i = 0;
			}

			c = in.peek();
		}

		if (i != 0)
		{
			buff[i] = '\0';
			s += buff;
		}

		if (c != traits::eof())
		{
			in.get(); // consume the delimiter that ended the word
		}

		return in;
	}
}

对于string的模拟实现，我们需要多敲代码，进一步熟悉string。作者也敲了好多次，但给出的是标准的代码不是作者敲的。

string模拟实现中的要点

string构造函数的实现

拷贝数据的选择strcpy还是memcpy？
一般来说，两者并没有太大的区别，但是strcpy在底层中拷贝到'\0'就结束了，而memcpy则是按指定的字节数进行内存拷贝。当我们实现对运算符+=进行重载的时候会发现，如果字符串中间含有'\0'，如：“1234\01234”（这里的\0表示一个空字符），使用strcpy只会拷贝到第一个'\0'为止，并不会将字符串完全拷贝，所以我们选择用memcpy进行拷贝。
默认构造较好的实现（给缺省值）

string(const char* str = "")
                :_capacity(strlen(str))
                ,_size(strlen(str))
        {
            _str=new char[_size+1];
            memcpy(_str,str,_size+1);
        }

赋值运算符重载

operator=的传统写法与现代写法：
1.传统写法：

 // Traditional copy assignment: allocate and fill the new buffer first, then
 // release the old one.
 string& operator=(const string &s)
        {
            // Self-assignment must be a no-op; otherwise we would free the
            // very buffer we are about to copy from.
            if(this!=&s)
            {
                // Allocate before deleting, so *this is left untouched if new
                // throws.
                char* tmp=new char[s._capacity+1];
                memcpy(tmp,s._str,s._size+1);
                delete[] _str;   // release the old buffer instead of leaking it
                _str=tmp;
                _size=s._size;
                _capacity=s._capacity;
            }
            return *this;
        }

2.现代写法：

// Modern (copy-and-swap) assignment: copy the right-hand side into a
// temporary, then exchange buffers with it. When tmp goes out of scope its
// destructor frees the buffer that used to belong to *this.
string& operator=(const string &s)
        {
            string tmp(s);
            swap(tmp);   // without this call the assignment would do nothing
            return *this;
        }
// Exchanges the contents of *this and s by swapping the three members.
// No characters are copied, and swapping an object with itself is harmless,
// so neither a temporary copy nor a self check is needed.
void swap(string& s)
        {
            std::swap(_str,s._str);
            std::swap(_capacity,s._capacity);
            std::swap(_size,s._size);
        }

这种写法在很多场景下都可以借鉴。

迭代器的实现

如何区分const迭代器和非const迭代器？
先用typedef定义两种类型的迭代器：

// Because the characters are stored contiguously, plain pointers can serve as
// iterators: a mutable one and a read-only one.
using iterator = char*;
using const_iterator = const char*;

普通迭代器：

// Mutable iterator to the first character.
iterator begin()
    {
        return &_str[0];
    }
// Mutable iterator one past the last character.
iterator end()
    {
        return &_str[_size];
    }

const迭代器：

  // Read-only iterator to the first character; chosen for const objects.
  const_iterator begin() const
    {
        return &_str[0];
    }

    // Read-only iterator one past the last character.
    const_iterator end() const
    {
        return &_str[_size];
    }

对流插入和流提取运算符的重载

前提：
流插入和流提取的重载函数必须实现在类外：如果写成成员函数，左操作数就固定是string对象（this），使用时只能写成 s << cout，不符合习惯。所以我们把它们实现为类外的普通函数；如果需要访问私有成员，可以再将它们声明为string类的友元函数（本文的实现只用到了公有接口，因此不必声明为友元）。
对<<的重载

// Stream insertion: writes the characters of s one by one and returns the
// stream so that calls can be chained.
ostream& operator<<(ostream& _out, const bit::string& s)
    {
        for(auto it=s.begin(); it!=s.end(); ++it)
        {
            _out<<*it;
        }
        return _out;
    }

注意以上代码中变量_out的类型必须为ostream&，返回值也必须为ostream&（必须加上引用，因为ostream对象不能拷贝），否则不能实现连续的流插入（如 cout << s1 << s2）。
对于参数s要加上const，否则const对象将无法作为实参传入。

对>>的重载
错误写法：

// Deliberately WRONG version, kept as a counter-example.
// Formatted extraction of a char (_cin>>ch) skips whitespace by default, so ch
// can never hold ' ' or '\n': the loop condition below cannot become false
// because of a delimiter, and the word boundary is never detected.
istream& operator>>(istream& _cin, bit::string& s)
    {
        char ch;
        _cin>>ch;
        while(ch!=' '&&ch!='\n')
        {
            s+=ch;
            _cin>>ch;
        }
        return _cin;
    }

这种写法的问题在于：用operator>>读取char时，默认会跳过空格、换行等空白字符，所以ch永远不可能等于' '或'\n'，循环无法在空白处结束，并不能正确地实现流提取。正确的写法如下：

 // Stream extraction built on get(), which reads one character at a time and
 // does not skip whitespace, so the delimiter really shows up in ch.
 // The previous contents of s are discarded first.
 istream& operator>>(istream& _cin, bit::string& s)
    {
        s.clear();
        char ch;
        // get(ch) leaves the stream in a failed state at end of input, which
        // ends the loop instead of spinning forever on a value that is never
        // ' ' or '\n'.
        while(_cin.get(ch)&&ch!=' '&&ch!='\n')
        {
            s+=ch;
        }
        return _cin;
    }

istream提供了get()成员函数，它逐个读取字符，并且不会跳过空格和换行，因此可以真正读到' '和'\n'，循环也就能在遇到它们时结束。
在这里插入图片描述

find函数的实现

可以利用C语言库中的strstr实现。
还有一个易忘点：指向同一块连续空间的两个指针相减，得到的是它们之间相隔的元素个数，所以ptr-_str就是匹配位置的下标。

 // Returns the index of the first occurrence of the C string s at or after
 // pos, or npos when there is none. A start position beyond the end finds
 // nothing; pos == _size is allowed, so searching an empty string is valid.
 size_t find (const char* s, size_t pos = 0) const
        {
            if(pos>_size)
            {
                return npos;
            }
            const char*ptr=std::strstr(_str+pos,s);
            if(ptr)
            {
                // Pointer difference == index of the match.
                return ptr-_str;
            }
            else
            {
                return npos;
            }
        }

insert函数的实现

注意以下代码中end的变化，以及while语句中的条件：

// Inserts the C string str before position pos (pos <= _size) and returns
// *this.
string& insert(size_t pos, const char* str)
        {
            assert(pos<=_size);
            size_t len=strlen(str);
            if(_size+len>_capacity)
            {
                reserve(_size+len);
            }
            // Move [pos, _size] (the tail plus its '\0') len slots to the
            // right, starting from the back. end is unsigned: when pos is 0,
            // decrementing it past 0 wraps to npos, so the second test is what
            // stops the loop.
            size_t end=_size;
            while(end>=pos&&end!=npos)
            {
                _str[end+len]=_str[end];
                --end;
            }
            // size_t index: len is unsigned, so an int counter would be a
            // signed/unsigned comparison.
            for(size_t i=0;i<len;i++)
            {
                _str[pos+i]=str[i];
            }
            _size+=len;
            return *this;
        }

如果我们只考虑到while(end>=pos)那insert的实现就错了。因为end和pos的类型都是size_t（无符号整数），不可能为负数：当插入的位置pos为0时，end减到0之后再执行--end并不会变成-1，而是回绕成size_t的最大值，end>=pos始终成立，所以陷入死循环（并且会越界访问）。
由于npos恰好就是size_t的最大值（即(size_t)-1），我们考虑用npos常变量来解决此问题，于是有了循环条件：while(end>=pos&&end!=npos)。
如何定义npos变量？
可以在string类中设置一个public变量：const static size_t npos; 在类外进行初始化：const size_t string::npos = -1;也可以直接：const static size_t npos = -1;