数据结构之二叉搜索树_最优二叉查找树

作者：天景科技苑 | 2024-08-08 14:06:18

踩

最优二叉查找树

1.二叉搜索树

1.1 二叉搜索树概念

二叉搜索树又称二叉排序树，它或者是一棵空树，或者是具有以下性质的二叉树:
·若它的左子树不为空，则左子树上所有节点的值都小于根节点的值
·若它的右子树不为空，则右子树上所有节点的值都大于根节点的值
·它的左右子树也分别为二叉搜索树

通过观察可以发现：搜索二叉树走一遍中序遍历可以排成升序.

1.2 二叉搜索树操作

1. 二叉搜索树的查找

2. 二叉搜索树的插入

插入的具体过程如下：
a. 树为空，则直接插入

b. 树不空，按二叉搜索树性质查找插入位置，插入新节点

3. 二叉搜索树的删除

删除节点的原则是：必须保持搜索二叉树的特性.

1.3 二叉搜索树的实现


namespace K
{
	template<class K>
	struct BSTreeNode//定义节点的结构体
	{
		BSTreeNode<K>* _left;//左孩子指针
		BSTreeNode<K>* _right;//右孩子指针
		K _key;//存的值
 
		BSTreeNode(const K& key)//节点的构造函数
			:_left(nullptr)
			,_right(nullptr)
			,_key(key)
		{}
	};
 
	template<class K>
	class BSTree
	{
		typedef BSTreeNode<K> Node;
	public:
		BSTree()//构造函数
			:_root(nullptr)
		{}
		BSTree(const BSTree<K>& t)
		{
			_root = Copy(t._root);//调用递归copy去拷贝构造
		}
		BSTree<K>& operator=(BSTree<K> t)//利用现代写法进行赋值拷贝
		{
			swap(_root, t._root);
			return *this;
		}
		~BSTree()
		{
			Destory(_root);//调用Destory去析构
			_root = nullptr;//将root置空
		}
		bool Insert(const K& key)
		{
			if (_root == nullptr)//如果刚开始树为空树，那么直接给一个节点给root
			{
				_root = new Node(key);
				return true;
			}
			Node* cur = _root;//利用cur去循环迭代找到空位
			Node* parent = nullptr;//利用parent去记录cur的父亲节点,并且要注意这个刚开始给成nullptr
			while (cur)//利用循环去找适合key的空位,找到为空的位置就结束
			{
				if (key > cur->_key)//大于就去右子树
				{
					parent = cur;//进入语句先把父节点赋值
					cur = cur->_right;//然后再把cur迭代
				}
				else if (key < cur->_key)//小于就去左子树
				{
					parent = cur;
					cur = cur->_left;
				}
				else
				{
					return false;//如果找到与key值相等的节点那么就删除
				}
			}//出了while循环说明找到了合适的空位，此时可以开始插入新节点
 
			cur = new Node(key);//给cur申请一个节点
			if (key > parent->_key)//这里的if语句判断一定要注意，是用值判断而不是指针
				parent->_right = cur;
			else if (key < parent->_key)
				parent->_left = cur;
 
			return true;//插入成功返回
		}
		Node* Find(const K& key)
		{
			Node* cur = _root;
			while (cur)//利用循环去找
			{
				if (key > cur->_key)
				{
					cur = cur->_right;
				}
				else if (key < cur->_key)
				{
					cur = cur->_left;
				}
				else
				{
					return cur;
				}
			}
			return false;//出了循环还未找到就返回false
		}
		bool Erase(const K& key)
		{
			Node* cur = _root;
			Node* parent = nullptr;
			while (cur)
			{
				if (key > cur->_key)
				{
					parent = cur;
					cur = cur->_right;
				}
				else if (key < cur->_key)
				{
					parent = cur;
					cur = cur->_left;
				}//上面两段if语句都是去定位key的位置
				else//进入else语句则找到了需要删除的节点
				{
					if (cur->_left == nullptr)//如果需要删除的节点的左节点为空，即没有左孩子
					{
						if (cur == _root)//注：这种情况非常容易被忽略
						{
							_root = cur->_right;//如果cur就为根节点的话，也就是右单支的情况，那么把root赋值为cur的右节点
						}
						else//cur不为根节点的情况
						{
							if (cur == parent->_left)//需要判断cur为父亲的左孩子还是右孩子
							{
								parent->_left = cur->_right;//需要将cur的右孩子托孤给父亲
							}
							else if (cur == parent->_right)
							{
								parent->_right = cur->_right;
							}
						}
						delete cur;//最后删除cur节点
					}
					else if (cur->_right == nullptr)//如果需要删除的节点的右节点为空，即没有右孩子
					{
						if (cur == _root)//需要特别注意
						{
							_root = cur->_left;//如果cur就为根节点的话，也就是左单支的情况，那么把root赋值为cur的左节点
						}
						else//cur不为根节点的情况
						{
							if (cur == parent->_left)//需要判定的是cur节点为父节点的左孩子还是右孩子
							{
								parent->_left = cur->_left;//将cur节点的左孩子托孤给父亲
							}
							else if (cur == parent->_right)
							{
								parent->_right = cur->_left;
							}
						}
						delete cur;//最后删除cur节点
					}
					else
					{
						//这种情况最复杂：也就是需要删除的节点左孩子和右孩子都存在的情况
						//此时我们采用替代法删除
						Node* minRight = cur->_right;//去被删除的节点的右子树找最小节点，左子树找最大节点道理也是一样的
						Node* minParent = cur;//注意：这里一定不能给空节点,必须给cur节点
						//至于为什么不能给空节点,万一下面的while循环进不去，后面的if语句就会造成非法访问
						while (minRight->_left)//注意:括号里面是左节点不为空时为循环结束的条件,因为我们的目的是去找到最左节点，最左节点已经走到空了，就不要再进入循环了
						{
							minParent = minRight;
							minRight = minRight->_left;
						}
						cur->_key = minRight->_key;//将cur的值替换成右子树的最小节点值
						//走到这里就很容易知道最小节点的左子树一定为空了，但是右子树呢？
						//右子树如果不为空我们就需要进行托孤，这种托孤方式也就演变成了上面两种简单的方式
						//但是这一部很容易被遗忘：大家都会觉得minRight一定会是minParent的左孩子
						//这是错误的理解，这里一定要去判断minRight为minParent右孩子的情况，这里画图理解
						if (minRight == minParent->_left)//如果为父节点的左孩子，那么就把我的右孩子交给父节点的左孩子
								minParent->_left = minRight->_right;
						else if (minRight == minParent->_right)//如果为父节点的右孩子，那么就把我的右孩子交给父节点的右孩子
								minParent->_right = minRight->_right;
						delete minRight;
					}
					return true;//删除完节点过后返回true
				}
			}
			return false;//出了while循环还没有找到需要删除的节点返回false
		}
		bool InsertR(const K& key)//递归插入key
		{
			return _InsertR(_root, key);
		}
		Node* FindR(const K& key)//递归查找key
		{
			return _FindR(_root, key);
		}
		bool EraseR(const K& key)//递归删除key
		{
			return _EraseR(_root, key);
		}
		void Inorder()//中序遍历
		{
			_Inorder(_root);//把中序遍历的递归封装一层，传root指针，保护root不被改变
			cout << endl;
		}
	private:
		void Destory(Node* root)
		{
			if (root == nullptr)
				return;
			Destory(root->_left);
			Destory(root->_right);
			delete root;//利用后序遍历递归删除节点
		}
		Node* Copy(Node* root)
		{
			if (root == nullptr)
				return nullptr;
			Node* newroot = new Node(root->_key);//建立新节点
			newroot->_left = Copy(root->_left);//新节点的左右节点再去转换成子问题
			newroot->_right = Copy(root->_right);
			return newroot;//最后返回新节点
		}
		bool _InsertR(Node*& root, const K& key)//注意这里要用引用传参
		{
			if (root == nullptr)//root为空时new一个节点
			{
				root = new Node(key);
				return true;
			}
 
			if (key > root->_key)
			{
				return _InsertR(root->_right, key);
			}
			else if (key < root->_key)
			{
				return _InsertR(root->_left, key);
			}
			else
			{
				return false;
			}
		}
		Node* _FindR(Node* root, const K& key)
		{
			if (root == nullptr)
			{
				return nullptr;
			}
			if (key > root->_key)
			{
				return _FindR(root->_right, key);
			}
			else if (key < root->_key)
			{
				return _FindR(root->left, key);
			}
			else
			{
				return root;
			}
		}
		bool _EraseR(Node*& root, const K& key)//注意这里一定要使用引用传参
		{
			if (root == nullptr)//如果root为空则返回false
			{
				return false;
			}
			if (key > root->_key)
			{
				return _EraseR(root->_right, key);
			}
			else if (key < root->_key)
			{
				return _EraseR(root->_left, key);
			}
			else
			{
				//找到了该节点
				if (root->_left == nullptr)//左孩子为空，右孩子存在的情况
				{
					Node* del = root;//用del记录一下这个带删除的节点
					root = root->_right;//然后将root的右节点赋值给root
					//上面这一步非常巧，左边的root实际是上一层父节点的左孩子或右孩子的引用
					//这里就体现出了引用传参的作用
					delete del;//链接关系处理完过后，最后删除这个root节点
				}
				else if (root->_right == nullptr)
				{
					Node* del = root;
					root = root->_left;
					delete del;
				}
				else
				{
					Node* minRight = root->_right;
					while (minRight->_left)//递归去找root的右子树中的最左节点，也就是最小节点
					{
						minRight = minRight->_left;
					}
					K min = minRight->_key;//用变量min存一下这个节点
					//转换成在root的右子树删除min
					_EraseR(root->_right, min);//这里转换成子问题去删除min
					root->_key = min;//再将root的值替换成min
				}
				return true;
			}
		}
		void _Inorder(Node* root)
		{
			if (root == nullptr)
				return;
 
			_Inorder(root->_left);
			cout << root->_key << " ";
			_Inorder(root->_right);
		}
		Node* _root;
	};
}

二叉树实现的代码测试：


void TestBSTree1()
{
	K::BSTree<int> t;
	int a[] = { 5, 3, 4, 1, 7, 8, 2, 6, 0, 9 };
	for (auto e : a)
	{
		t.InsertR(e);
	}
	t.Inorder();
 
	t.EraseR(5);
	t.Inorder();
 
	t.EraseR(8);
	t.Inorder();
 
	t.EraseR(7);
	t.Inorder();
 
	t.EraseR(5);
	t.Inorder();
 
	// 测试时，把树中的节点删除干净，确保没问题 
	for (auto e : a)
	{
		t.EraseR(e);
		t.Inorder();
	}
	t.Inorder();
}
void TestBSTree2()
{
	K::BSTree<int> t;
	int a[] = { 5, 3, 4, 1, 7, 8, 2, 6, 0, 9 };
	for (auto e : a)
	{
		t.InsertR(e);
	}
	t.Inorder();
 
	K::BSTree<int> copy = t;
	copy.Inorder();
}

1.4 二叉搜索树的应用

1. K模型：K模型即只有key作为关键码，结构中只需要存储Key即可，关键码即为需要搜索到的值。核心是想要体现在不在的问题。
比如：给一个单词word，判断该单词是否拼写正确，具体方式如下：
·以单词集合中的每个单词作为key，构建一棵二叉搜索树
·在二叉搜索树中检索该单词是否存在，存在则拼写正确，不存在则拼写错误。
2. KV模型：每一个关键码key，都有与之对应的值Value，即<Key, Value>的键值对。该种方式在现实生活中非常常见。核心是想要通过一个值去找另一个值。

比如：英汉词典就是英文与中文的对应关系，通过英文可以快速找到与其对应的中文，英
文单词与其对应的中文<word, chinese>就构成一种键值对；再比如统计单词次数，统计成功后，给定单词就可快速找到其出现的次数，单词与其出现次数就是<word, count>就构成一种键值对。
比如：实现一个简单的英汉词典dict，可以通过英文找到与其对应的中文，具体实现方式如下：

·<单词，中文含义>为键值对构造二叉搜索树，注意：二叉搜索树需要比较，键值对比较时只比较Key

·查询英文单词时，只需给出英文单词，就可快速找到与其对应的key

KV模型的代码：


namespace KV
{
	template<class K, class V>
	struct BSTreeNode
	{
		BSTreeNode<K, V>* _left;
		BSTreeNode<K, V>* _right;
 
		K _key;
		V _value;
 
		BSTreeNode(const K& key, const V& value)
			: _left(nullptr)
			, _right(nullptr)
			, _key(key)
			, _value(value)
		{}
	};
	template<class K, class V>
	class BSTree
	{
		typedef BSTreeNode<K, V> Node;
	private:
	// 如果树中已经存在key，返回false
	bool _InsertR(Node*& root, const K& key, const V& value)
	{
		if (root == NULL)  // 插入
		{
			root = new Node(key, value);
			return true;
		}
 
		if (root->_key < key)
		{
			return _InsertR(root->_right, key, value);
		}
		else if (root->_key > key)
		{
			return _InsertR(root->_left, key, value);
		}
		else
		{
			return false;
		}
	}
 
	Node* _FindR(Node* root, const K& key)
	{
		if (root == nullptr)
		{
			return nullptr;
		}
 
		if (root->_key < key)
		{
			return _FindR(root->_right, key);
		}
		else if (root->_key > key)
		{
			return _FindR(root->_left, key);
		}
		else
		{
			return root;
		}
	}
 
	// 如果树中不存在key，返回false
	// 存在，删除后，返回true
	bool _EraseR(Node*& root, const K& key)
	{
		if (root == NULL)
		{
			return false;
		}
 
		if (root->_key < key)
		{
			return _EraseR(root->_right, key);
		}
		else if (root->_key > key)
		{
			return _EraseR(root->_left, key);
		}
		else
		{
			// 找到了，root就是要删除的节点
			if (root->_left == nullptr)
			{
				Node* del = root;
				root = root->_right;
				delete del;
			}
			else if (root->_right == nullptr)
			{
				Node* del = root;
				root = root->_left;
				delete del;
			}
			else
			{
				Node* minRight = root->_right;
				while (minRight->_left)
				{
					minRight = minRight->_left;
				}
				K kmin = minRight->_key;
				V vMin = minRight->_value;
 
				// 转换成在root的右子树删除min
				_EraseR(root->_right, kmin);
				root->_key = kmin;
				root->_value = vMin;
			}
 
			return true;
		}
	}
 
	void _Destory(Node* root)
	{
		if (root == NULL)
		{
			return;
		}
 
		_Destory(root->_left);
		_Destory(root->_right);
		delete root;
	}
 
	Node* _Copy(Node* root)
	{
		if (root == nullptr)
		{
			return nullptr;
		}
 
		Node* copyNode = new Node(root->_key, root->_value);
		copyNode->_left = _Copy(root->_left);
		copyNode->_right = _Copy(root->_right);
 
		return copyNode;
	}
 
	public:
		BSTree()
		:_root(nullptr)
		{}
 
		BSTree(const BSTree<K, V>& t)
		{
			_root = _Copy(t._root);
		}
 
		// t1 = t2
		BSTree<K, V>& operator=(BSTree<K, V> t)
		{
			swap(_root, t._root);
			return *this;
		}
 
		~BSTree()
		{
			_Destory(_root);
			_root = nullptr;
		}
		bool InsertR(const K& key, const V& value)
		{
			return _InsertR(_root, key, value);
		}
 
		Node* FindR(const K& key)
		{
			return _FindR(_root, key);
		}
 
		bool EraseR(const K& key)
		{
			return _EraseR(_root, key);
		}
 
		void _InOrder(Node* root)
		{
			if (root == nullptr)
				return;
 
			_InOrder(root->_left);
			cout << root->_key << ":" << root->_value << endl;
			_InOrder(root->_right);
		}	
 
		void InOrder()
		{
			_InOrder(_root);
			cout << endl;
		}
 
	private:
		Node* _root;
	};
}

KV模型代码测试：


void TestBSTree3()
{
	KV::BSTree<string, string> dict;
	dict.InsertR("string", "字符串");
	dict.InsertR("tree", "树");
	dict.InsertR("left", "左边、剩余");
	dict.InsertR("right", "右边");
	dict.InsertR("sort", "排序");
	// ...插入词库中所有单词
	string str;
	while (cin >> str)
	{
		KV::BSTreeNode<string, string>* ret = dict.FindR(str);
		if (ret == nullptr)
		{
			cout << "单词拼写错误，词库中没有这个单词:" << str << endl;
		}
		else
		{
			cout << str << "中文翻译:" << ret->_value << endl;
		}
	}
}
void TestBSTree4()
{
	// 统计水果出现的次数
	string arr[] = { "苹果", "西瓜", "苹果", "西瓜", "苹果", "苹果", "西瓜", "苹果", "香蕉", "苹果", "香蕉" };
	KV::BSTree<string, int> countTree;
	for (const auto& str : arr)
	{
		// 先查找水果在不在搜索树中
		// 1、不在，说明水果第一次出现，则插入<水果, 1>
		//KV::BSTreeNode<string, int>* ret = countTree.FindR(str);
		auto ret = countTree.FindR(str);
		if (ret == NULL)
		{
			countTree.InsertR(str, 1);
		}
		else
		{
			ret->_value++;
		}
	}
 
	countTree.InOrder();
}

1.5 二叉搜索树的性能分析

插入和删除操作都必须先查找，查找效率代表了二叉搜索树中各个操作的性能。
对有n个结点的二叉搜索树，若每个元素查找的概率相等，则二叉搜索树平均查找长度是结点在二叉搜索树的深度的函数，即结点越深，则比较次数越多。
但对于同一个关键码集合，如果各关键码插入的次序不同，可能得到不同结构的二叉搜索树：

最优情况下，二叉搜索树为完全二叉树，其平均比较次数为：log2N
最差情况下，二叉搜索树退化为单支树，其平均比较次数为： N/2

问题：如果退化成单支树，二叉搜索树的性能就失去了。那能否进行改进，不论按照什么次序插入关键码，都可以是二叉搜索树的性能最佳？

所以实际中搜索二叉树，极端情况下没有办法保证效率，所以后面会对搜索二叉树进一步扩展延申:AVLTree,红黑树.他们对搜索二叉树，左右高度提出了要求，非常接近完全二叉树，他们的效率可以达到O(logN)

本文内容由网友自发贡献，转载请注明出处：【wpsshop博客】