赞
踩
代码说明:
1)输入:给定数据集;
2)属性:tree,根据孤立森林建立的二叉树;
3)用法示例:
# >>> np.random.seed(10)
# >>> temp_data = np.random.rand(10, 10)
# >>> temp_model = IsolationForest(temp_data)
# >>> temp_model.display()
4)树的结构说明:
4.1)若为叶子节点,则节点.value包含:当前数据在传入数据中的索引,所在层以及标志变量;
4.2)若为内部节点(非叶子节点),则节点.value包含:划分到该节点的数据样本个数,层数,当前划分所选择属性的序号,所选择的阈值以及标志变量(标记该节点是根节点、左孩子还是右孩子)。
"""
A single isolation tree built by recursive random partitioning.

@author: Inki
@email: inki.yinji@qq.com
@version: Created in 2020 1230, last modified in 2020 1231.
@note: The paper name --> isolation forest.
"""
from collections import deque

import numpy as np


class Tree:
    """
    The tree structure.
    @attribute:
        value: The payload tuple stored in the node.
        left: The left child (a Tree) or None.
        right: The right child (a Tree) or None.
    """

    def __init__(self, value, left, right):
        self.value = value
        self.left = left
        self.right = right


class IsolationForest:
    """
    The isolation forest algorithm.
    @param:
        data: The given data; a 2-D array-like with one sample per row.
    @attribute:
        tree: The root Tree node, or None when data holds no samples.
            A leaf stores (sample index, layer, flag); an internal node stores
            (number of samples, layer, attribute index, threshold, flag).
            The flag is "" for the root, otherwise "left" or "right".
    @example:
        # >>> np.random.seed(10)
        # >>> temp_data = np.random.rand(10, 10)
        # >>> temp_model = IsolationForest(temp_data)
        # >>> temp_model.display()
    """

    def __init__(self, data):
        # Convert once so that nested lists can be fancy-indexed like arrays.
        self.data = np.asarray(data)
        self.size_data = 0
        self.size_attribute = 0
        self.tree = None
        self.__initialize_isolation_forest()

    def __initialize_isolation_forest(self):
        """
        The initialize of isolation forest.
        """
        self.size_data = len(self.data)
        if self.size_data == 0:
            # Nothing to isolate; the tree stays None instead of failing on data[0].
            return
        self.size_attribute = len(self.data[0])
        self.tree = self.__get_isolation_forest(np.arange(self.size_data), -1, "")

    def __get_isolation_forest(self, idx, height, flag):
        """
        Get the isolation forest.
        @param:
            idx: The indices of the samples that reached this node.
            height: The layer of the parent node (-1 for the root call).
            flag: The position marker, i.e., "", "left" or "right".
        @return:
            The Tree built over idx, or None if idx is empty.
        """
        idx = np.asarray(idx)
        if len(idx) == 0:
            return None
        if len(idx) == 1:
            return Tree((idx[0], height + 1, flag), None, None)

        temp_random_attribute_idx = np.random.choice(self.size_attribute)
        temp_attribute = self.data[idx, temp_random_attribute_idx]

        # A threshold equal to the minimum sends every sample to the right and
        # only adds a useless layer, so draw from the values above the minimum.
        temp_above_min = temp_attribute[temp_attribute > temp_attribute.min()]
        temp_pool = temp_above_min if temp_above_min.size > 0 else temp_attribute
        temp_threshold = np.random.choice(temp_pool)

        temp_left_idx, temp_right_idx = self.__filter(idx, temp_attribute, temp_threshold)
        if len(temp_left_idx) == 0 or len(temp_right_idx) == 0:
            # The attribute cannot separate these samples (e.g., it is constant):
            # peel off the first sample so the recursion always makes progress.
            temp_left_idx, temp_right_idx = idx[:1], idx[1:]

        return Tree((len(idx), height + 1, temp_random_attribute_idx, temp_threshold, flag),
                    self.__get_isolation_forest(temp_left_idx, height + 1, "left"),
                    self.__get_isolation_forest(temp_right_idx, height + 1, "right"))

    def __filter(self, idx, attribute, threshold):
        """
        Filter the data.
        @param:
            idx: The sample indices.
            attribute: The attribute values aligned with idx.
            threshold: The split value.
        @return:
            The indices whose value is < threshold, and the remaining indices.
        """
        idx = np.asarray(idx)
        temp_mask = np.asarray(attribute) < threshold
        return idx[temp_mask], idx[~temp_mask]

    def display(self):
        """
        Display tree
        Print one line per node in breadth-first order; print nothing for an empty tree.
        """
        if self.tree is None:
            return
        temp_tree = deque([self.tree])
        while temp_tree:
            temp_node = temp_tree.popleft()
            temp_value = temp_node.value
            if len(temp_value) == 5:
                # The internal node; the root carries an empty flag.
                if temp_value[-1] == "":
                    print("Len: %d; layer: %d; attribute idx: %d; threshold: %.2f; flag: root" % temp_value[:-1])
                else:
                    print("Len: %d; layer: %d; attribute idx: %d; threshold: %.2f; flag: %s" % temp_value)
            else:
                print("Data idx: %d; layer: %d; flag: %s" % temp_value)
            if temp_node.left is not None:
                temp_tree.append(temp_node.left)
            if temp_node.right is not None:
                temp_tree.append(temp_node.right)


if __name__ == '__main__':
    np.random.seed(10)
    temp_data = np.random.rand(10, 10)
    temp_model = IsolationForest(temp_data)
    temp_model.display()
"""
The isolation tree with configurable attribute and split-value selection.

Author: Inki
Email: inki.yinji@qq.com
Create: 2021 0224
Last modify: 2021 0302
"""
from collections import deque

import numpy as np


class Tree:
    """
    The tree structure.
    :param
        value: The payload tuple stored in the node.
        left: The left child (a Tree) or None.
        right: The right child (a Tree) or None.
    """

    def __init__(self, value, left, right):
        self.value = value
        self.left = left
        self.right = right


class IsolationTree:
    """
    The designed isolation tree.
    :param
        mat:
            The given data matrix; a 2-D array-like with one sample per row.
        feature_idx:
            The index of selected attributes. All attributes are used when it is None.
        attribute_choose_mechanism:
            The feature choose mechanism.
            Including "random", "cycle", and the default setting is "random".
        attribute_value_choose_mechanism:
            The feature value choose mechanism.
            Including "random", "average", and the default setting is "random".
    :raise
        ValueError: If a mechanism name is unknown or feature_idx is empty.

    The attribute tree_ holds the root Tree node, or None for an empty matrix.
    A leaf stores (sample index, layer, flag); a non-leaf node stores
    (number of samples, layer, attribute index, attribute value, flag).
    """

    _ATTRIBUTE_MECHANISMS = ("random", "cycle")
    _VALUE_MECHANISMS = ("random", "average")

    def __init__(self, mat, feature_idx=None, attribute_choose_mechanism="random",
                 attribute_value_choose_mechanism="random"):
        # Reject unknown names up front instead of silently producing None later.
        if attribute_choose_mechanism not in self._ATTRIBUTE_MECHANISMS:
            raise ValueError("Unknown attribute_choose_mechanism: %r" % (attribute_choose_mechanism,))
        if attribute_value_choose_mechanism not in self._VALUE_MECHANISMS:
            raise ValueError("Unknown attribute_value_choose_mechanism: %r"
                             % (attribute_value_choose_mechanism,))
        # Convert once so that nested lists can be fancy-indexed like arrays.
        self.__mat = np.asarray(mat)
        self.__feature_idx = feature_idx
        self.__attribute_choose_mechanism = attribute_choose_mechanism
        self.__attribute_value_choose_mechanism = attribute_value_choose_mechanism
        self.__m = 0
        self.__n = 0
        self.__idx_count = 0
        self.tree_ = None
        self.__init_isolation_tree()

    def __init_isolation_tree(self):
        """
        The initialize of IsolationTree
        """
        self.__m = len(self.__mat)
        if self.__m == 0:
            # Nothing to isolate; tree_ stays None instead of failing on mat[0].
            return
        self.__n = len(self.__mat[0])
        if self.__feature_idx is None:
            self.__feature_idx = np.arange(self.__n)
        if len(self.__feature_idx) == 0:
            raise ValueError("feature_idx must contain at least one attribute index.")
        self.tree_ = self.__get_tree(np.arange(self.__m), -1, "Root")

    def __get_tree(self, idx, height, flag):
        """
        Getting tree.
        :param
            idx: The indices of the samples that reached this node.
            height: The layer of the parent node (-1 for the root call).
            flag: The position marker, i.e., "Root", "Left" or "Right".
        :return
            The Tree built over idx, or None if idx is empty.
        """
        idx = np.asarray(idx)
        if len(idx) == 0:
            return None
        if len(idx) == 1:
            return Tree((idx[0], height + 1, flag), None, None)

        attribute_idx = self.__get_attribute_idx()
        # Keep this array aligned with idx: deduplicating or reordering it would
        # compare samples against values that belong to other samples.
        attribute_arr = self.__mat[idx, attribute_idx]
        attribute_value = self.__get_attribute_value(attribute_arr)

        left_idx, right_idx = self.__filter(idx, attribute_arr, attribute_value)
        if len(left_idx) == 0 or len(right_idx) == 0:
            # The attribute cannot separate these samples (e.g., it is constant):
            # peel off the first sample so the recursion always makes progress.
            left_idx, right_idx = idx[:1], idx[1:]

        return Tree((len(idx), height + 1, attribute_idx, attribute_value, flag),
                    self.__get_tree(left_idx, height + 1, "Left"),
                    self.__get_tree(right_idx, height + 1, "Right"))

    def __get_attribute_idx(self):
        """
        Getting attribute index.
        :return
            A random entry of feature_idx ("random"), or the next entry in
            round-robin order ("cycle").
        """
        if self.__attribute_choose_mechanism == "random":
            return np.random.choice(self.__feature_idx)
        if self.__attribute_choose_mechanism == "cycle":
            if self.__idx_count == len(self.__feature_idx):
                self.__idx_count = 0
            ret_feature_idx = self.__feature_idx[self.__idx_count]
            self.__idx_count += 1
            return ret_feature_idx
        raise ValueError("Unknown attribute_choose_mechanism: %r" % (self.__attribute_choose_mechanism,))

    def __get_attribute_value(self, attribute_arr):
        """
        Taking a value from the specified attribute.
        :param
            attribute_arr: The attribute values of the samples at the current node.
        :return
            The split value.
        """
        attribute_arr = np.asarray(attribute_arr)
        if self.__attribute_value_choose_mechanism == "random":
            # A value equal to the minimum sends every sample to the right, so
            # prefer the values above the minimum whenever there are any.
            above_min = attribute_arr[attribute_arr > attribute_arr.min()]
            pool = above_min if above_min.size > 0 else attribute_arr
            return np.random.choice(pool)
        if self.__attribute_value_choose_mechanism == "average":
            return np.average(attribute_arr)
        raise ValueError("Unknown attribute_value_choose_mechanism: %r"
                         % (self.__attribute_value_choose_mechanism,))

    def __filter(self, idx, attribute_arr, attribute_value):
        """
        Filtering data.
        :param
            idx: The sample indices.
            attribute_arr: The attribute values aligned with idx.
            attribute_value: The split value.
        :return
            The indices whose value is < attribute_value, and the remaining indices.
        """
        idx = np.asarray(idx)
        mask = np.asarray(attribute_arr) < attribute_value
        return idx[mask], idx[~mask]

    def show_tree(self):
        """
        Showing tree.
        Print one line per node in breadth-first order; print nothing for an empty tree.
        """
        if self.tree_ is None:
            return
        tree = deque([self.tree_])
        while tree:
            node = tree.popleft()
            value = node.value
            if len(value) == 5:
                # The non-leaf node.
                print("Len: %d; layer: %d; attribute idx: %d; attribute value: %.2f; flag: %s" % value)
            else:
                print("Data idx: %d; layer: %d; flag: %s" % value)
            if node.left is not None:
                tree.append(node.left)
            if node.right is not None:
                tree.append(node.right)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。