当前位置:   article > 正文

Python实现孤立森林 (Isolation Forest)_孤立森林python代码

孤立森林python代码

引入

  代码说明:
  1)输入:给定数据集;
  2)属性:tree,按孤立森林的划分方式建立的一棵二叉树(保存其根节点);
  3)用法示例:

        # >>> np.random.seed(10)
        # >>> temp_data = np.random.rand(10, 10)
        # >>> temp_model = IsolationForest(temp_data)
        # >>> temp_model.display()
  • 1
  • 2
  • 3
  • 4

  4)树的结构说明:
  4.1)若为叶子节点,则节点.value包含:当前数据在传入数据中的索引、所在层数以及标志变量(表示该节点是其父节点的左孩子还是右孩子);
  4.2)若为内部(非叶子)节点,则节点.value包含:划分到该节点的数据样本个数、层数、当前划分所选择属性的序号、所选择的阈值以及标志变量。

代码

"""
@author: Inki
@email: inki.yinji@qq.com
@version: Created in 2020 1230, last modified in 2020 1231.
@note:
    The paper name --> isolation forest.
"""

import numpy as np


class Tree:
    """
    A binary tree node.

    Holds a payload plus references to a left and a right child. All three
    constructor arguments are stored exactly as given.
    """

    def __init__(self, value, left, right):
        # Plain attribute storage: nothing is validated or copied.
        self.value, self.left, self.right = value, left, right


class IsolationForest:
    """
    The isolation forest algorithm.

    A single randomly built binary tree is constructed once, in the
    constructor, and stored in ``tree``. Recursion stops at nodes that hold
    exactly one sample.

    Node payloads (``Tree.value``):
        leaf node:     (data index, layer, flag)
        internal node: (number of samples, layer, attribute index, threshold, flag)
    where flag is "" for the root and "left" / "right" for the other nodes.

    @param:
        data: The given data. It is indexed as data[rows][:, column], so a
            2-D numpy array of shape (samples, attributes) is expected.
    @attribute:
        data: The given data, stored as passed in.
        size_data: The number of samples.
        size_attribute: The number of attributes (0 when there are no samples).
        tree: The root node of the tree, or None when data holds no samples.
    @example:
        # >>> np.random.seed(10)
        # >>> temp_data = np.random.rand(10, 10)
        # >>> temp_model = IsolationForest(temp_data)
        # >>> temp_model.display()
    """

    def __init__(self, data):
        self.data = data
        self.size_data = 0
        self.size_attribute = 0
        self.tree = None
        self.__initialize_isolation_forest()

    def __initialize_isolation_forest(self):
        """
        Record the data dimensions and build the tree.
        """
        self.size_data = len(self.data)
        if self.size_data == 0:
            # Nothing to isolate: keep the tree empty instead of failing on data[0].
            return
        self.size_attribute = len(self.data[0])
        self.tree = self.__get_isolation_forest(np.arange(self.size_data), -1, "")

    def __get_isolation_forest(self, idx, height, flag):
        """
        Recursively build the subtree for the samples listed in idx.

        @param:
            idx: The indices (rows of data) that reached this node.
            height: The layer of the parent node (-1 for the root call).
            flag: "" for the root, otherwise "left" or "right".
        @return:
            The subtree root, or None when idx is empty.
        """
        if len(idx) == 0:
            return None
        if len(idx) == 1:
            return Tree((idx[0], height + 1, flag), None, None)

        # Draw the attribute first and the threshold second; this order fixes
        # the random sequence and therefore the tree obtained for a given seed.
        temp_random_attribute_idx = np.random.choice(self.size_attribute)
        temp_attribute = self.data[idx][:, temp_random_attribute_idx]
        temp_threshold = np.random.choice(temp_attribute)
        temp_left_idx, temp_right_idx = self.__split(idx, temp_attribute, temp_threshold)
        return Tree((len(idx), height + 1, temp_random_attribute_idx, temp_threshold, flag),
                    self.__get_isolation_forest(temp_left_idx, height + 1, "left"),
                    self.__get_isolation_forest(temp_right_idx, height + 1, "right"))

    def __split(self, idx, attribute, threshold):
        """
        Partition idx by the threshold, guarding against inseparable samples.

        When the threshold equals the smallest value, nothing falls to the
        left and the caller recurses on the unchanged subset, normally
        drawing another attribute/threshold. If every row of the subset is
        identical, no draw can ever separate them, so the first sample is
        split off by position to guarantee termination.
        """
        left_idx, right_idx = self.__filter(idx, attribute, threshold)
        if not left_idx and len(idx) > 1:
            rows = self.data[idx]
            if (rows == rows[0]).all():
                left_idx, right_idx = [idx[0]], list(idx[1:])
        return left_idx, right_idx

    def __filter(self, idx, attribute, threshold):
        """
        Filter the data.

        @param:
            idx: The sample indices to partition.
            attribute: The attribute value of each sample, aligned with idx.
            threshold: The split value.
        @return:
            (left, right): indices whose value is < threshold, and the rest.
        """
        ret_left, ret_right = [], []
        for sample_idx, sample_value in zip(idx, attribute):
            if sample_value < threshold:
                ret_left.append(sample_idx)
            else:
                ret_right.append(sample_idx)

        return ret_left, ret_right

    def display(self):
        """
        Display tree

        Prints one line per node in breadth-first order (layer by layer,
        left to right). Prints nothing when the tree is empty.
        """
        if self.tree is None:
            return

        level = [self.tree]
        while level:
            next_level = []
            for temp_node in level:
                temp_value = temp_node.value
                if len(temp_value) == 5:
                    # Internal node; the root carries an empty flag.
                    if temp_value[-1] == "":
                        print("Len: %d; layer: %d; attribute idx: %d; threshold: %.2f; flag: root" % temp_value[:-1])
                    else:
                        print("Len: %d; layer: %d; attribute idx: %d; threshold: %.2f; flag: %s" % temp_value)
                else:
                    print("Data idx: %d; layer: %d; flag: %s" % temp_value)
                for temp_child in (temp_node.left, temp_node.right):
                    if temp_child is not None:
                        next_level.append(temp_child)
            level = next_level


if __name__ == '__main__':
    # Demo: build the tree for a reproducible 10 x 10 random matrix and print it.
    np.random.seed(10)
    demo_model = IsolationForest(np.random.rand(10, 10))
    demo_model.display()

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108

改进代码

"""
Author: Inki
Email: inki.yinji@qq.com
Create: 2021 0224
Last modify: 2021 0302
"""

import numpy as np


class Tree:
    """
    The tree structure.

    One node of a binary tree: a payload and references to the left and
    right subtrees, each stored exactly as passed to the constructor.
    """

    def __init__(self, value, left, right):
        # No validation or copying; the node simply keeps the three references.
        self.left, self.right = left, right
        self.value = value


class IsolationTree:
    """
    The designed isolation tree.

    The tree is built once, in the constructor, and stored in ``tree_``.

    Node payloads (``Tree.value``):
        leaf node:     (data index, layer, flag)
        internal node: (number of samples, layer, attribute index, attribute value, flag)
    where flag is "Root", "Left" or "Right".

    :param
        mat:                                The given data matrix.
            It is indexed as mat[rows, column], so a 2-D numpy array is expected.
        feature_idx:                        The index of selected attributes.
            Defaults to all columns of mat.
        attribute_choose_mechanism:         The feature choose mechanism.
            Including "random", "cycle", and the default setting is "random".
        attribute_value_choose_mechanism:   The feature value choose mechanism.
            Including "random", "average", and the default setting is "random".
    :attribute
        tree_:                              The root node, or None when mat has no rows.
    :raise
        ValueError:                         If an unknown mechanism name is used while building the tree.
    """

    def __init__(self, mat, feature_idx=None, attribute_choose_mechanism="random",
                 attribute_value_choose_mechanism="random"):
        self.__mat = mat
        self.__feature_idx = feature_idx
        self.__attribute_choose_mechanism = attribute_choose_mechanism
        self.__attribute_value_choose_mechanism = attribute_value_choose_mechanism
        self.__m = 0
        self.__n = 0
        self.__idx_count = 0
        self.tree_ = None
        self.__init_isolation_tree()

    def __init_isolation_tree(self):
        """
        The initialize of IsolationTree
        """
        self.__m = len(self.__mat)
        # An empty matrix has no first row to measure; treat it as zero columns.
        self.__n = len(self.__mat[0]) if self.__m else 0
        if self.__feature_idx is None:
            self.__feature_idx = np.arange(self.__n)
        self.tree_ = self.__get_tree(np.arange(self.__m), -1, "Root")

    def __get_tree(self, idx, height, flag):
        """
        Getting tree.

        :param
            idx:        The sample indices (rows of mat) that reached this node.
            height:     The layer of the parent node (-1 for the root call).
            flag:       "Root", "Left" or "Right".
        :return
            The subtree root, or None when idx is empty.
        """
        if len(idx) == 0:
            return None
        if len(idx) == 1:
            return Tree((idx[0], height + 1, flag), None, None)

        attribute_idx = self.__get_attribute_idx()
        attribute_arr = self.__mat[idx, attribute_idx]
        attribute_value = self.__get_attribute_value(attribute_arr)
        left_idx, right_idx = self.__split(idx, attribute_arr, attribute_value)

        return Tree((len(idx), height + 1, attribute_idx, attribute_value, flag),
                    self.__get_tree(left_idx, height + 1, "Left"),
                    self.__get_tree(right_idx, height + 1, "Right"))

    def __split(self, idx, attribute_arr, attribute_value):
        """
        Splitting the samples of one node into a left and a right index list.

        attribute_arr must stay aligned with idx (one value per sample);
        the comparison is done on these per-sample values, never on a
        de-duplicated copy, so no sample is dropped or mismatched.
        """
        if np.all(attribute_arr == attribute_arr[0]):
            # A constant attribute cannot separate anything: split off the
            # first sample by position so the recursion always shrinks.
            return [idx[0]], list(idx[1:])
        return self.__filter(idx, attribute_arr, attribute_value)

    def __get_attribute_idx(self):
        """
        Getting attribute index.

        "random" draws uniformly from feature_idx; "cycle" walks through
        feature_idx in order and wraps around at the end.
        """
        mechanism = self.__attribute_choose_mechanism
        if mechanism == "random":
            return np.random.choice(self.__feature_idx)
        if mechanism == "cycle":
            ret_feature_idx = self.__feature_idx[self.__idx_count]
            self.__idx_count = (self.__idx_count + 1) % len(self.__feature_idx)
            return ret_feature_idx
        raise ValueError("Unknown attribute choose mechanism: %r" % (mechanism,))

    def __get_attribute_value(self, attribute_arr):
        """
        Taking a value from the specified attribute.

        "random" picks one of the observed values; "average" uses their mean.
        """
        mechanism = self.__attribute_value_choose_mechanism
        if mechanism == "random":
            return np.random.choice(attribute_arr)
        if mechanism == "average":
            return np.average(attribute_arr)
        raise ValueError("Unknown attribute value choose mechanism: %r" % (mechanism,))

    def __filter(self, idx, attribute_arr, attribute_value):
        """
        Filtering data.

        :return
            (left, right): indices whose value is < attribute_value, and the rest.
            The left list is empty when attribute_value is the smallest value.
        """
        ret_left, ret_right = [], []
        for idx_i, att_i in zip(idx, attribute_arr):
            if att_i < attribute_value:
                ret_left.append(idx_i)
            else:
                ret_right.append(idx_i)

        return ret_left, ret_right

    def show_tree(self):
        """
        Showing tree.

        Prints one line per node in breadth-first order (layer by layer,
        left to right). Prints nothing when the tree is empty.
        """
        if self.tree_ is None:
            return

        level = [self.tree_]
        while level:
            next_level = []
            for node in level:
                value = node.value
                if len(value) == 5:
                    # The non-leaf node.
                    print("Len: %d; layer: %d; attribute idx: %d; attribute value: %.2f; flag: %s" % value)
                else:
                    print("Data idx: %d; layer: %d; flag: %s" % value)

                for child in (node.left, node.right):
                    if child is not None:
                        next_level.append(child)
            level = next_level

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/从前慢现在也慢/article/detail/648510
推荐阅读
相关标签
  

闽ICP备14008679号