
Notes on BERT's modeling.py

The code below is the top of `modeling.py` from the original Google BERT release (Apache 2.0): the `BertConfig` hyperparameter container and the beginning of the `BertModel` class.
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The main BERT model and related functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import copy
import json
import math
import re
import numpy as np
import six
import tensorflow as tf


class BertConfig(object):
  """Configuration for `BertModel`."""

  def __init__(self,
               vocab_size,
               hidden_size=768,
               num_hidden_layers=12,
               num_attention_heads=12,
               intermediate_size=3072,
               hidden_act="gelu",
               hidden_dropout_prob=0.1,
               attention_probs_dropout_prob=0.1,
               max_position_embeddings=512,
               type_vocab_size=16,
               initializer_range=0.02):
    """Constructs BertConfig.

    Args:
      vocab_size: Vocabulary size of `input_ids` in `BertModel`.
      hidden_size: Size of the encoder layers and the pooler layer.
      num_hidden_layers: Number of hidden layers in the Transformer encoder.
      num_attention_heads: Number of attention heads for each attention layer in
        the Transformer encoder.
      intermediate_size: The size of the "intermediate" (i.e., feed-forward)
        layer in the Transformer encoder.
      hidden_act: The non-linear activation function (function or string) in the
        encoder and pooler.
      hidden_dropout_prob: The dropout probability for all fully connected
        layers in the embeddings, encoder, and pooler.
      attention_probs_dropout_prob: The dropout ratio for the attention
        probabilities.
      max_position_embeddings: The maximum sequence length that this model might
        ever be used with. Typically set this to something large just in case
        (e.g., 512 or 1024 or 2048).
      type_vocab_size: The vocabulary size of the `token_type_ids` passed into
        `BertModel`.
      initializer_range: The stdev of the truncated_normal_initializer for
        initializing all weight matrices.
    """
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size
    self.num_hidden_layers = num_hidden_layers
    self.num_attention_heads = num_attention_heads
    self.hidden_act = hidden_act
    self.intermediate_size = intermediate_size
    self.hidden_dropout_prob = hidden_dropout_prob
    self.attention_probs_dropout_prob = attention_probs_dropout_prob
    self.max_position_embeddings = max_position_embeddings
    self.type_vocab_size = type_vocab_size
    self.initializer_range = initializer_range

  @classmethod
  def from_dict(cls, json_object):
    """Constructs a `BertConfig` from a Python dictionary of parameters."""
    config = BertConfig(vocab_size=None)
    for (key, value) in six.iteritems(json_object):
      config.__dict__[key] = value
    return config

  @classmethod
  def from_json_file(cls, json_file):
    """Constructs a `BertConfig` from a json file of parameters."""
    with tf.gfile.GFile(json_file, "r") as reader:
      text = reader.read()
    return cls.from_dict(json.loads(text))

  def to_dict(self):
    """Serializes this instance to a Python dictionary."""
    output = copy.deepcopy(self.__dict__)
    return output

  def to_json_string(self):
    """Serializes this instance to a JSON string."""
    return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
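
Before moving on to `BertModel`, here is a quick usage sketch for `BertConfig` (my addition, not part of the original file; the JSON path is a placeholder for the `bert_config.json` shipped with a released checkpoint):

```python
# Build a small config directly, then round-trip it through a dict / JSON string.
config = BertConfig(vocab_size=32000, hidden_size=256, num_hidden_layers=4,
                    num_attention_heads=4, intermediate_size=1024)
restored = BertConfig.from_dict(config.to_dict())
assert restored.hidden_size == 256
print(restored.to_json_string())

# In practice the config usually comes from the checkpoint's JSON file:
# config = BertConfig.from_json_file("/path/to/bert_config.json")  # hypothetical path
```

Note that `from_dict` starts from `BertConfig(vocab_size=None)` and simply copies every key into `__dict__`, so any field present in the JSON overrides the defaults shown in the constructor signature.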





class BertModel(object):
  """BERT model ("Bidirectional Encoder Representations from Transformers").

  Example usage:

  ```python
  # Already been converted into WordPiece token ids
  input_ids = tf.constant([[31, 51, 99], [15, 5, 0]])
  input_mask = tf.constant([[1, 1, 1], [1, 1, 0]])
  token_type_ids = tf.constant([[0, 0, 1], [0, 2, 0]])

  config = modeling.BertConfig(vocab_size=32000, hidden_size=512,
    num_hidden_layers=8, num_attention_heads=8, intermediate_size=1024)

  model = modeling.BertModel(config=config, is_training=True,
    input_ids=input_ids, input_mask=input_mask, token_type_ids=token_type_ids)

  label_embeddings = tf.get_variable(...)
  pooled_output = model.get_pooled_output()
  logits = tf.matmul(pooled_output, label_embeddings)
  ...
  ```
  """



  def __init__(self,
               config,
               is_training,
               input_ids,
               input_mask=None,
               token_type_ids=None,
               use_one_hot_embeddings=False,
               scope=None):
    """Constructor for BertModel.

    Args:
      config: `BertConfig` instance.
      is_training: bool. true for training model, false for eval model. Controls
        whether dropout will be applied.
      input_ids: int32 Tensor of shape [batch_size, seq_length].
      input_mask: (optional) int32 Tensor of shape [batch_size, seq_length].
      token_type_ids: (optional) int32 Tensor of shape [batch_size, seq_length].
      use_one_hot_embeddings: (optional) bool. Whether to use one-hot word
        embeddings or tf.embedding_lookup() for the word embeddings.
      scope: (optional) variable scope. Defaults to "bert".

    Raises:
      ValueError: The config is invalid or one of the input tensor shapes
        is invalid.
    """
    config = copy.deepcopy(config)
    if not is_training:
      config.hidden_dropout_prob = 0.0
      config.attention_probs_dropout_prob = 0.0

    input_shape = get_shape_list(input_ids, expected_rank=2)
    batch_size = input_shape[0]
    seq_length = input_shape[1]

    if input_mask is None:
      input_mask = tf.ones(shape=[batch_size, seq_length], dtype=tf.int32)

    if token_type_ids is None:
      token_type_ids = tf.zeros(shape=[batch_size, seq_length], dtype=tf.int32)

    with tf.variable_scope(scope, default_name="bert"):
      with tf.variable_scope("embeddings"):
        # Perform embedding lookup on the word ids.
        (self.embedding_output, self.embedding_table) = embedding_lookup(
            input_ids=input_ids,
            vocab_size=config.vocab_size,
            embedding_size=config.hidden_size,
            initializer_range=config.initializer_range,
            word_embedding_name="word_embeddings",
            use_one_hot_embeddings=use_one_hot_embeddings)

        # Add positional embeddings and token type embeddings, then layer
        # normalize and perform dropout.
        self.embedding_output = embedding_postprocessor(
            input_tensor=self.embedding_output,
            use_token_type=True,
            token_type_ids=token_type_ids,
            token_type_vocab_size=config.type_vocab_size,
            token_type_embedding_name="token_type_embeddings",
            use_position_embeddings=True,
            position_embedding_name="position_embeddings",
            initializer_range=config.initializer_range,
            max_position_embeddings=config.max_position_embeddings,
            dropout_prob=config.hidden_dropout_prob)

      with tf.variable_scope("encoder"):
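The excerpt stops right at the `encoder` scope. For completeness, the rest of the constructor in the upstream google-research/bert `modeling.py` looks roughly like the sketch below (reproduced from memory, so treat the exact arguments as approximate rather than authoritative): it turns the 2D `input_mask` into a 3D attention mask, runs the stacked Transformer, and pools the first token's final hidden state.

```python
      with tf.variable_scope("encoder"):
        # Convert the 2D mask [batch_size, seq_length] into a 3D attention
        # mask [batch_size, seq_length, seq_length] used for the attention scores.
        attention_mask = create_attention_mask_from_input_mask(
            input_ids, input_mask)

        # Run the stacked Transformer; each layer output has shape
        # [batch_size, seq_length, hidden_size].
        self.all_encoder_layers = transformer_model(
            input_tensor=self.embedding_output,
            attention_mask=attention_mask,
            hidden_size=config.hidden_size,
            num_hidden_layers=config.num_hidden_layers,
            num_attention_heads=config.num_attention_heads,
            intermediate_size=config.intermediate_size,
            intermediate_act_fn=get_activation(config.hidden_act),
            hidden_dropout_prob=config.hidden_dropout_prob,
            attention_probs_dropout_prob=config.attention_probs_dropout_prob,
            initializer_range=config.initializer_range,
            do_return_all_layers=True)

      self.sequence_output = self.all_encoder_layers[-1]

      # The "pooler" converts the [batch_size, seq_length, hidden_size] encoder
      # output into a [batch_size, hidden_size] vector by taking the hidden
      # state of the first ([CLS]) token and applying a dense + tanh layer.
      with tf.variable_scope("pooler"):
        first_token_tensor = tf.squeeze(self.sequence_output[:, 0:1, :], axis=1)
        self.pooled_output = tf.layers.dense(
            first_token_tensor,
            config.hidden_size,
            activation=tf.tanh,
            kernel_initializer=create_initializer(config.initializer_range))
```

`get_pooled_output()` and `get_sequence_output()` then simply return `self.pooled_output` and `self.sequence_output`, which is what the example in the class docstring relies on.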