TensorFlow + JavaScript:如今,最流行、最先进的AI框架已经支持地球上使用最广泛的编程语言。因此,让我们借助TensorFlow.js(通过WebGL实现GPU加速),在Web浏览器中利用深度学习,让文本和NLP(自然语言处理)聊天机器人神奇地运转起来!
TriviaQA的JSON文件包含一个Data数组,数组中的每个元素都是一条问答(Q&A)记录,其结构类似于下面的示例文件:
- {
- "Data": [
- {
- "Answer": {
- "Aliases": [
- "Sunset Blvd",
- "West Sunset Boulevard",
- "Sunset Boulevard",
- "Sunset Bulevard",
- "Sunset Blvd."
- ],
- "MatchedWikiEntityName": "Sunset Boulevard",
- "NormalizedAliases": [
- "west sunset boulevard",
- "sunset blvd",
- "sunset boulevard",
- "sunset bulevard"
- ],
- "NormalizedMatchedWikiEntityName": "sunset boulevard",
- "NormalizedValue": "sunset boulevard",
- "Type": "WikipediaEntity",
- "Value": "Sunset Boulevard"
- },
- "EntityPages": [
- {
- "DocSource": "TagMe",
- "Filename": "Andrew_Lloyd_Webber.txt",
- "LinkProbability": "0.02934",
- "Rho": "0.22520",
- "Title": "Andrew Lloyd Webber"
- }
- ],
- "Question": "Which Lloyd Webber musical premiered in the US on 10th December 1993?",
- "QuestionId": "tc_33",
- "QuestionSource": "http://www.triviacountry.com/",
- "SearchResults": [
- {
- "Description": "The official website for Andrew Lloyd Webber, ... from the Andrew Lloyd Webber/Jim Steinman musical Whistle ... American premiere on 9th December 1993 at the ...",
- "DisplayUrl": "www.andrewlloydwebber.com",
- "Filename": "35/35_995.txt",
- "Rank": 0,
- "Title": "Andrew Lloyd Webber | The official website for Andrew ...",
- "Url": "http://www.andrewlloydwebber.com/"
- }
- ]
- }
- ],
- "Domain": "Web",
- "VerifiedEval": false,
- "Version": 1.0
- }

- (async () => {
- // Load TriviaQA data
- let triviaData = await fetch( "web/verified-wikipedia-dev.json" ).then( r => r.json() );
- let data = triviaData.Data;
- // Process all QA to map to answers
- let questions = data.map( qa => qa.Question );
- })();
对于这些聊天问题以及一般的英语句子,单词的位置和顺序都可能影响其含义。因此,在把句子转换成向量时,我们不能简单地使用不保留单词位置信息的“词袋”(bag of words)。在准备训练数据时,我们将使用一种称为词嵌入(word embedding)的方法,并创建一个表示单词及其位置的单词索引列表。
- let bagOfWords = {};
- let allWords = [];
- let wordReference = {};
- questions.forEach( q => {
- let words = q.replace(/[^a-z ]/gi, "").toLowerCase().split( " " ).filter( x => !!x );
- words.forEach( w => {
- if( !bagOfWords[ w ] ) {
- bagOfWords[ w ] = 0;
- }
- bagOfWords[ w ]++; // Counting occurrence just for word frequency fun
- });
- });
- allWords = Object.keys( bagOfWords );
- allWords.forEach( ( w, i ) => {
- wordReference[ w ] = i + 1;
- });

- // Create a tokenized vector for each question
- const maxSentenceLength = 30;
- let vectors = [];
- questions.forEach( q => {
- let qVec = [];
- // Use a regex to only get spaces and letters and remove any blank elements
- let words = q.replace(/[^a-z ]/gi, "").toLowerCase().split( " " ).filter( x => !!x );
- for( let i = 0; i < maxSentenceLength; i++ ) {
- if( words[ i ] ) {
- qVec.push( wordReference[ words[ i ] ] );
- }
- else {
- // Add padding to keep the vectors the same length
- qVec.push( 0 );
- }
- }
- vectors.push( qVec );
- });
- let outputs = questions.map( ( q, index ) => {
- let output = [];
- for( let i = 0; i < questions.length; i++ ) {
- output.push( i === index ? 1 : 0 );
- }
- return output;
- });

- // Define our RNN model with several hidden layers
- const model = tf.sequential();
- // Add 1 to inputDim for the "padding" character
- model.add(tf.layers.embedding( { inputDim: allWords.length + 1, outputDim: 128, inputLength: maxSentenceLength } ) );
- // model.add(tf.layers.simpleRNN( { units: 32 } ) );
- model.add(tf.layers.bidirectional( { layer: tf.layers.simpleRNN( { units: 32 } ), mergeMode: "concat" } ) );
- model.add(tf.layers.dense( { units: 50 } ) );
- model.add(tf.layers.dense( { units: 25 } ) );
- model.add(tf.layers.dense( {
- units: questions.length,
- activation: "softmax"
- } ) );
- model.compile({
- optimizer: tf.train.adam(),
- loss: "categoricalCrossentropy",
- metrics: [ "accuracy" ]
- });

- const xs = tf.stack( vectors.map( x => tf.tensor1d( x ) ) );
- const ys = tf.stack( outputs.map( x => tf.tensor1d( x ) ) );
- await model.fit( xs, ys, {
- epochs: 20,
- shuffle: true,
- callbacks: {
- onEpochEnd: ( epoch, logs ) => {
- setText( `Training... Epoch #${epoch} (${logs.acc})` );
- console.log( "Epoch #", epoch, logs );
- }
- }
- } );
- setText( "Trivia Know-It-All Bot is Ready!" );
- document.getElementById( "question" ).addEventListener( "keyup", function( event ) {
- // Number 13 is the "Enter" key on the keyboard
- if( event.keyCode === 13 ) {
- // Cancel the default action, if needed
- event.preventDefault();
- // Trigger the button element with a click
- document.getElementById( "submit" ).click();
- }
- });
- document.getElementById( "submit" ).addEventListener( "click", async function( event ) {
- let text = document.getElementById( "question" ).value;
- document.getElementById( "question" ).value = "";
- // Our prediction code will go here
- });

- <html>
- <head>
- <title>Trivia Know-It-All: Chatbots in the Browser with TensorFlow.js</title>
- <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@2.0.0/dist/tf.min.js"></script>
- </head>
- <body>
- <h1 id="status">Trivia Know-It-All Bot</h1>
- <label>Ask a trivia question:</label>
- <input id="question" type="text" />
- <button id="submit">Submit</button>
- <p id="bot-question"></p>
- <p id="bot-answer"></p>
- <script>
- function setText( text ) {
- document.getElementById( "status" ).innerText = text;
- }
- (async () => {
- // Load TriviaQA data
- let triviaData = await fetch( "web/verified-wikipedia-dev.json" ).then( r => r.json() );
- let data = triviaData.Data;
- // Process all QA to map to answers
- let questions = data.map( qa => qa.Question );
- let bagOfWords = {};
- let allWords = [];
- let wordReference = {};
- questions.forEach( q => {
- let words = q.replace(/[^a-z ]/gi, "").toLowerCase().split( " " ).filter( x => !!x );
- words.forEach( w => {
- if( !bagOfWords[ w ] ) {
- bagOfWords[ w ] = 0;
- }
- bagOfWords[ w ]++; // Counting occurrence just for word frequency fun
- });
- });
- allWords = Object.keys( bagOfWords );
- allWords.forEach( ( w, i ) => {
- wordReference[ w ] = i + 1;
- });
- // Create a tokenized vector for each question
- const maxSentenceLength = 30;
- let vectors = [];
- questions.forEach( q => {
- let qVec = [];
- // Use a regex to only get spaces and letters and remove any blank elements
- let words = q.replace(/[^a-z ]/gi, "").toLowerCase().split( " " ).filter( x => !!x );
- for( let i = 0; i < maxSentenceLength; i++ ) {
- if( words[ i ] ) {
- qVec.push( wordReference[ words[ i ] ] );
- }
- else {
- // Add padding to keep the vectors the same length
- qVec.push( 0 );
- }
- }
- vectors.push( qVec );
- });
- let outputs = questions.map( ( q, index ) => {
- let output = [];
- for( let i = 0; i < questions.length; i++ ) {
- output.push( i === index ? 1 : 0 );
- }
- return output;
- });
- // Define our RNN model with several hidden layers
- const model = tf.sequential();
- // Add 1 to inputDim for the "padding" character
- model.add(tf.layers.embedding( { inputDim: allWords.length + 1, outputDim: 128, inputLength: maxSentenceLength, maskZero: true } ) );
- model.add(tf.layers.simpleRNN( { units: 32 } ) );
- // model.add(tf.layers.bidirectional( { layer: tf.layers.simpleRNN( { units: 32 } ), mergeMode: "concat" } ) );
- model.add(tf.layers.dense( { units: 50 } ) );
- model.add(tf.layers.dense( { units: 25 } ) );
- model.add(tf.layers.dense( {
- units: questions.length,
- activation: "softmax"
- } ) );
- model.compile({
- optimizer: tf.train.adam(),
- loss: "categoricalCrossentropy",
- metrics: [ "accuracy" ]
- });
- const xs = tf.stack( vectors.map( x => tf.tensor1d( x ) ) );
- const ys = tf.stack( outputs.map( x => tf.tensor1d( x ) ) );
- await model.fit( xs, ys, {
- epochs: 20,
- shuffle: true,
- callbacks: {
- onEpochEnd: ( epoch, logs ) => {
- setText( `Training... Epoch #${epoch} (${logs.acc})` );
- console.log( "Epoch #", epoch, logs );
- }
- }
- } );
- setText( "Trivia Know-It-All Bot is Ready!" );
- document.getElementById( "question" ).addEventListener( "keyup", function( event ) {
- // Number 13 is the "Enter" key on the keyboard
- if( event.keyCode === 13 ) {
- // Cancel the default action, if needed
- event.preventDefault();
- // Trigger the button element with a click
- document.getElementById( "submit" ).click();
- }
- });
- document.getElementById( "submit" ).addEventListener( "click", async function( event ) {
- let text = document.getElementById( "question" ).value;
- document.getElementById( "question" ).value = "";
- // Run the calculation things
- let qVec = [];
- let words = text.replace(/[^a-z ]/gi, "").toLowerCase().split( " " ).filter( x => !!x );
- for( let i = 0; i < maxSentenceLength; i++ ) {
- if( words[ i ] ) {
- qVec.push( wordReference[ words[ i ] ] );
- }
- else {
- // Add padding to keep the vectors the same length
- qVec.push( 0 );
- }
- }
- let prediction = await model.predict( tf.stack( [ tf.tensor1d( qVec ) ] ) ).data();
- // Get the index of the highest value in the prediction
- let id = prediction.indexOf( Math.max( ...prediction ) );
- document.getElementById( "bot-question" ).innerText = questions[ id ];
- document.getElementById( "bot-answer" ).innerText = data[ id ].Answer.Value;
- });
- })();
- </script>
- </body>
- </html>

