赞
踩
npm i langchain pdf-parse mammoth epub2 html-to-text
const path = require("path");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { PDFLoader } = require("langchain/document_loaders/fs/pdf");
const { TextLoader } = require("langchain/document_loaders/fs/text");
const { DocxLoader } = require("langchain/document_loaders/fs/docx");
const { EPubLoader } = require("langchain/document_loaders/fs/epub");

// Chunk size handed to the splitter; also the multiplier for the usage estimate.
const CHUNK_SIZE = 1000;

/**
 * Pick the LangChain document loader that matches a file type.
 *
 * @param {String} filepath Path of the file to load.
 * @param {String} mimeType File type key; despite the name this is a bare
 *   file extension without the dot ('pdf', 'epub', 'txt' or 'docx').
 * @returns {Object} A loader instance constructed with `filepath`.
 * @throws {Error} When `filepath` is empty or `mimeType` is not supported.
 */
function documentClassification(filepath, mimeType) {
  if (!filepath) throw new Error(`路径出现问题:${filepath}`);

  let loader = null;
  switch (mimeType) {
    case 'pdf':
      loader = new PDFLoader(filepath);
      break;
    case 'epub':
      loader = new EPubLoader(filepath);
      break;
    case 'txt':
      loader = new TextLoader(filepath);
      break;
    case 'docx':
      loader = new DocxLoader(filepath);
      break;
    default:
      break;
  }

  if (!loader) throw new Error(`无法解析的类型:${mimeType}`);
  return loader;
}

/**
 * Load an uploaded document and split it into chunks.
 *
 * @param {Object} params
 * @param {String} params.filepath Path of the file to read.
 * @param {String} params.filename Name of the file (accepted for interface
 *   compatibility; not used by this function).
 * @returns {Promise<Object>} `{ splitterDocs, useTokens }`, where `splitterDocs`
 *   is the list of split documents and `useTokens` is a rough usage estimate
 *   computed as chunk count × chunk size.
 * @throws {Error} When `filepath` is empty or its extension is not supported.
 */
async function documentReading({ filepath, filename }) {
  // Validate first so a missing path raises the descriptive error rather than
  // a TypeError from the extension lookup below.
  if (!filepath) throw new Error(`路径出现问题:${filepath}`);

  // Use the final extension, lower-cased: splitting on the first '.' breaks on
  // paths such as './uploads/a.pdf' or 'report.v2.pdf', and 'A.PDF' should match.
  const mimeType = path.extname(filepath).slice(1).toLowerCase();
  console.log('文件类型', mimeType);

  // Set up the text splitter.
  const splitter = new RecursiveCharacterTextSplitter({
    chunkSize: CHUNK_SIZE,
  });

  // Load the raw document(s).
  const loader = documentClassification(filepath, mimeType);
  const docs = await loader.load();

  // Split into chunks.
  const splitterDocs = await splitter.splitDocuments(docs);
  console.log('文档分割完成');

  return {
    splitterDocs,
    useTokens: splitterDocs.length * CHUNK_SIZE,
  };
}

module.exports = documentReading;
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。