赞
踩
1,kettle下载
kettle本身是Java开发的开源项目,由于某种原因,目前官网已经不支持下载了,可到我的CSDN中下载
下载完后,还需要安装 JDK 环境以及对应的数据库驱动包。需要注意的是,该 kettle 的版本较低,如果你用的是 MySQL 数据库,需要下载较低版本的驱动包(原文写作"5.4.7 以下",但 MySQL Connector/J 并没有 5.4.x 这一版本线,疑为 5.1.47 的笔误,请以实际可用的驱动版本为准),下载地址参照以下:
2,一切准备就绪后就可以开始使用啦
假设现在有一个场景:需要将本地的所有库同步到目标服务器上对应的数据库中
其中涉及到 数据表的自动创建 以及 数据的自动同步,这里以全量同步为例。
实现步骤分为三大块:定义一个入口任务(main),该任务中涵盖 start、库表初始化(删除表及数据,对应 init.kjb)、全量同步(对应各个 sync_*.ktr 转换)、结束(DUMMY)。
3,main 任务脚本内容
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Kettle job "main": the entry point of the full synchronisation.
  Flow, as wired in the <hops> section at the bottom of this file:
    START, then the job init.kjb, then the five sync_*.ktr transformations,
    each of which hops to the closing DUMMY entry.
  All referenced files are resolved through ${Internal.Entry.Current.Directory}.
-->
<job>
  <name>main</name>
  <description />
  <extended_description />
  <job_version />
  <directory>/</directory>
  <created_user>-</created_user>
  <created_date>2024/06/03 09:54:18.592</created_date>
  <modified_user>-</modified_user>
  <modified_date>2024/06/03 09:54:18.592</modified_date>
  <parameters>
  </parameters>
  <slaveservers>
  </slaveservers>
  <!-- Job log table: connection, schema and table are left empty in this job.
       Each <field> pairs an id with a name and an enabled flag. -->
  <job-log-table>
    <connection />
    <schema />
    <table />
    <size_limit_lines />
    <interval />
    <timeout_days />
    <field>
      <id>ID_JOB</id>
      <enabled>Y</enabled>
      <name>ID_JOB</name>
    </field>
    <field>
      <id>CHANNEL_ID</id>
      <enabled>Y</enabled>
      <name>CHANNEL_ID</name>
    </field>
    <field>
      <id>JOBNAME</id>
      <enabled>Y</enabled>
      <name>JOBNAME</name>
    </field>
    <field>
      <id>STATUS</id>
      <enabled>Y</enabled>
      <name>STATUS</name>
    </field>
    <field>
      <id>LINES_READ</id>
      <enabled>Y</enabled>
      <name>LINES_READ</name>
    </field>
    <field>
      <id>LINES_WRITTEN</id>
      <enabled>Y</enabled>
      <name>LINES_WRITTEN</name>
    </field>
    <field>
      <id>LINES_UPDATED</id>
      <enabled>Y</enabled>
      <name>LINES_UPDATED</name>
    </field>
    <field>
      <id>LINES_INPUT</id>
      <enabled>Y</enabled>
      <name>LINES_INPUT</name>
    </field>
    <field>
      <id>LINES_OUTPUT</id>
      <enabled>Y</enabled>
      <name>LINES_OUTPUT</name>
    </field>
    <field>
      <id>LINES_REJECTED</id>
      <enabled>Y</enabled>
      <name>LINES_REJECTED</name>
    </field>
    <field>
      <id>ERRORS</id>
      <enabled>Y</enabled>
      <name>ERRORS</name>
    </field>
    <field>
      <id>STARTDATE</id>
      <enabled>Y</enabled>
      <name>STARTDATE</name>
    </field>
    <field>
      <id>ENDDATE</id>
      <enabled>Y</enabled>
      <name>ENDDATE</name>
    </field>
    <field>
      <id>LOGDATE</id>
      <enabled>Y</enabled>
      <name>LOGDATE</name>
    </field>
    <field>
      <id>DEPDATE</id>
      <enabled>Y</enabled>
      <name>DEPDATE</name>
    </field>
    <field>
      <id>REPLAYDATE</id>
      <enabled>Y</enabled>
      <name>REPLAYDATE</name>
    </field>
    <field>
      <id>LOG_FIELD</id>
      <enabled>Y</enabled>
      <name>LOG_FIELD</name>
    </field>
    <field>
      <id>EXECUTING_SERVER</id>
      <enabled>N</enabled>
      <name>EXECUTING_SERVER</name>
    </field>
    <field>
      <id>EXECUTING_USER</id>
      <enabled>N</enabled>
      <name>EXECUTING_USER</name>
    </field>
    <field>
      <id>START_JOB_ENTRY</id>
      <enabled>N</enabled>
      <name>START_JOB_ENTRY</name>
    </field>
    <field>
      <id>CLIENT</id>
      <enabled>N</enabled>
      <name>CLIENT</name>
    </field>
  </job-log-table>
  <!-- Job-entry log table: also unconfigured (empty connection/schema/table).
       Note that the ids JOBNAME and JOBENTRYNAME carry the names TRANSNAME and STEPNAME here. -->
  <jobentry-log-table>
    <connection />
    <schema />
    <table />
    <timeout_days />
    <field>
      <id>ID_BATCH</id>
      <enabled>Y</enabled>
      <name>ID_BATCH</name>
    </field>
    <field>
      <id>CHANNEL_ID</id>
      <enabled>Y</enabled>
      <name>CHANNEL_ID</name>
    </field>
    <field>
      <id>LOG_DATE</id>
      <enabled>Y</enabled>
      <name>LOG_DATE</name>
    </field>
    <field>
      <id>JOBNAME</id>
      <enabled>Y</enabled>
      <name>TRANSNAME</name>
    </field>
    <field>
      <id>JOBENTRYNAME</id>
      <enabled>Y</enabled>
      <name>STEPNAME</name>
    </field>
    <field>
      <id>LINES_READ</id>
      <enabled>Y</enabled>
      <name>LINES_READ</name>
    </field>
    <field>
      <id>LINES_WRITTEN</id>
      <enabled>Y</enabled>
      <name>LINES_WRITTEN</name>
    </field>
    <field>
      <id>LINES_UPDATED</id>
      <enabled>Y</enabled>
      <name>LINES_UPDATED</name>
    </field>
    <field>
      <id>LINES_INPUT</id>
      <enabled>Y</enabled>
      <name>LINES_INPUT</name>
    </field>
    <field>
      <id>LINES_OUTPUT</id>
      <enabled>Y</enabled>
      <name>LINES_OUTPUT</name>
    </field>
    <field>
      <id>LINES_REJECTED</id>
      <enabled>Y</enabled>
      <name>LINES_REJECTED</name>
    </field>
    <field>
      <id>ERRORS</id>
      <enabled>Y</enabled>
      <name>ERRORS</name>
    </field>
    <field>
      <id>RESULT</id>
      <enabled>Y</enabled>
      <name>RESULT</name>
    </field>
    <field>
      <id>NR_RESULT_ROWS</id>
      <enabled>Y</enabled>
      <name>NR_RESULT_ROWS</name>
    </field>
    <field>
      <id>NR_RESULT_FILES</id>
      <enabled>Y</enabled>
      <name>NR_RESULT_FILES</name>
    </field>
    <field>
      <id>LOG_FIELD</id>
      <enabled>N</enabled>
      <name>LOG_FIELD</name>
    </field>
    <field>
      <id>COPY_NR</id>
      <enabled>N</enabled>
      <name>COPY_NR</name>
    </field>
  </jobentry-log-table>
  <!-- Channel log table: unconfigured as well (empty connection/schema/table). -->
  <channel-log-table>
    <connection />
    <schema />
    <table />
    <timeout_days />
    <field>
      <id>ID_BATCH</id>
      <enabled>Y</enabled>
      <name>ID_BATCH</name>
    </field>
    <field>
      <id>CHANNEL_ID</id>
      <enabled>Y</enabled>
      <name>CHANNEL_ID</name>
    </field>
    <field>
      <id>LOG_DATE</id>
      <enabled>Y</enabled>
      <name>LOG_DATE</name>
    </field>
    <field>
      <id>LOGGING_OBJECT_TYPE</id>
      <enabled>Y</enabled>
      <name>LOGGING_OBJECT_TYPE</name>
    </field>
    <field>
      <id>OBJECT_NAME</id>
      <enabled>Y</enabled>
      <name>OBJECT_NAME</name>
    </field>
    <field>
      <id>OBJECT_COPY</id>
      <enabled>Y</enabled>
      <name>OBJECT_COPY</name>
    </field>
    <field>
      <id>REPOSITORY_DIRECTORY</id>
      <enabled>Y</enabled>
      <name>REPOSITORY_DIRECTORY</name>
    </field>
    <field>
      <id>FILENAME</id>
      <enabled>Y</enabled>
      <name>FILENAME</name>
    </field>
    <field>
      <id>OBJECT_ID</id>
      <enabled>Y</enabled>
      <name>OBJECT_ID</name>
    </field>
    <field>
      <id>OBJECT_REVISION</id>
      <enabled>Y</enabled>
      <name>OBJECT_REVISION</name>
    </field>
    <field>
      <id>PARENT_CHANNEL_ID</id>
      <enabled>Y</enabled>
      <name>PARENT_CHANNEL_ID</name>
    </field>
    <field>
      <id>ROOT_CHANNEL_ID</id>
      <enabled>Y</enabled>
      <name>ROOT_CHANNEL_ID</name>
    </field>
  </channel-log-table>
  <pass_batchid>N</pass_batchid>
  <shared_objects_file />
  <entries>
    <!-- START: the job's start entry (start=Y, repeat=N). -->
    <entry>
      <name>START</name>
      <description />
      <type>SPECIAL</type>
      <start>Y</start>
      <dummy>N</dummy>
      <repeat>N</repeat>
      <schedulerType>0</schedulerType>
      <intervalSeconds>0</intervalSeconds>
      <intervalMinutes>60</intervalMinutes>
      <hour>12</hour>
      <minutes>0</minutes>
      <weekDay>1</weekDay>
      <DayOfMonth>1</DayOfMonth>
      <parallel>N</parallel>
      <draw>Y</draw>
      <nr>0</nr>
      <xloc>176</xloc>
      <yloc>208</yloc>
    </entry>
    <!-- Transformation entry: runs sync_aav.ktr, passing all parameters down. -->
    <entry>
      <name>同步aav</name>
      <description />
      <type>TRANS</type>
      <specification_method>filename</specification_method>
      <trans_object_id />
      <filename>${Internal.Entry.Current.Directory}/sync_aav.ktr</filename>
      <transname />
      <arg_from_previous>N</arg_from_previous>
      <params_from_previous>N</params_from_previous>
      <exec_per_row>N</exec_per_row>
      <clear_rows>N</clear_rows>
      <clear_files>N</clear_files>
      <set_logfile>N</set_logfile>
      <logfile />
      <logext />
      <add_date>N</add_date>
      <add_time>N</add_time>
      <loglevel>Basic</loglevel>
      <cluster>N</cluster>
      <slave_server_name />
      <set_append_logfile>N</set_append_logfile>
      <wait_until_finished>Y</wait_until_finished>
      <follow_abort_remote>N</follow_abort_remote>
      <create_parent_folder>N</create_parent_folder>
      <logging_remote_work>N</logging_remote_work>
      <run_configuration>Pentaho local</run_configuration>
      <parameters>
        <pass_all_parameters>Y</pass_all_parameters>
      </parameters>
      <parallel>N</parallel>
      <draw>Y</draw>
      <nr>0</nr>
      <xloc>640</xloc>
      <yloc>48</yloc>
    </entry>
    <!-- Job entry: runs the nested job init.kjb (log level "Nothing"), passing all parameters down. -->
    <entry>
      <name>初始化所有表</name>
      <description />
      <type>JOB</type>
      <specification_method>filename</specification_method>
      <job_object_id />
      <filename>${Internal.Entry.Current.Directory}/init.kjb</filename>
      <jobname />
      <arg_from_previous>N</arg_from_previous>
      <params_from_previous>N</params_from_previous>
      <exec_per_row>N</exec_per_row>
      <set_logfile>N</set_logfile>
      <logfile />
      <logext />
      <add_date>N</add_date>
      <add_time>N</add_time>
      <loglevel>Nothing</loglevel>
      <slave_server_name />
      <wait_until_finished>Y</wait_until_finished>
      <follow_abort_remote>N</follow_abort_remote>
      <expand_remote_job>N</expand_remote_job>
      <create_parent_folder>N</create_parent_folder>
      <pass_export>N</pass_export>
      <parameters>
        <pass_all_parameters>Y</pass_all_parameters>
      </parameters>
      <set_append_logfile>N</set_append_logfile>
      <parallel>N</parallel>
      <draw>Y</draw>
      <nr>0</nr>
      <xloc>352</xloc>
      <yloc>208</yloc>
    </entry>
    <!-- DUMMY: the entry every sync transformation hops to (dummy=Y). -->
    <entry>
      <name>DUMMY</name>
      <description />
      <type>SPECIAL</type>
      <start>N</start>
      <dummy>Y</dummy>
      <repeat>N</repeat>
      <schedulerType>0</schedulerType>
      <intervalSeconds>0</intervalSeconds>
      <intervalMinutes>60</intervalMinutes>
      <hour>12</hour>
      <minutes>0</minutes>
      <weekDay>1</weekDay>
      <DayOfMonth>1</DayOfMonth>
      <parallel>N</parallel>
      <draw>Y</draw>
      <nr>0</nr>
      <xloc>928</xloc>
      <yloc>208</yloc>
    </entry>
    <!-- Transformation entry: runs sync_aps.ktr, passing all parameters down. -->
    <entry>
      <name>同步aps</name>
      <description />
      <type>TRANS</type>
      <specification_method>filename</specification_method>
      <trans_object_id />
      <filename>${Internal.Entry.Current.Directory}/sync_aps.ktr</filename>
      <transname />
      <arg_from_previous>N</arg_from_previous>
      <params_from_previous>N</params_from_previous>
      <exec_per_row>N</exec_per_row>
      <clear_rows>N</clear_rows>
      <clear_files>N</clear_files>
      <set_logfile>N</set_logfile>
      <logfile />
      <logext />
      <add_date>N</add_date>
      <add_time>N</add_time>
      <loglevel>Basic</loglevel>
      <cluster>N</cluster>
      <slave_server_name />
      <set_append_logfile>N</set_append_logfile>
      <wait_until_finished>Y</wait_until_finished>
      <follow_abort_remote>N</follow_abort_remote>
      <create_parent_folder>N</create_parent_folder>
      <logging_remote_work>N</logging_remote_work>
      <run_configuration>Pentaho local</run_configuration>
      <parameters>
        <pass_all_parameters>Y</pass_all_parameters>
      </parameters>
      <parallel>N</parallel>
      <draw>Y</draw>
      <nr>0</nr>
      <xloc>640</xloc>
      <yloc>128</yloc>
    </entry>
    <!-- Transformation entry: runs sync_douban.ktr, passing all parameters down. -->
    <entry>
      <name>同步douban</name>
      <description />
      <type>TRANS</type>
      <specification_method>filename</specification_method>
      <trans_object_id />
      <filename>${Internal.Entry.Current.Directory}/sync_douban.ktr</filename>
      <transname />
      <arg_from_previous>N</arg_from_previous>
      <params_from_previous>N</params_from_previous>
      <exec_per_row>N</exec_per_row>
      <clear_rows>N</clear_rows>
      <clear_files>N</clear_files>
      <set_logfile>N</set_logfile>
      <logfile />
      <logext />
      <add_date>N</add_date>
      <add_time>N</add_time>
      <loglevel>Basic</loglevel>
      <cluster>N</cluster>
      <slave_server_name />
      <set_append_logfile>N</set_append_logfile>
      <wait_until_finished>Y</wait_until_finished>
      <follow_abort_remote>N</follow_abort_remote>
      <create_parent_folder>N</create_parent_folder>
      <logging_remote_work>N</logging_remote_work>
      <run_configuration>Pentaho local</run_configuration>
      <parameters>
        <pass_all_parameters>Y</pass_all_parameters>
      </parameters>
      <parallel>N</parallel>
      <draw>Y</draw>
      <nr>0</nr>
      <xloc>640</xloc>
      <yloc>208</yloc>
    </entry>
    <!-- Transformation entry: runs sync_sp-dev.ktr, passing all parameters down. -->
    <entry>
      <name>同步sp-dev</name>
      <description />
      <type>TRANS</type>
      <specification_method>filename</specification_method>
      <trans_object_id />
      <filename>${Internal.Entry.Current.Directory}/sync_sp-dev.ktr</filename>
      <transname />
      <arg_from_previous>N</arg_from_previous>
      <params_from_previous>N</params_from_previous>
      <exec_per_row>N</exec_per_row>
      <clear_rows>N</clear_rows>
      <clear_files>N</clear_files>
      <set_logfile>N</set_logfile>
      <logfile />
      <logext />
      <add_date>N</add_date>
      <add_time>N</add_time>
      <loglevel>Basic</loglevel>
      <cluster>N</cluster>
      <slave_server_name />
      <set_append_logfile>N</set_append_logfile>
      <wait_until_finished>Y</wait_until_finished>
      <follow_abort_remote>N</follow_abort_remote>
      <create_parent_folder>N</create_parent_folder>
      <logging_remote_work>N</logging_remote_work>
      <run_configuration>Pentaho local</run_configuration>
      <parameters>
        <pass_all_parameters>Y</pass_all_parameters>
      </parameters>
      <parallel>N</parallel>
      <draw>Y</draw>
      <nr>0</nr>
      <xloc>640</xloc>
      <yloc>368</yloc>
    </entry>
    <!-- Transformation entry: runs sync_lottery.ktr, passing all parameters down. -->
    <entry>
      <name>同步lottery</name>
      <description />
      <type>TRANS</type>
      <specification_method>filename</specification_method>
      <trans_object_id />
      <filename>${Internal.Entry.Current.Directory}/sync_lottery.ktr</filename>
      <transname />
      <arg_from_previous>N</arg_from_previous>
      <params_from_previous>N</params_from_previous>
      <exec_per_row>N</exec_per_row>
      <clear_rows>N</clear_rows>
      <clear_files>N</clear_files>
      <set_logfile>N</set_logfile>
      <logfile />
      <logext />
      <add_date>N</add_date>
      <add_time>N</add_time>
      <loglevel>Basic</loglevel>
      <cluster>N</cluster>
      <slave_server_name />
      <set_append_logfile>N</set_append_logfile>
      <wait_until_finished>Y</wait_until_finished>
      <follow_abort_remote>N</follow_abort_remote>
      <create_parent_folder>N</create_parent_folder>
      <logging_remote_work>N</logging_remote_work>
      <run_configuration>Pentaho local</run_configuration>
      <parameters>
        <pass_all_parameters>Y</pass_all_parameters>
      </parameters>
      <parallel>N</parallel>
      <draw>Y</draw>
      <nr>0</nr>
      <xloc>640</xloc>
      <yloc>288</yloc>
    </entry>
  </entries>
  <!-- Hops wire the entries together. START to 初始化所有表 is the only unconditional
       hop; every other hop has unconditional=N and evaluation=Y.
       NOTE(review): presumably those hops are followed only when the source entry
       succeeds; confirm against the hop settings shown in Spoon. -->
  <hops>
    <hop>
      <from>START</from>
      <to>初始化所有表</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>Y</unconditional>
    </hop>
    <hop>
      <from>初始化所有表</from>
      <to>同步aav</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>N</unconditional>
    </hop>
    <hop>
      <from>同步aav</from>
      <to>DUMMY</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>N</unconditional>
    </hop>
    <hop>
      <from>初始化所有表</from>
      <to>同步douban</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>N</unconditional>
    </hop>
    <hop>
      <from>初始化所有表</from>
      <to>同步lottery</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>N</unconditional>
    </hop>
    <hop>
      <from>初始化所有表</from>
      <to>同步sp-dev</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>N</unconditional>
    </hop>
    <hop>
      <from>初始化所有表</from>
      <to>同步aps</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>N</unconditional>
    </hop>
    <hop>
      <from>同步aps</from>
      <to>DUMMY</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>N</unconditional>
    </hop>
    <hop>
      <from>同步douban</from>
      <to>DUMMY</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>N</unconditional>
    </hop>
    <hop>
      <from>同步lottery</from>
      <to>DUMMY</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>N</unconditional>
    </hop>
    <hop>
      <from>同步sp-dev</from>
      <to>DUMMY</to>
      <from_nr>0</from_nr>
      <to_nr>0</to_nr>
      <enabled>Y</enabled>
      <evaluation>Y</evaluation>
      <unconditional>N</unconditional>
    </hop>
  </hops>
  <notepads>
  </notepads>
  <!-- Embedded metastore data: defines the "Pentaho local" run configuration that the
       transformation entries above reference in <run_configuration>. The values are
       JSON strings and are kept exactly as generated. -->
  <attributes>
    <group>
      <name>METASTORE.pentaho</name>
      <attribute>
        <key>Default Run Configuration</key>
        <value>{"namespace":"pentaho","id":"Default Run Configuration","name":"Default Run Configuration","description":"Defines a default run configuration","metaStoreName":null}</value>
      </attribute>
    </group>
    <group>
      <name>{"_":"Embedded MetaStore Elements","namespace":"pentaho","type":"Default Run Configuration"}</name>
      <attribute>
        <key>Pentaho local</key>
        <value>{"children":[{"children":[],"id":"server","value":null},{"children":[],"id":"clustered","value":"N"},{"children":[],"id":"name","value":"Pentaho local"},{"children":[],"id":"description","value":null},{"children":[],"id":"readOnly","value":"Y"},{"children":[],"id":"sendResources","value":"N"},{"children":[],"id":"logRemoteExecutionLocally","value":"N"},{"children":[],"id":"remote","value":"N"},{"children":[],"id":"local","value":"Y"},{"children":[],"id":"showTransformations","value":"N"}],"id":"Pentaho local","value":null,"name":"Pentaho local","owner":null,"ownerPermissionsList":[]}</value>
      </attribute>
    </group>
  </attributes>
</job>
4,kettle.properties变量
- 名称 值
- local_ip localhost
- local_user root
- password 123456
- schemeName_aav aav
- schemeName_aav1 aav1
- schemeName_aps aps
- schemeName_aps1 aps1
- schemeName_douban douban
- schemeName_douban1 douban1
- schemeName_lottery lottery
- schemeName_lottery1 lottery1
- schemeName_sp-dev sp-dev
- schemeName_sp-dev1 sp-dev1
5,在使用组件时,经常会遇到一个问题:目标数据库中某个字段设置的是非 null(NOT NULL),而源数据库中该字段的值为空字符串;数据进入 kettle 后,空字符串会被默认变成 null,这时直接同步会报错,需要更改 kettle 的配置变量。
在 kettle.properties 中添加一行:KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL=Y
更改后需要重启kettle
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。