当前位置:   article > 正文

【DataX实战】DataX同步ClickHouse数据到Hive_datax clickhouse replace into

datax clickhouse replace into

1.准备数据

1.1 clickhouse建表并插入数据

  1. CREATE TABLE cell_towers_10
  2. (
  3.     radio Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5),
  4.     mcc UInt16,
  5.     net UInt16,
  6.     area UInt16,
  7.     cell UInt64,
  8.     unit Int16,
  9.     lon Float64,
  10.     lat Float64,
  11.     range UInt32,
  12.     samples UInt32,
  13.     changeable UInt8,
  14.     created DateTime,
  15.     updated DateTime,
  16.     averageSignal UInt8
  17. )
  18. ENGINE = MergeTree ORDER BY (radio, mcc, net, created);
  1. INSERT INTO datasets.cell_towers_10 (radio,mcc,net,area,cell,unit,lon,lat,`range`,samples,changeable,created,updated,averageSignal) VALUES
  2.  ('CDMA',302,86,130,4113,-1,-112.069237,48.978268,1000,1,1,'2017-09-15 10:09:44','2017-09-15 10:10:44',0),
  3.  ('CDMA',302,86,130,0,-1,-112.069237,48.978268,1000,1,1,'2017-09-15 10:09:45','2017-09-15 10:10:45',0),
  4.  ('CDMA',302,86,130,4114,-1,-112.069237,48.978268,1000,1,1,'2017-09-15 10:09:46','2017-09-15 10:10:46',0),
  5.  ('CDMA',302,1168,15002,59995,-1,-79.462952,44.009564,1000,7,1,'2017-09-14 19:22:48','2017-09-14 19:30:04',0),
  6.  ('CDMA',302,1168,15002,59506,-1,-79.522812,43.79319,1000,1,1,'2017-09-14 19:57:04','2017-09-14 20:33:33',0),
  7.  ('CDMA',302,1168,15004,60815,-1,-79.315284,43.838686,1000,7,1,'2017-09-14 20:22:45','2017-09-14 21:06:28',0),
  8.  ('CDMA',302,1168,15002,59507,-1,-79.459198,43.797741,1000,3,1,'2017-09-14 20:38:37','2017-09-14 21:20:47',0),
  9.  ('CDMA',302,1168,15002,59946,-1,-79.462547,44.01469,1000,1,1,'2017-09-14 22:19:45','2017-09-14 22:56:46',0),
  10.  ('CDMA',302,1168,16000,14113,-1,-80.480919,43.435841,1000,1,1,'2017-09-14 22:53:59','2017-09-15 00:22:24',0),
  11.  ('CDMA',302,1168,15004,60516,-1,-79.37619,43.84483,1000,2,1,'2017-09-14 23:11:07','2017-09-15 00:57:57',0);

1.2 hive中建表

  1. CREATE TABLE ck_cell_towers_10
  2. (
  3. radio string,
  4. mcc smallint,
  5. net smallint,
  6. area int,
  7. cell bigint,
  8. unit smallint,
  9. lon double,
  10. lat double,
  11. range_a int,
  12. samples int,
  13. changeable tinyint,
  14. created date,
  15. updated date,
  16. averageSignal tinyint
  17. )row format delimited fields terminated by ",";

2. 准备工作

由于Datax没有clickhousereader组件,用rdbmsreader替代。

需要把clickhousewriter/libs下的所有jar包复制到rdbmsreader/libs下,同名jar包直接替换,另外,删掉rm -f guava-r05.jar这个包,否则会报错。

修改plugin.json文件:在"driver" 增加 "ru.yandex.clickhouse.ClickHouseDriver"。

编辑json文件时,name改为rdbmsreader。

"name": "rdbmsreader" 

3. 创建任务

可以在datax-web中创建任务生成json,也可以直接编辑json

  1. {
  2. "job": {
  3. "setting": {
  4. "speed": {
  5. "channel": 3
  6. },
  7. "errorLimit": {
  8. "record": 0,
  9. "percentage": 0.02
  10. }
  11. },
  12. "content": [
  13. {
  14. "reader": {
  15. "name": "rdbmsreader",
  16. "parameter": {
  17. "username": "yRjwDFuoPKlqya9h9H2Amg==",
  18. "password": "yRjwDFuoPKlqya9h9H2Amg==",
  19. "column": [
  20. "radio",
  21. "mcc",
  22. "net",
  23. "area",
  24. "cell",
  25. "unit",
  26. "lon",
  27. "lat",
  28. "range",
  29. "samples",
  30. "changeable",
  31. "created",
  32. "updated",
  33. "averageSignal"
  34. ],
  35. "splitPk": "",
  36. "connection": [
  37. {
  38. "table": [
  39. "cell_towers"
  40. ],
  41. "jdbcUrl": [
  42. "jdbc:clickhouse://10.16.60.44:8123/datasets"
  43. ]
  44. }
  45. ]
  46. }
  47. },
  48. "writer": {
  49. "name": "hdfswriter",
  50. "parameter": {
  51. "defaultFS": "hdfs://10.16.60.31:8020",
  52. "fileType": "text",
  53. "path": "/user/hive/warehouse/datasets.db/ck_cell_towers",
  54. "fileName": "ck_cell_towers",
  55. "writeMode": "append",
  56. "fieldDelimiter": ",",
  57. "column": [
  58. {
  59. "name": "radio",
  60. "type": "string"
  61. },
  62. {
  63. "name": "mcc",
  64. "type": "smallint"
  65. },
  66. {
  67. "name": "net",
  68. "type": "smallint"
  69. },
  70. {
  71. "name": "area",
  72. "type": "int"
  73. },
  74. {
  75. "name": "cell",
  76. "type": "bigint"
  77. },
  78. {
  79. "name": "unit",
  80. "type": "smallint"
  81. },
  82. {
  83. "name": "lon",
  84. "type": "double"
  85. },
  86. {
  87. "name": "lat",
  88. "type": "double"
  89. },
  90. {
  91. "name": "range_a",
  92. "type": "int"
  93. },
  94. {
  95. "name": "samples",
  96. "type": "int"
  97. },
  98. {
  99. "name": "changeable",
  100. "type": "tinyint"
  101. },
  102. {
  103. "name": "created",
  104. "type": "date"
  105. },
  106. {
  107. "name": "updated",
  108. "type": "date"
  109. },
  110. {
  111. "name": "averagesignal",
  112. "type": "tinyint"
  113. }
  114. ]
  115. }
  116. }
  117. }
  118. ]
  119. }
  120. }

4. 执行结果

fe6660d5abad4a41b059b54600e1cf84.png

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/繁依Fanyi0/article/detail/141127
推荐阅读
  

闽ICP备14008679号