DataX can be thought of as a faster alternative to Sqoop: Sqoop runs MapReduce map tasks under the hood, while DataX transfers data through in-memory channels inside a single process, which is why it is usually faster.
DataX is an offline synchronization tool for heterogeneous data sources. It aims to provide stable and efficient data synchronization between relational databases (MySQL, Oracle, etc.), HDFS, Hive, ODPS, HBase, FTP and other heterogeneous sources.
datax has you write a JSON job file
flume has you write a conf file
azkaban has you write a flow file
sqoop has you write a command line
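Whatever the format, a DataX job is just a JSON file handed to DataX's Python launcher, typically something like python bin/datax.py job.json (the exact path depends on where DataX is installed).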
Importing data from MySQL into HDFS
{
  "job": {
    "setting": {
      "speed": {
        "channel": 1
      }
    },
    "content": [
      {
        "reader": {
          "name": "mysqlreader",
          "parameter": {
            "username": "root",
            "password": "123456",
            "connection": [
              {
                "querySql": ["select empno,ename,job,hiredate,sal from emp;"],
                "jdbcUrl": ["jdbc:mysql://bigdata01:3306/sqoop"]
              }
            ]
          }
        },
        "writer": {
          "name": "hdfswriter",
          "parameter": {
            "defaultFS": "hdfs://bigdata01:9820",
            "path": "/datax/emp",
            "fileName": "emp",
            "column": [
              {"name": "empno", "type": "int"},
              {"name": "ename", "type": "string"},
              {"name": "job", "type": "string"},
              {"name": "hiredate", "type": "string"},
              {"name": "sal", "type": "double"}
            ],
            "fileType": "text",
            "writeMode": "append",
            "fieldDelimiter": "\t"
          }
        }
      }
    ]
  }
}
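For reference, the emp table that the querySql above reads might look roughly like the sketch below in the sqoop database; the column names and types are only assumptions, included to show what the reader pulls and how it maps onto the hdfswriter columns, so the real table may differ. Also note that hdfswriter generally expects the target directory (/datax/emp here) to already exist on HDFS before the job runs.

-- hypothetical source table matching the querySql above
create table emp (
  empno    int primary key,   -- employee number
  ename    varchar(50),       -- employee name
  job      varchar(50),       -- job title
  hiredate date,              -- hire date, written to HDFS as a string
  sal      double             -- salary
);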
Importing data from HDFS into MySQL
{
  "job": {
    "setting": {
      "speed": {
        "channel": 1
      }
    },
    "content": [
      {
        "reader": {
          "name": "hdfsreader",
          "parameter": {
            "path": "/datax/emp/*",
            "defaultFS": "hdfs://bigdata01:9820",
            "column": [
              {"index": 0, "type": "string"},
              {"index": 1, "type": "string"},
              {"index": 2, "type": "string"},
              {"index": 3, "type": "string"},
              {"index": 4, "type": "string"}
            ],
            "fileType": "text",
            "encoding": "UTF-8",
            "fieldDelimiter": "\t"
          }
        },
        "writer": {
          "name": "mysqlwriter",
          "parameter": {
            "writeMode": "replace",
            "username": "root",
            "password": "123456",
            "column": ["empno", "ename", "job", "hiredate", "sal"],
            "connection": [
              {
                "jdbcUrl": "jdbc:mysql://bigdata01:3306/sqoop",
                "table": ["eemmpp"]
              }
            ]
          }
        }
      }
    ]
  }
}
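The writer uses writeMode "replace", which corresponds to MySQL's REPLACE INTO, so the target table must already exist and should have a primary or unique key for the replace semantics to do anything useful. A minimal sketch of such a table (names and types are assumed to mirror the five columns written to HDFS earlier):

-- hypothetical target table for the mysqlwriter job above
create table eemmpp (
  empno    int primary key,   -- key that REPLACE INTO uses to overwrite duplicates
  ename    varchar(50),
  job      varchar(50),
  hiredate varchar(50),
  sal      double
);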
Usage notes
1) When specifying column types, DataX only accepts a small, fixed set of type names (such as the int, string and double used above), not the full range of Java types.
2) If "fieldDelimiter" is not specified, the delimiter defaults to a comma (',').
Importing data from MySQL into Hive (important)
Although this is described as importing MySQL data into Hive, what really happens is still an import from MySQL into HDFS:
first create a Hive table whose location points at an HDFS path, then use DataX to load the MySQL data into that same path.
1) Create the Hive table

create external table if not exists ods_01_base_area (
  id            int    COMMENT 'id',
  area_code     string COMMENT 'province code',
  province_name string COMMENT 'province name',
  iso           string COMMENT 'ISO code'
)
row format delimited fields terminated by ','
stored as TextFile
location '/data/nshop/ods/ods_01_base_area/';   -- the HDFS path the table points at

2) Load the MySQL data into HDFS with DataX
Make sure the path and the field delimiter in the job below are exactly the same as the location and delimiter used when the Hive table was created.
{
  "job": {
    "setting": {
      "speed": {
        "channel": 1
      }
    },
    "content": [
      {
        "reader": {
          "name": "mysqlreader",
          "parameter": {
            "username": "root",
            "password": "123456",
            "column": [
              "id",
              "area_code",
              "province_name",
              "iso"
            ],
            "splitPk": "id",
            "connection": [
              {
                "table": ["base_area"],
                "jdbcUrl": ["jdbc:mysql://bigdata01:3306/datax"]
              }
            ]
          }
        },
        "writer": {
          "name": "hdfswriter",
          "parameter": {
            "defaultFS": "hdfs://bigdata01:9820",
            "path": "/data/nshop/ods/ods_01_base_area/",
            "fileName": "base_area",
            "column": [
              {"name": "id", "type": "int"},
              {"name": "area_code", "type": "string"},
              {"name": "province_name", "type": "string"},
              {"name": "iso", "type": "string"}
            ],
            "fileType": "text",
            "writeMode": "append",
            "fieldDelimiter": ","
          }
        }
      }
    ]
  }
}
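Because the Hive table created in step 1 is an external table whose location is exactly the path this job writes to, the rows are visible in Hive as soon as the job finishes, with no extra LOAD DATA step. A quick sanity check (the query itself is just an illustrative example):

-- verify in Hive that the data written by DataX shows up
select id, area_code, province_name, iso
from ods_01_base_area
limit 10;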