3.Elasticsearch初步进阶
[toc]
1.文档批量操作
批量获取文档数据
批量获取文档数据是通过_mget的API来实现的
在URL中不指定index和type
请求方式:GET
请求地址:_mget
功能说明:可以通过ID批量获取不同index和type的数据
请求参数
docs:文档数组参数
_index:指定index
_type:指定type
_id:指定id
_source:指定要查询的字段
# 批量操作文档
GET _mget
{
"docs": [
{
"_index": "es_db",
"_type": "_doc",
"_id": 1
},
{
"_index": "es_db",
"_type": "_doc",
"_id": 3
}
]
}
查询结果
#! Deprecation: [types removal] Specifying types in multi get requests is deprecated.
{
"docs" : [
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "1",
"_version" : 1,
"_seq_no" : 0,
"_primary_term" : 1,
"found" : true,
"_source" : {
"name" : "张三",
"sex" : 1,
"age" : 25,
"address" : "广州天河公园",
"remark" : "java developer"
}
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "3",
"_version" : 1,
"_seq_no" : 2,
"_primary_term" : 1,
"found" : true,
"_source" : {
"name" : "rod",
"sex" : 0,
"age" : 26,
"address" : "广州白云山公园",
"remark" : "php developer"
}
}
]
}
在URL中指定index
请求方式:GET
请求地址:/{{indexName}}/_mget
功能说明 : 可以通过ID批量获取指定index下不同type的数据(index已在URL中指定,docs中可省略_index)
请求参数:
docs:文档数组参数
_index:指定index
_type:指定type
_id:指定id
_source:指定要查询的字段
GET /es_db/_mget
{
"docs": [
{
"_type": "_doc",
"_id": 3
},
{
"_type": "_doc",
"_id": 4
}
]
}
返回结果
#! Deprecation: [types removal] Specifying types in multi get requests is deprecated.
{
"docs" : [
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "3",
"_version" : 1,
"_seq_no" : 2,
"_primary_term" : 1,
"found" : true,
"_source" : {
"name" : "rod",
"sex" : 0,
"age" : 26,
"address" : "广州白云山公园",
"remark" : "php developer"
}
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "4",
"_version" : 1,
"_seq_no" : 3,
"_primary_term" : 1,
"found" : true,
"_source" : {
"name" : "admin",
"sex" : 0,
"age" : 22,
"address" : "长沙橘子洲头",
"remark" : "python assistant"
}
}
]
}
在URL中指定index和type
请求方式:GET
请求地址:/{{indexName}}/{{typeName}}/_mget
功能说明 : 可以通过ID批量获取指定index和type下的数据(index和type已在URL中指定,docs中只需指定_id)
请求参数:
docs:文档数组参数
_index:指定index
_type:指定type
_id:指定id
_source:指定要查询的字段
GET /es_db/_doc/_mget
{
"docs": [
{
"_id": 1
},
{
"_id": 2
}
]
}
返回结果
#! Deprecation: [types removal] Specifying types in multi get requests is deprecated.
{
"docs" : [
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "1",
"_version" : 1,
"_seq_no" : 0,
"_primary_term" : 1,
"found" : true,
"_source" : {
"name" : "张三",
"sex" : 1,
"age" : 25,
"address" : "广州天河公园",
"remark" : "java developer"
}
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "2",
"found" : false
}
]
}
2.批量操作文档数据
批量对文档进行写操作是通过_bulk的API来实现的
请求方式:POST
请求地址:_bulk
请求参数:通过_bulk操作文档,一般至少有两行参数(或偶数行参数)
第一行参数为指定操作的类型及操作的对象(index,type和id)
第二行参数才是操作的数据
参数类似于
#! 第一行参数
{
"actionName": {
"_index": "indexName",
"_type": "typeName",
"_id": "id"
}
}
#! 第二行参数
{
"field1": "value1",
"field2": "value2"
}
actionName:表示操作类型,主要有以下几个
create
index
delete
update
批量创建文档create
POST _bulk
{"create":{"_index":"article", "_type":"_doc", "_id":3}}
{"id":3,"title":"王者荣耀1","content":"王者荣耀666","tags":["java", "面向对象"],"create_time":1554015482530}
{"create":{"_index":"article", "_type":"_doc", "_id":4}}
{"id":4,"title":"王者荣耀2","content":"王者荣耀NB","tags":["java", "面向对象"],"create_time":1554015482530}
执行结果
#! Deprecation: [types removal] Specifying types in bulk requests is deprecated.
{
"took" : 146,
"errors" : false,
"items" : [
{
"create" : {
"_index" : "article",
"_type" : "_doc",
"_id" : "3",
"_version" : 1,
"result" : "created",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 0,
"_primary_term" : 1,
"status" : 201
}
},
{
"create" : {
"_index" : "article",
"_type" : "_doc",
"_id" : "4",
"_version" : 1,
"result" : "created",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 1,
"_primary_term" : 1,
"status" : 201
}
}
]
}
普通创建或全量替换index
POST _bulk
{"index":{"_index":"article", "_type":"_doc", "_id":3}}
{"id":3,"title":"王者荣耀(一)","content":"王者荣耀老师666","tags":["java", "面向对象"],"create_time":1554015482530}
{"index":{"_index":"article", "_type":"_doc", "_id":4}}
{"id":4,"title":"王者荣耀(二)","content":"王者荣耀NB","tags":["java", "面向对象"],"create_time":1554015482530}
执行结果
#! Deprecation: [types removal] Specifying types in bulk requests is deprecated.
{
"took" : 8,
"errors" : false,
"items" : [
{
"index" : {
"_index" : "article",
"_type" : "_doc",
"_id" : "3",
"_version" : 2,
"result" : "updated",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 2,
"_primary_term" : 1,
"status" : 200
}
},
{
"index" : {
"_index" : "article",
"_type" : "_doc",
"_id" : "4",
"_version" : 2,
"result" : "updated",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 3,
"_primary_term" : 1,
"status" : 200
}
}
]
}
备注
如果原文档不存在,则是创建
如果原文档存在,则是替换(全量修改原文档)
批量删除delete
POST _bulk
{"delete":{"_index":"article", "_type":"_doc", "_id":3}}
{"delete":{"_index":"article", "_type":"_doc", "_id":4}}
执行结果
#! Deprecation: [types removal] Specifying types in bulk requests is deprecated.
{
"took" : 8,
"errors" : false,
"items" : [
{
"delete" : {
"_index" : "article",
"_type" : "_doc",
"_id" : "3",
"_version" : 3,
"result" : "deleted",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 4,
"_primary_term" : 1,
"status" : 200
}
},
{
"delete" : {
"_index" : "article",
"_type" : "_doc",
"_id" : "4",
"_version" : 3,
"result" : "deleted",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 5,
"_primary_term" : 1,
"status" : 200
}
}
]
}
批量修改update
POST _bulk
{"update":{"_index":"article", "_type":"_doc", "_id":3}}
{"doc":{"title":"ES大法必修内功"}}
{"update":{"_index":"article", "_type":"_doc", "_id":4}}
{"doc":{"create_time":1554018421008}}
上一步删除了所以会404
#! Deprecation: [types removal] Specifying types in bulk requests is deprecated.
{
"took" : 2,
"errors" : true,
"items" : [
{
"update" : {
"_index" : "article",
"_type" : "_doc",
"_id" : "3",
"status" : 404,
"error" : {
"type" : "document_missing_exception",
"reason" : "[_doc][3]: document missing",
"index_uuid" : "QWPLvY7YSduxTOL_-_o-lw",
"shard" : "0",
"index" : "article"
}
}
},
{
"update" : {
"_index" : "article",
"_type" : "_doc",
"_id" : "4",
"status" : 404,
"error" : {
"type" : "document_missing_exception",
"reason" : "[_doc][4]: document missing",
"index_uuid" : "QWPLvY7YSduxTOL_-_o-lw",
"shard" : "0",
"index" : "article"
}
}
}
]
}
重新创建修改后
#! Deprecation: [types removal] Specifying types in bulk requests is deprecated.
{
"took" : 49,
"errors" : false,
"items" : [
{
"update" : {
"_index" : "article",
"_type" : "_doc",
"_id" : "3",
"_version" : 2,
"result" : "updated",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 8,
"_primary_term" : 1,
"status" : 200
}
},
{
"update" : {
"_index" : "article",
"_type" : "_doc",
"_id" : "4",
"_version" : 2,
"result" : "updated",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 9,
"_primary_term" : 1,
"status" : 200
}
}
]
}
3.DSL语言高级查询
Query DSL概述
Domain Specific Language:领域专用语言
Elasticsearch provides a full Query DSL based on JSON to define queries:Elasticsearch提供了基于JSON的完整查询DSL来定义查询
DSL由叶子查询子句和复合查询子句两种子句组成
无条件查询
无查询条件是查询所有,默认是查询所有的,或者使用match_all表示所有
# 无查询条件
GET /es_db/_doc/_search
{
"query": {
"match_all": {}
}
}
执行结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"name" : "张三",
"sex" : 1,
"age" : 25,
"address" : "广州天河公园",
"remark" : "java developer"
}
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"name" : "rod",
"sex" : 0,
"age" : 26,
"address" : "广州白云山公园",
"remark" : "php developer"
}
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.0,
"_source" : {
"name" : "admin",
"sex" : 0,
"age" : 22,
"address" : "长沙橘子洲头",
"remark" : "python assistant"
}
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"name" : "小明",
"sex" : 0,
"age" : 19,
"address" : "长沙岳麓山",
"remark" : "java architect assistant"
}
}
]
}
}
有条件查询
叶子条件查询(单字段查询条件)
模糊匹配
模糊匹配主要是针对文本类型的字段。文本类型的字段会对内容进行分词;查询时,也会对搜索条件进行分词,然后通过倒排索引查找到匹配的数据。模糊匹配主要通过match等参数来实现
match:通过match关键词模糊匹配条件内容
prefix:前缀匹配
regexp:通过正则表达式来匹配数据
match的复杂用法
match条件还支持以下参数
query : 指定匹配的值
operator : 匹配条件类型
and : 条件分词后都要匹配
or : 条件分词后有一个匹配即可(默认)
minimum_should_match : 指定最小匹配的数量
精确匹配
term : 单个条件相等
terms : 单个字段属于某个值数组内的值
range : 字段属于某个范围内的值
exists : 某个字段的值是否存在
ids : 通过ID批量查询
组合条件查询(多条件查询)
组合条件查询是将叶子条件查询语句进行组合而形成的一个完整的查询条件
bool : 各条件之间有and,or或not的关系
must : 各个条件都必须满足,即各条件是and的关系
should : 各个条件有一个满足即可,即各条件是or的关系
must_not : 不满足所有条件,即各条件是not的关系
filter : 不计算相关度评分,它不计算_score即相关度评分,效率更高
constant_score : 不计算相关度评分
must/filter/should/must_not 等的子条件是通过 term/terms/range/ids/exists/match 等叶子条件为参数的
注:以上参数,当只有一个搜索条件时,must等对应的是一个对象,当是多个条件时,对应的是一个数组
连接查询(多文档合并查询)
父子文档查询:parent/child
嵌套文档查询: nested
DSL查询语言中存在两种:查询DSL(query DSL)和过滤DSL(filter DSL)
区别
| queries | filters |
| --- | --- |
| relevance | boolean yes/no |
| full text | exact values |
| not cached | cached |
| slower | faster |
filter first,then query remaining docs
query DSL
在查询上下文中,查询会回答这个问题-“这个文档匹不匹配这个查询,它的相关度高么?”
如何验证匹配很好理解,如何计算相关度呢?ES中索引的数据都会存储一个_score分值,分值越高就代表越匹配。另外关于某个搜索的分值计算还是很复杂的,因此也需要一定的时间。
filter DSL
在过滤器上下文中,查询会回答这个问题——“这个文档匹不匹配?”
答案很简单,是或者不是。它不会去计算任何分值,也不会关心返回的排序问题,因此效率会高一点。
过滤上下文 是在使用filter参数时候的执行环境,比如在bool查询中使用must_not或者filter
另外,经常使用过滤器,ES会自动的缓存过滤器的内容,这对于查询来说,会提高很多性能。
Query方式查询:案例
根据名称精确查询姓名 term, term查询不会对字段进行分词查询,会采用精确匹配
注意: 采用term精确查询时, 查询字段的映射类型应为keyword.
精确查询
# 根据term精确匹配
POST /es_db/_doc/_search
{
"query": {
"term": {
"name": "admin"
}
}
}
执行结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.3940738,
"hits" : [
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.3940738,
"_source" : {
"name" : "admin",
"sex" : 0,
"age" : 22,
"address" : "长沙橘子洲头",
"remark" : "python assistant"
}
}
]
}
}
SQL: select * from student where name = 'admin'
模糊查询
根据地址信息模糊查询 match, match会根据该字段的分词器,进行分词查询
# 模糊查询
POST /es_db/_doc/_search
{
"from": 0,
"size": 2,
"query": {
"match": {
"address": "广州"
}
}
}
执行结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
"took" : 6,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.3862944,
"hits" : [
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.3862944,
"_source" : {
"name" : "张三",
"sex" : 1,
"age" : 25,
"address" : "广州天河公园",
"remark" : "java developer"
}
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.2978076,
"_source" : {
"name" : "rod",
"sex" : 0,
"age" : 26,
"address" : "广州白云山公园",
"remark" : "php developer"
}
}
]
}
}
SQL: select * from user where address like '%广州%' limit 0, 2
多字段模糊查询
多字段模糊匹配查询与精准查询 multi_match
# 多字段模糊匹配
POST /es_db/_doc/_search
{
"query": {
"multi_match": {
"query": "张三",
"fields": [
"address",
"name"
]
}
}
}
执行结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 2.1189923,
"hits" : [
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "1",
"_score" : 2.1189923,
"_source" : {
"name" : "张三",
"sex" : 1,
"age" : 25,
"address" : "广州天河公园",
"remark" : "java developer"
}
}
]
}
}
SQL: select * from student where name like '%张三%' or address like '%张三%'
未指定字段条件查询
未指定字段条件查询 query_string , 含 AND 与 OR 条件
# 未指定字段条件查询
POST /es_db/_doc/_search
{
"query": {
"query_string": {
"query": "广州 OR 长沙"
}
}
}
执行结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
"took" : 13,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 1.4877305,
"hits" : [
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.4877305,
"_source" : {
"name" : "小明",
"sex" : 0,
"age" : 19,
"address" : "长沙岳麓山",
"remark" : "java architect assistant"
}
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.3862944,
"_source" : {
"name" : "张三",
"sex" : 1,
"age" : 25,
"address" : "广州天河公园",
"remark" : "java developer"
}
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.3862944,
"_source" : {
"name" : "admin",
"sex" : 0,
"age" : 22,
"address" : "长沙橘子洲头",
"remark" : "python assistant"
}
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.2978076,
"_source" : {
"name" : "rod",
"sex" : 0,
"age" : 26,
"address" : "广州白云山公园",
"remark" : "php developer"
}
}
]
}
}
指定字段条件查询
指定字段条件查询 query_string , 含 AND 与 OR 条件
# 指定字段条件查询
POST /es_db/_doc/_search
{
"query": {
"query_string": {
"query": "admin OR 长沙",
"fields": [
"name",
"address"
]
}
}
}
执行结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 2.7803683,
"hits" : [
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "4",
"_score" : 2.7803683,
"_source" : {
"name" : "admin",
"sex" : 0,
"age" : 22,
"address" : "长沙橘子洲头",
"remark" : "python assistant"
}
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.4877305,
"_source" : {
"name" : "小明",
"sex" : 0,
"age" : 19,
"address" : "长沙岳麓山",
"remark" : "java architect assistant"
}
}
]
}
}
范围查询
json请求字符串中部分字段的含义
range:范围关键字
gte 大于等于
lte 小于等于
gt 大于
lt 小于
now 当前时间
# 范围查询
POST /es_db/_doc/_search
{
"query": {
"range": {
"age": {
"gte": 25,
"lte": 28
}
}
}
}
执行结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
"took" : 6,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"name" : "张三",
"sex" : 1,
"age" : 25,
"address" : "广州天河公园",
"remark" : "java developer"
}
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"name" : "rod",
"sex" : 0,
"age" : 26,
"address" : "广州白云山公园",
"remark" : "php developer"
}
}
]
}
}
SQL: select * from user where age between 25 and 28
分页,输出字段,排序综合查询
# 分页,输出字段,排序综合查询
POST /es_db/_doc/_search
{
"query": {
"range": {
"age": {
"gte": 25,
"lte": 28
}
}
},
"from": 0,
"size": 2,
"_source": [
"name",
"age",
"book"
],
"sort": {
"age": "desc"
}
}
执行结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "3",
"_score" : null,
"_source" : {
"name" : "rod",
"age" : 26
},
"sort" : [
26
]
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "1",
"_score" : null,
"_source" : {
"name" : "张三",
"age" : 25
},
"sort" : [
25
]
}
]
}
}
Filter过滤器方式查询,它的查询不会计算相关性分值,也不会对结果进行排序, 因此效率会高一点,查询的结果可以被缓存
Filter Context 对数据进行过滤
# Filter查询
POST /es_db/_doc/_search
{
"query": {
"bool": {
"filter": {
"term": {
"age": 25
}
}
}
}
}
执行结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.0,
"_source" : {
"name" : "张三",
"sex" : 1,
"age" : 25,
"address" : "广州天河公园",
"remark" : "java developer"
}
}
]
}
}
总结
match
match:模糊匹配,需要指定字段名,但是输入会进行分词,比如"hello world"会进行拆分为hello和world,然后匹配,如果字段中包含hello或者world,或者都包含的结果都会被查询出来,也就是说match是一个部分匹配的模糊查询。查询条件相对来说比较宽松。
term
term: 这种查询和match在有些时候是等价的,比如我们查询单个的词hello,那么会和match查询结果一样,但是如果查询"hello world",结果就相差很大,因为这个输入不会进行分词,就是说查询的时候,是查询字段分词结果中是否有"hello world"的字样,而不是查询字段中包含"hello world"的字样。当保存数据"hello world"时,elasticsearch会对字段内容进行分词,"hello world"会被分成hello和world,不存在"hello world",因此这里的查询结果会为空。这也是term查询和match的区别。
match_phrase
match_phrase:会对输入做分词,但是需要结果中也包含所有的分词,而且顺序要求一样。以"hello world"为例,要求结果中必须包含hello和world,而且还要求他们是连着的,顺序也是固定的,hello that world不满足,world hello也不满足条件。
query_string
query_string:和match类似,但是match需要指定字段名,query_string是在所有字段中搜索,范围更广泛。
4.文档映射
ES中映射可以分为动态映射和静态映射
动态映射
在关系数据库中,需要事先创建数据库,然后在该数据库下创建数据表,并创建表字段、类型、长度、主键等,最后才能基于表插入数据。而Elasticsearch中不需要定义Mapping映射(即关系型数据库的表、字段等),在文档写入Elasticsearch时,会根据文档字段自动识别类型,这种机制称之为动态映射。
动态映射规则如下
| JSON数据 | 自动推测的类型 |
| --- | --- |
| null | 没有字段被添加 |
| true/false | boolean类型 |
| 小数 | float |
| 数字 | long |
| 日期 | date或text |
| 字符串 | text |
| 数组 | 由数组第一个非空值决定 |
| JSON对象 | object类型 |
删除原创建的索引
DELETE /es_db
创建索引
PUT /es_db
创建文档(ES根据数据类型, 会自动创建映射)
# 创建文档
PUT /es_db/_doc/1
{
"name": "Jack",
"sex": 1,
"age": 25,
"book": "java入门至精通",
"address": "广州小蛮腰"
}
获取文档映射
# 获取文档映射
GET /es_db/_mapping
执行结果
{
"es_db" : {
"mappings" : {
"properties" : {
"address" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"age" : {
"type" : "long"
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"remark" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"sex" : {
"type" : "long"
}
}
}
}
}
静态映射
在Elasticsearch中也可以事先定义好映射,包含文档的各字段类型、分词器等,这种方式称之为静态映射。
删除原创建的索引
DELETE /es_db
创建索引
PUT /es_db
设置文档映射
# 设置文档映射
PUT /es_db_copy
{
"mappings": {
"properties": {
"name": {
"type": "keyword",
"index": true,
"store": true
},
"sex": {
"type": "integer",
"index": true,
"store": true
},
"age": {
"type": "integer",
"index": true,
"store": true
},
"book": {
"type": "text",
"index": true,
"store": true
},
"address": {
"type": "text",
"index": true,
"store": true
}
}
}
}
执行结果
{
"acknowledged" : true,
"shards_acknowledged" : true,
"index" : "es_db_copy"
}
{
"es_db_copy" : {
"mappings" : {
"properties" : {
"address" : {
"type" : "text",
"store" : true
},
"age" : {
"type" : "integer",
"store" : true
},
"book" : {
"type" : "text",
"store" : true
},
"name" : {
"type" : "keyword",
"store" : true
},
"sex" : {
"type" : "integer",
"store" : true
}
}
}
}
}
根据静态映射创建文档
PUT /es_db_copy/_doc/1
{
"name": "Jack",
"sex": 1,
"age": 25,
"book": "elasticSearch入门至精通",
"address": "广州车陂"
}
执行结果
{
"_index" : "es_db_copy",
"_type" : "_doc",
"_id" : "1",
"_version" : 1,
"result" : "created",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 0,
"_primary_term" : 1
}
获取文档映射
# 获取文档映射
GET /es_db_copy/_mapping
执行结果
{
"es_db_copy" : {
"mappings" : {
"properties" : {
"address" : {
"type" : "text",
"store" : true
},
"age" : {
"type" : "integer",
"store" : true
},
"book" : {
"type" : "text",
"store" : true
},
"name" : {
"type" : "keyword",
"store" : true
},
"sex" : {
"type" : "integer",
"store" : true
}
}
}
}
}
5.核心类型(core data type)
字符串:String,String类型包含text和keyword
text:该类型被用来索引长文本,在创建索引前会将这些文本进行分词,转化为词的组合,建立索引;允许es来检索这些词,text类型不能用来排序和聚合。
keyword:该类型不会被分词,可以被用来检索过滤、排序和聚合;keyword类型的字段无法像text那样进行分词模糊检索。
数值型:long,integer,short,byte,double,float
日期型:date
布尔型:boolean
6.keyword与text映射类型的区别
将 book 字段设置为 keyword 映射(只能精准查询, 不能分词查询,能聚合、排序)
POST /es_db_copy/_doc/_search
{
"query": {
"term": {
"book": "elasticSearch入门至精通"
}
}
}
执行结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
# 根据keyword精确查询
POST /es_db_copy/_doc/_search
{
"query": {
"term": {
"name": "Jack"
}
}
}
执行结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.2876821,
"hits" : [
{
"_index" : "es_db_copy",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.2876821,
"_source" : {
"name" : "Jack",
"sex" : 1,
"age" : 25,
"book" : "elasticSearch入门至精通",
"address" : "广州车陂"
}
}
]
}
}
book是text类型就可以模糊查询
# 模糊查询
POST /es_db_copy/_doc/_search
{
"query": {
"match": {
"book": "elasticSearch入门至精通"
}
}
}
执行结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.7260926,
"hits" : [
{
"_index" : "es_db_copy",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.7260926,
"_source" : {
"name" : "Jack",
"sex" : 1,
"age" : 25,
"book" : "elasticSearch入门至精通",
"address" : "广州车陂"
}
}
]
}
}
7.创建静态映射时指定text类型的ik分词器
设置ik分词器的文档映射
先删除之前的es_db
再创建新的es_db
定义ik_smart的映射
# 创建映射时指定IK分词器
PUT /es_db
{
"mappings": {
"properties": {
"name": {
"type": "keyword",
"index": true,
"store": true
},
"sex": {
"type": "integer",
"index": true,
"store": true
},
"age": {
"type": "integer",
"index": true,
"store": true
},
"book": {
"type": "text",
"index": true,
"store": true,
"analyzer": "ik_smart",
"search_analyzer": "ik_smart"
},
"address": {
"type": "text",
"index": true,
"store": true
}
}
}
}
# 获取文档映射
GET /es_db/_mapping
执行结果
{
"es_db" : {
"mappings" : {
"properties" : {
"address" : {
"type" : "text",
"store" : true
},
"age" : {
"type" : "integer",
"store" : true
},
"book" : {
"type" : "text",
"store" : true,
"analyzer" : "ik_smart"
},
"name" : {
"type" : "keyword",
"store" : true
},
"remark" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"sex" : {
"type" : "integer",
"store" : true
}
}
}
}
}
分词查询
# 分词查询
POST /es_db/_doc/_search
{
"query": {
"match": {
"address": "广"
}
}
}
POST /es_db/_doc/_search
{
"query": {
"match": {
"address": "广州"
}
}
}
执行结果
#! Deprecation: [types removal] Specifying types in search requests is deprecated.
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.7509375,
"hits" : [
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.7509375,
"_source" : {
"name" : "张三",
"sex" : 1,
"age" : 25,
"address" : "广州天河公园",
"remark" : "java developer"
}
},
{
"_index" : "es_db",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.6391755,
"_source" : {
"name" : "rod",
"sex" : 0,
"age" : 26,
"address" : "广州白云山公园",
"remark" : "php developer"
}
}
]
}
}
8.对已存在的mapping映射进行修改
如果要推倒现有的映射, 你得重新建立一个静态索引
然后把之前索引里的数据导入到新的索引里
删除原创建的索引
为新索引起个别名, 为原索引名
# 对现有的mapping映射进行修改
POST _reindex
{
"source": {
"index": "db_index"
},
"dest": {
"index": "db_index_2"
}
}
DELETE /db_index
PUT /db_index_2/_alias/db_index
注意: 通过这几个步骤就实现了索引的平滑过渡,并且是零停机
9.Elasticsearch乐观并发控制
在数据库领域中,有两种方法来确保并发更新,不会丢失数据:
悲观并发控制
这种方法被关系型数据库广泛使用,它假定有变更冲突可能发生,因此阻塞访问资源以防止冲突。 一个典型的例子是读取一行数据之前先将其锁住,确保只有放置锁的线程能够对这行数据进行修改。
乐观并发控制
Elasticsearch 中使用的这种方法假定冲突是不可能发生的,并且不会阻塞正在尝试的操作。 然而,如果源数据在读写当中被修改,更新将会失败。应用程序接下来将决定该如何解决冲突。 例如,可以重试更新、使用新的数据、或者将相关情况报告给用户。
再以创建一个文档为例 ES老版本
PUT /db_index/_doc/1
{
"name": "Jack",
"sex": 1,
"age": 25,
"book": "Spring Boot 入门到精通",
"remark": "hello world"
}
执行结果
{
"_index" : "db_index",
"_type" : "_doc",
"_id" : "1",
"_version" : 1,
"result" : "created",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 0,
"_primary_term" : 1
}
实现_version乐观锁更新文档
# 实现_version乐观锁更新文档
PUT /db_index/_doc/1?version=1
{
"name": "Jack",
"sex": 1,
"age": 25,
"book": "Spring Boot 入门到精通",
"remark": "hello world"
}
使用老版本的version参数方式,在新版本(7.x)中会报错
{
"error": {
"root_cause": [
{
"type": "action_request_validation_exception",
"reason": "Validation Failed: 1: internal versioning can not be used for optimistic concurrency control. Please use `if_seq_no` and `if_primary_term` instead;"
}
],
"type": "action_request_validation_exception",
"reason": "Validation Failed: 1: internal versioning can not be used for optimistic concurrency control. Please use `if_seq_no` and `if_primary_term` instead;"
},
"status": 400
}
**ES新版本(7.x)不使用version进行并发版本控制,而是使用 if_seq_no=版本值&if_primary_term=文档位置**
_seq_no:文档的序列号,分片内每次写操作(创建、更新、删除)都会递增,用于并发控制,作用类似于_version
_primary_term:主分片任期号,每当主分片发生变更(重新选举)时递增,与_seq_no配合唯一标识一次变更
# 新的索引
POST /es_sc/_search
DELETE /es_sc
POST /es_sc/_doc/1
{
"id": 1,
"name": "王者荣耀",
"desc": "王者荣耀和平精英老师",
"create_date": "2021-02-24"
}
POST /es_sc/_update/1
{
"doc": {
"name": "王者荣耀666"
}
}
POST /es_sc/_update/1/?if_seq_no=1&if_primary_term=1
{
"doc": {
"name": "王者荣耀1"
}
}
POST /es_sc/_update/1/?if_seq_no=1&if_primary_term=1
{
"doc": {
"name": "王者荣耀2"
}
}
执行结果
{
"error": {
"root_cause": [
{
"type": "version_conflict_engine_exception",
"reason": "[1]: version conflict, required seqNo [1], primary term [1]. current document has seqNo [2] and primary term [1]",
"index_uuid": "QRMOHFiiS--5j0OdALb-uA",
"shard": "0",
"index": "es_sc"
}
],
"type": "version_conflict_engine_exception",
"reason": "[1]: version conflict, required seqNo [1], primary term [1]. current document has seqNo [2] and primary term [1]",
"index_uuid": "QRMOHFiiS--5j0OdALb-uA",
"shard": "0",
"index": "es_sc"
},
"status": 409
}