Elasticsearch脚本查询
什么/为什么
Scripting是Elasticsearch提供的一种强大的脚本功能,专门用于在复杂场景下进行自定义编程。ES支持多种脚本语言,如painless,其语法类似于Java,也有注释、关键字、类型、变量、函数等;其性能相比其他脚本语言要高出数倍,并且安全可靠,可用于内联脚本和存储脚本。
核心就是解决复杂的查询以及兼顾性能.
ES支持的一些脚本语言
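| Language   | Sandboxed | Required plugin | Purpose                          |
|------------|-----------|-----------------|----------------------------------|
| painless   | 是        | Built-in        | Purpose-built for Elasticsearch  |
| expression | 是        | Built-in        | Fast custom ranking and sorting  |
| mustache   | 是        | Built-in        | Templates                        |
| java       | 否        | You write it!   | Expert API                       |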
语法格式
官网文档:https://www.elastic.co/guide/en/elasticsearch/reference/7.17/modules-scripting.html
## ctx._source<.field_name>
## ctx指的是命中(hit)的文档上下文,其中的数据都支持修改;但元数据不要修改,修改后数据就变了,最好只修改_source内的字段
GET test_idx_aggs/_doc/1
POST test_idx_aggs/_doc/1
{
"script": {
"source": "ctx._id+=1"
}
}
GET test_idx_aggs/_doc/1
POST test_idx_aggs/_doc/1
{
"script": {
"source": "ctx._source.price+=1"
}
}
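不要修改元数据
注意:上面两个请求用的是 _doc 接口,它会把请求体当作文档内容整体写入,脚本并不会被执行;要执行脚本应使用 POST <index>/_update/<id> 接口。
修改元数据前后(下面的结果即通过 _doc 接口提交脚本后的情况:原文档的 _source 被整体替换成了 script 对象)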
{
"_index" : "test_idx_aggs",
"_type" : "_doc",
"_id" : "2",
"_version" : 1,
"_seq_no" : 1,
"_primary_term" : 1,
"found" : true,
"_source" : {
"name" : "小米NFC手机",
"desc" : "支持全功能NFC,手机中的滑翔机",
"price" : 4999,
"lv" : "旗舰机",
"type" : "手机",
"createtime" : "2020-05-21T08:00:00Z",
"tags" : [
"性价比",
"发烧",
"公交卡"
]
}
}
# POST test_idx_aggs/_doc/2
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
"_index" : "test_idx_aggs",
"_type" : "_doc",
"_id" : "2",
"_version" : 2,
"result" : "updated",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 15,
"_primary_term" : 3
}
# GET test_idx_aggs/_doc/2
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
"_index" : "test_idx_aggs",
"_type" : "_doc",
"_id" : "2",
"_version" : 2,
"_seq_no" : 15,
"_primary_term" : 3,
"found" : true,
"_source" : {
"script" : {
"source" : "ctx._seq_no+=1"
}
}
}
修改数据
GET test_idx_aggs/_doc/3
POST test_idx_aggs/_update/3
{
"script": {
"lang": "painless",
// 注意add方法得看数据具体类型,这里如果是数组链表类似的结构可以执行add,如果源数据是一个字符串则报错
"source": "ctx._source.tags.add('NFC')"
}
}
GET test_idx_aggs/_doc/3
删除数据
脚本
GET test_idx_aggs/_doc/1
POST test_idx_aggs/_update/1
{
"script": {
"lang": "painless",
"source": "ctx.op='delete'"
}
}
GET test_idx_aggs/_doc/1
结果
# GET test_idx_aggs/_doc/1
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
"_index" : "test_idx_aggs",
"_type" : "_doc",
"_id" : "1",
"_version" : 5,
"_seq_no" : 14,
"_primary_term" : 3,
"found" : true,
"_source" : {
"script" : {
"source" : "ctx._id+=1"
}
}
}
# POST test_idx_aggs/_update/1
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
"_index" : "test_idx_aggs",
"_type" : "_doc",
"_id" : "1",
"_version" : 6,
"result" : "deleted",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 17,
"_primary_term" : 3
}
# GET test_idx_aggs/_doc/1
#! Elasticsearch built-in security features are not enabled. Without authentication, your cluster could be accessible to anyone. See https://www.elastic.co/guide/en/elasticsearch/reference/7.17/security-minimal-setup.html to enable security.
{
"_index" : "test_idx_aggs",
"_type" : "_doc",
"_id" : "1",
"found" : false
}
查询验证
GET test_idx_aggs/_search
{
"query": {
"match": {
"_id": 1
}
}
}
upsert
有则更新无则插入
GET test_idx_aggs/_doc/1
POST test_idx_aggs/_update/1
{
"script": {
"lang": "painless",
"source": "ctx._source.price+=99"
}
, "upsert": {
"name" : "小米NFC手机",
"desc" : "支持全功能NFC,手机中的滑翔机",
"price" : 4999,
"lv" : "旗舰机",
"type" : "手机",
"createtime" : "2020-05-21T08:00:00Z",
"tags" : [
"性价比",
"发烧",
"公交卡"
]
}
}
GET test_idx_aggs/_doc/1
其他
GET test_idx_aggs/_search
{
"script_fields": {
// 查询结果存储的字段名称
"my_col_name_result": {
"script": {
// 可以改为expression语言,区别是什么? 结果一样语法不一样
"lang": "painless",
// 查询时不用ctx对象了使用doc来获取
"source": "doc['price'].value*1.1"
}
}
}
}
更新语句
POST test_idx_aggs/_update/6
{
"doc": {
"price": 9999
}
}
参数化查询
POST test_idx_aggs/_update/6
{
"script": {
"lang": "painless",
//tag_new:下边参数名称
"source": "ctx._source.tags.add(params.tag_new)",
// 编译脚本,缓存到缓冲区,参数可以传递没有硬编码
"params": {
"tag_new": "new"
}
}
}
脚本模板
/_scripts/{script_template_id}
创建并使用脚本模板
DELETE _scripts/calculate_score
## calculate_score:模板id
POST _scripts/calculate_score
{
"script": {
"lang": "painless",
// my_modifier:定义的参数名称
"source": "Math.log(_score * 2) + params['my_modifier']"
}
}
GET _scripts/calculate_score
## 索引名称
GET test_idx_aggs/_search
{
"query": {
"script_score": {
"query": {
"match_all": {}
},
"script": {
"id": "calculate_score",
"params": {
"my_modifier": 2
}
}
}
}
}
函数式编程
POST test_idx_aggs/_update/1
{
"script": {
"lang": "painless",
// 可以写复杂的脚本
"source": """
ctx._source.tags.add(params.param_name1);
ctx._source.price -= 1;
""",
"params": {
"param_name1": "new",
"param_name2": 2
}
}
}
GET test_idx_aggs/_doc/1
POST test_idx_aggs/_update/1
{
"script": {
"lang": "painless",
"source": """
// 正则匹配然后 给name增加点内容; ==~表示匹配的意思; /[\s\S]*小米[\s\S]*/相当于 %小米%
if(ctx._source.name ==~ /[\s\S]*小米[\s\S]*/) {
ctx._source.name+="xxxxxxxxxxxxxx"
}
"""
}
}
GET test_idx_aggs/_doc/1
PUT test_index/_bulk?refresh
{"index":{"_id":1}}
{"ajbh": "12345","ajmc": "立案案件","lasj": "2020/05/21 13:25:23","jsbax_sjjh2_xz_ryjbxx_cleaning": [{"XM": "张三","NL": "30","SF": "男"},{"XM": "李四","NL": "31","SF": "男"},{"XM": "王五","NL": "30","SF": "女"},{"XM": "赵六","NL": "23","SF": "男"}]}
{"index":{"_id":2}}
{"ajbh": "563245","ajmc": "结案案件","lasj": "2020/05/21 13:25:23","jsbax_sjjh2_xz_ryjbxx_cleaning": [{"XM": "张三2","NL": "30","SF": "男"},{"XM": "李四2","NL": "31","SF": "男"},{"XM": "王五2","NL": "30","SF": "女"},{"XM": "赵六2","NL": "23","SF": "女"}]}
{"index":{"_id":3}}
{"ajbh": "12345","ajmc": "立案案件","lasj": "2020/05/21 13:25:23","jsbax_sjjh2_xz_ryjbxx_cleaning": [{"XM": "张三3","NL": "30","SF": "男"},{"XM": "李四3","NL": "31","SF": "男"},{"XM": "王五3","NL": "30","SF": "女"},{"XM": "赵六3","NL": "23","SF": "男"}]}
GET test_index/_search
GET test_index/_search
{
"aggs": {
"agg_NAME": {
"sum": {
"script": {
"lang": "painless",
"source": """
int total = 0;
for(int i = 0; i<params['_source']['jsbax_sjjh2_xz_ryjbxx_cleaning'].length; i++){
if(params['_source']['jsbax_sjjh2_xz_ryjbxx_cleaning'][i]['SF']=='男'){
total+=1;
}
}
return total;
"""
}
}
}
}
}
doc和 _source访问属性区别
doc[<field>].value和params['_source'][<field>]
- doc只适用于简单类型的字段(不能返回JSON对象),用于复杂类型会报错
- doc会把字段值(doc values)加载到内存,性能高,内存占用也高
- doc只有在未分词(not analyzed)或单个词条的字段上才有意义
- _source方式每次都要加载并解析整个文档,比较慢