验证ElasticSearch 分词的BUG

news 2024/9/17 7:11:21

验证ElasticSearch 分词的BUG


ElasticSearch 版本号: 6.7.0

BUG 重现


PUT test_2022
  "settings": {
    "analysis": {
      "filter": {
        "pinyin_filter": {
          "type": "pinyin"
      "analyzer": {
        "custome_standard": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
        "custome_chinese": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": [
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": [
      "normalizer": {
        "custome_normalizer": {
          "type": "custom",
          "char_filter": [],
          "filter": [
    "index.mapping.coerce": false,
    "number_of_shards" : 5,
    "number_of_replicas" : 1
  "mappings": {
    "_doc": {
      "properties": {
        "id": {
          "type": "long"
        "workContent": {
              "type": "text",
              "analyzer": "custome_standard",
              "fields": {
                "raw": {
                  "type": "keyword",
                  "normalizer": "custome_normalizer"
                "chinese": {
                  "type": "text",
                  "analyzer": "custome_chinese",


POST test_2022/_doc/1
{
  "id": 1,
  "workContent": "<span>0-1冷启动阶段,构建小鹏汽车社群营销体系,通过内容营销、活动营销两只抓手吸引沉淀潜在车主。结合公司重大传播节点,策划事件营销活动占领用户心智,在 G3 未上市之前进行品牌产品背书,提高潜客对产品的信任度。<br> <br>业绩荣誉: <br>1、配合 G3 发布会节奏,策划组织执行小鹏汽车《一路向北》品牌事件营销,通过 6 天 8 人 2553 公里的极客朝圣之旅,为小鹏汽车制造品质背书,铺垫口碑。这是国内新造车 势力的超长途自驾第一测。视频分别于小鹏汽车品牌媒体发布会、G3 发布会何小鹏讲演前及小鹏汽车各城市端展厅播放,获得行业及公司内部好评,相关链接: <br>A、外部视频链接:https://chejiahao.autohome.com.cn/info/2290990 <br>B、内部报道链接:https://mp.weixin.qq.com/s/A2AfghYRJ8Hc2gMMr_C8dQ <br> <br>2、配合 G3 发布会节奏,联合汽车之家资源组织策划《G3 价格竞猜》项目。该项目以极底成本,获得较好的曝光声量及活动参与效果。项目外部论坛总点击量环比 增长 108.9%,总回复数环比增长 97.3% ,成果如下:<br>A、新闻曝光举例:https://www.autohome.com.cn/news/201804/915688.html <br>B、论坛活动地址: https://club.autohome.com.cn/bbs/thread/7fc721404489d922/72434411-1.html <br> <br>3、撰写原创内容作品,抢占汽车之家首页首屏文字链和论坛相关位置,争取免费露出位:<br>A、小鹏汽车 1.0 评测:宝剑锋从磨砺出 https://chejiahao.autohome.com.cn/info/2239269 <br>B、一路向北 小鹏汽车广州-北京自驾游记 https://chejiahao.autohome.com.cn/info/2290598 </span>"
}


{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards startOffset=750,endOffset=751,lastStartOffset=751 for field 'workContent.chinese'"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards startOffset=750,endOffset=751,lastStartOffset=751 for field 'workContent.chinese'"
  },
  "status": 400
}





POST test_2022/_analyze
{
  "field": "workContent.chinese",
  "text": "<span>0-1冷启动阶段,构建小鹏汽车社群营销体系,通过内容营销、活动营销两只抓手吸引沉淀潜在车主。结合公司重大传播节点,策划事件营销活动占领用户心智,在 G3 未上市之前进行品牌产品背书,提高潜客对产品的信任度。<br> <br>业绩荣誉: <br>1、配合 G3 发布会节奏,策划组织执行小鹏汽车《一路向北》品牌事件营销,通过 6 天 8 人 2553 公里的极客朝圣之旅,为小鹏汽车制造品质背书,铺垫口碑。这是国内新造车 势力的超长途自驾第一测。视频分别于小鹏汽车品牌媒体发布会、G3 发布会何小鹏讲演前及小鹏汽车各城市端展厅播放,获得行业及公司内部好评,相关链接: <br>A、外部视频链接:https://chejiahao.autohome.com.cn/info/2290990 <br>B、内部报道链接:https://mp.weixin.qq.com/s/A2AfghYRJ8Hc2gMMr_C8dQ <br> <br>2、配合 G3 发布会节奏,联合汽车之家资源组织策划《G3 价格竞猜》项目。该项目以极底成本,获得较好的曝光声量及活动参与效果。项目外部论坛总点击量环比 增长 108.9%,总回复数环比增长 97.3% ,成果如下:<br>A、新闻曝光举例:https://www.autohome.com.cn/news/201804/915688.html <br>B、论坛活动地址: https://club.autohome.com.cn/bbs/thread/7fc721404489d922/72434411-1.html <br> <br>3、撰写原创内容作品,抢占汽车之家首页首屏文字链和论坛相关位置,争取免费露出位:<br>A、小鹏汽车 1.0 评测:宝剑锋从磨砺出 https://chejiahao.autohome.com.cn/info/2239269 <br>B、一路向北 小鹏汽车广州-北京自驾游记 https://chejiahao.autohome.com.cn/info/2290598 </span>"
}


  "tokens" : [
      "token" : "span",
      "start_offset" : 1,
      "end_offset" : 5,
      "type" : "ENGLISH",
      "position" : 0
      "token" : "0-1",
      "start_offset" : 6,
      "end_offset" : 9,
      "type" : "LETTER",
      "position" : 1
      "token" : "0",
      "start_offset" : 6,
      "end_offset" : 7,
      "type" : "ARABIC",
      "position" : 2
      "token" : "1",
      "start_offset" : 8,
      "end_offset" : 9,
      "type" : "ARABIC",
      "position" : 3
      "token" : "冷启动",
      "start_offset" : 9,
      "end_offset" : 12,
      "type" : "CN_WORD",
      "position" : 4
      "token" : "冷",
      "start_offset" : 9,
      "end_offset" : 10,
      "type" : "CN_CHAR",
      "position" : 5
      "token" : "启动",
      "start_offset" : 10,
      "end_offset" : 12,
      "type" : "CN_WORD",
      "position" : 6
      "token" : "阶段",
      "start_offset" : 12,
      "end_offset" : 14,
      "type" : "CN_WORD",
      "position" : 7
      "token" : "构建",
      "start_offset" : 15,
      "end_offset" : 17,
      "type" : "CN_WORD",
      "position" : 8
      "token" : "小",
      "start_offset" : 17,
      "end_offset" : 18,
      "type" : "CN_CHAR",
      "position" : 9
      "token" : "鹏",
      "start_offset" : 18,
      "end_offset" : 19,
      "type" : "CN_CHAR",
      "position" : 10
      "token" : "汽车",
      "start_offset" : 19,
      "end_offset" : 21,
      "type" : "CN_WORD",
      "position" : 11
      "token" : "社群",
      "start_offset" : 21,
      "end_offset" : 23,
      "type" : "CN_WORD",
      "position" : 12
      "token" : "营销",
      "start_offset" : 23,
      "end_offset" : 25,
      "type" : "CN_WORD",
      "position" : 13
      "token" : "体系",
      "start_offset" : 25,
      "end_offset" : 27,
      "type" : "CN_WORD",
      "position" : 14
      "token" : "通过",
      "start_offset" : 28,
      "end_offset" : 30,
      "type" : "CN_WORD",
      "position" : 15
      "token" : "内容",
      "start_offset" : 30,
      "end_offset" : 32,
      "type" : "CN_WORD",
      "position" : 16
      "token" : "营销",
      "start_offset" : 32,
      "end_offset" : 34,
      "type" : "CN_WORD",
      "position" : 17
      "token" : "活动",
      "start_offset" : 35,
      "end_offset" : 37,
      "type" : "CN_WORD",
      "position" : 18
      "token" : "营销",
      "start_offset" : 37,
      "end_offset" : 39,
      "type" : "CN_WORD",
      "position" : 19
      "token" : "两只",
      "start_offset" : 39,
      "end_offset" : 41,
      "type" : "CN_WORD",
      "position" : 20
      "token" : "两",
      "start_offset" : 39,
      "end_offset" : 40,
      "type" : "COUNT",
      "position" : 21
      "token" : "只",
      "start_offset" : 40,
      "end_offset" : 41,
      "type" : "CN_CHAR",
      "position" : 22
      "token" : "抓手",
      "start_offset" : 41,
      "end_offset" : 43,
      "type" : "CN_WORD",
      "position" : 23
      "token" : "吸引",
      "start_offset" : 43,
      "end_offset" : 45,
      "type" : "CN_WORD",
      "position" : 24
      "token" : "沉淀",
      "start_offset" : 45,
      "end_offset" : 47,
      "type" : "CN_WORD",
      "position" : 25
      "token" : "潜在",
      "start_offset" : 47,
      "end_offset" : 49,
      "type" : "CN_WORD",
      "position" : 26
      "token" : "潜",
      "start_offset" : 47,
      "end_offset" : 48,
      "type" : "CN_CHAR",
      "position" : 27
      "token" : "在车",
      "start_offset" : 48,
      "end_offset" : 50,
      "type" : "CN_WORD",
      "position" : 28
      "token" : "车主",
      "start_offset" : 49,
      "end_offset" : 51,
      "type" : "CN_WORD",
      "position" : 29
      "token" : "结合",
      "start_offset" : 52,
      "end_offset" : 54,
      "type" : "CN_WORD",
      "position" : 30
      "token" : "公司",
      "start_offset" : 54,
      "end_offset" : 56,
      "type" : "CN_WORD",
      "position" : 31
      "token" : "重大",
      "start_offset" : 56,
      "end_offset" : 58,
      "type" : "CN_WORD",
      "position" : 32
      "token" : "传播",
      "start_offset" : 58,
      "end_offset" : 60,
      "type" : "CN_WORD",
      "position" : 33
      "token" : "节点",
      "start_offset" : 60,
      "end_offset" : 62,
      "type" : "CN_WORD",
      "position" : 34
      "token" : "策划",
      "start_offset" : 63,
      "end_offset" : 65,
      "type" : "CN_WORD",
      "position" : 35
      "token" : "事件",
      "start_offset" : 65,
      "end_offset" : 67,
      "type" : "CN_WORD",
      "position" : 36
      "token" : "营销",
      "start_offset" : 67,
      "end_offset" : 69,
      "type" : "CN_WORD",
      "position" : 37
      "token" : "活动",
      "start_offset" : 69,
      "end_offset" : 71,
      "type" : "CN_WORD",
      "position" : 38
      "token" : "占领",
      "start_offset" : 71,
      "end_offset" : 73,
      "type" : "CN_WORD",
      "position" : 39
      "token" : "占",
      "start_offset" : 71,
      "end_offset" : 72,
      "type" : "CN_CHAR",
      "position" : 40
      "token" : "领用",
      "start_offset" : 72,
      "end_offset" : 74,
      "type" : "CN_WORD",
      "position" : 41
      "token" : "用户",
      "start_offset" : 73,
      "end_offset" : 75,
      "type" : "CN_WORD",
      "position" : 42
      "token" : "心智",
      "start_offset" : 75,
      "end_offset" : 77,
      "type" : "CN_WORD",
      "position" : 43
      "token" : "在",
      "start_offset" : 78,
      "end_offset" : 79,
      "type" : "CN_CHAR",
      "position" : 44
      "token" : "g3",
      "start_offset" : 80,
      "end_offset" : 82,
      "type" : "LETTER",
      "position" : 45
      "token" : "g",
      "start_offset" : 80,
      "end_offset" : 81,
      "type" : "ENGLISH",
      "position" : 46
      "token" : "3",
      "start_offset" : 81,
      "end_offset" : 82,
      "type" : "ARABIC",
      "position" : 47
      "token" : "未上市",
      "start_offset" : 83,
      "end_offset" : 86,
      "type" : "CN_WORD",
      "position" : 48
      "token" : "未上",
      "start_offset" : 83,
      "end_offset" : 85,
      "type" : "CN_WORD",
      "position" : 49
      "token" : "上市",
      "start_offset" : 84,
      "end_offset" : 86,
      "type" : "CN_WORD",
      "position" : 50
      "token" : "之前",
      "start_offset" : 86,
      "end_offset" : 88,
      "type" : "CN_WORD",
      "position" : 51
      "token" : "之",
      "start_offset" : 86,
      "end_offset" : 87,
      "type" : "CN_CHAR",
      "position" : 52
      "token" : "前进",
      "start_offset" : 87,
      "end_offset" : 89,
      "type" : "CN_WORD",
      "position" : 53
      "token" : "进行",
      "start_offset" : 88,
      "end_offset" : 90,
      "type" : "CN_WORD",
      "position" : 54
      "token" : "品牌",
      "start_offset" : 90,
      "end_offset" : 92,
      "type" : "CN_WORD",
      "position" : 55
      "token" : "产品",
      "start_offset" : 92,
      "end_offset" : 94,
      "type" : "CN_WORD",
      "position" : 56
      "token" : "背书",
      "start_offset" : 94,
      "end_offset" : 96,
      "type" : "CN_WORD",
      "position" : 57
      "token" : "提高",
      "start_offset" : 97,
      "end_offset" : 99,
      "type" : "CN_WORD",
      "position" : 58
      "token" : "潜",
      "start_offset" : 99,
      "end_offset" : 100,
      "type" : "CN_CHAR",
      "position" : 59
      "token" : "客",
      "start_offset" : 100,
      "end_offset" : 101,
      "type" : "CN_CHAR",
      "position" : 60
      "token" : "对",
      "start_offset" : 101,
      "end_offset" : 102,
      "type" : "CN_CHAR",
      "position" : 61
      "token" : "产品",
      "start_offset" : 102,
      "end_offset" : 104,
      "type" : "CN_WORD",
      "position" : 62
      "token" : "的",
      "start_offset" : 104,
      "end_offset" : 105,
      "type" : "CN_CHAR",
      "position" : 63
      "token" : "信任",
      "start_offset" : 105,
      "end_offset" : 107,
      "type" : "CN_WORD",
      "position" : 64
      "token" : "度",
      "start_offset" : 107,
      "end_offset" : 108,
      "type" : "CN_CHAR",
      "position" : 65
      "token" : "br",
      "start_offset" : 110,
      "end_offset" : 112,
      "type" : "ENGLISH",
      "position" : 66
      "token" : "br",
      "start_offset" : 115,
      "end_offset" : 117,
      "type" : "ENGLISH",
      "position" : 67
      "token" : "业绩",
      "start_offset" : 118,
      "end_offset" : 120,
      "type" : "CN_WORD",
      "position" : 68
      "token" : "荣誉",
      "start_offset" : 120,
      "end_offset" : 122,
      "type" : "CN_WORD",
      "position" : 69
      "token" : "br",
      "start_offset" : 125,
      "end_offset" : 127,
      "type" : "ENGLISH",
      "position" : 70
      "token" : "1",
      "start_offset" : 128,
      "end_offset" : 129,
      "type" : "ARABIC",
      "position" : 71
      "token" : "配合",
      "start_offset" : 130,
      "end_offset" : 132,
      "type" : "CN_WORD",
      "position" : 72
      "token" : "g3",
      "start_offset" : 133,
      "end_offset" : 135,
      "type" : "LETTER",
      "position" : 73
      "token" : "g",
      "start_offset" : 133,
      "end_offset" : 134,
      "type" : "ENGLISH",
      "position" : 74
      "token" : "3",
      "start_offset" : 134,
      "end_offset" : 135,
      "type" : "ARABIC",
      "position" : 75
      "token" : "发布会",
      "start_offset" : 136,
      "end_offset" : 139,
      "type" : "CN_WORD",
      "position" : 76
      "token" : "发布",
      "start_offset" : 136,
      "end_offset" : 138,
      "type" : "CN_WORD",
      "position" : 77
      "token" : "会",
      "start_offset" : 138,
      "end_offset" : 139,
      "type" : "CN_CHAR",
      "position" : 78
      "token" : "节奏",
      "start_offset" : 139,
      "end_offset" : 141,
      "type" : "CN_WORD",
      "position" : 79
      "token" : "策划",
      "start_offset" : 142,
      "end_offset" : 144,
      "type" : "CN_WORD",
      "position" : 80
      "token" : "组织",
      "start_offset" : 144,
      "end_offset" : 146,
      "type" : "CN_WORD",
      "position" : 81
      "token" : "执行",
      "start_offset" : 146,
      "end_offset" : 148,
      "type" : "CN_WORD",
      "position" : 82
      "token" : "小",
      "start_offset" : 148,
      "end_offset" : 149,
      "type" : "CN_CHAR",
      "position" : 83
      "token" : "鹏",
      "start_offset" : 149,
      "end_offset" : 150,
      "type" : "CN_CHAR",
      "position" : 84
      "token" : "汽车",
      "start_offset" : 150,
      "end_offset" : 152,
      "type" : "CN_WORD",
      "position" : 85
      "token" : "一路",
      "start_offset" : 153,
      "end_offset" : 155,
      "type" : "CN_WORD",
      "position" : 86
      "token" : "一",
      "start_offset" : 153,
      "end_offset" : 154,
      "type" : "TYPE_CNUM",
      "position" : 87
      "token" : "路向",
      "start_offset" : 154,
      "end_offset" : 156,
      "type" : "CN_WORD",
      "position" : 88
      "token" : "路",
      "start_offset" : 154,
      "end_offset" : 155,
      "type" : "COUNT",
      "position" : 89
      "token" : "向北",
      "start_offset" : 155,
      "end_offset" : 157,
      "type" : "CN_WORD",
      "position" : 90
      "token" : "品牌",
      "start_offset" : 158,
      "end_offset" : 160,
      "type" : "CN_WORD",
      "position" : 91
      "token" : "事件",
      "start_offset" : 160,
      "end_offset" : 162,
      "type" : "CN_WORD",
      "position" : 92
      "token" : "营销",
      "start_offset" : 162,
      "end_offset" : 164,
      "type" : "CN_WORD",
      "position" : 93
      "token" : "通过",
      "start_offset" : 165,
      "end_offset" : 167,
      "type" : "CN_WORD",
      "position" : 94
      "token" : "6",
      "start_offset" : 168,
      "end_offset" : 169,
      "type" : "ARABIC",
      "position" : 95
      "token" : "天",
      "start_offset" : 170,
      "end_offset" : 171,
      "type" : "CN_CHAR",
      "position" : 96
      "token" : "8",
      "start_offset" : 172,
      "end_offset" : 173,
      "type" : "ARABIC",
      "position" : 97
      "token" : "人",
      "start_offset" : 174,
      "end_offset" : 175,
      "type" : "CN_CHAR",
      "position" : 98
      "token" : "2553",
      "start_offset" : 176,
      "end_offset" : 180,
      "type" : "ARABIC",
      "position" : 99
      "token" : "公里",
      "start_offset" : 181,
      "end_offset" : 183,
      "type" : "CN_WORD",
      "position" : 100
      "token" : "的",
      "start_offset" : 183,
      "end_offset" : 184,
      "type" : "CN_CHAR",
      "position" : 101
      "token" : "极",
      "start_offset" : 184,
      "end_offset" : 185,
      "type" : "CN_CHAR",
      "position" : 102
      "token" : "客",
      "start_offset" : 185,
      "end_offset" : 186,
      "type" : "CN_CHAR",
      "position" : 103
      "token" : "朝圣",
      "start_offset" : 186,
      "end_offset" : 188,
      "type" : "CN_WORD",
      "position" : 104
      "token" : "之旅",
      "start_offset" : 188,
      "end_offset" : 190,
      "type" : "CN_WORD",
      "position" : 105
      "token" : "为",
      "start_offset" : 191,
      "end_offset" : 192,
      "type" : "CN_CHAR",
      "position" : 106
      "token" : "小",
      "start_offset" : 192,
      "end_offset" : 193,
      "type" : "CN_CHAR",
      "position" : 107
      "token" : "鹏",
      "start_offset" : 193,
      "end_offset" : 194,
      "type" : "CN_CHAR",
      "position" : 108
      "token" : "汽车",
      "start_offset" : 194,
      "end_offset" : 196,
      "type" : "CN_WORD",
      "position" : 109
      "token" : "制造品",
      "start_offset" : 196,
      "end_offset" : 199,
      "type" : "CN_WORD",
      "position" : 110
      "token" : "制造",
      "start_offset" : 196,
      "end_offset" : 198,
      "type" : "CN_WORD",
      "position" : 111
      "token" : "品质",
      "start_offset" : 198,
      "end_offset" : 200,
      "type" : "CN_WORD",
      "position" : 112
      "token" : "背书",
      "start_offset" : 200,
      "end_offset" : 202,
      "type" : "CN_WORD",
      "position" : 113
      "token" : "铺垫",
      "start_offset" : 203,
      "end_offset" : 205,
      "type" : "CN_WORD",
      "position" : 114
      "token" : "口碑",
      "start_offset" : 205,
      "end_offset" : 207,
      "type" : "CN_WORD",
      "position" : 115
      "token" : "这是",
      "start_offset" : 208,
      "end_offset" : 210,
      "type" : "CN_WORD",
      "position" : 116
      "token" : "国内",
      "start_offset" : 210,
      "end_offset" : 212,
      "type" : "CN_WORD",
      "position" : 117
      "token" : "新造",
      "start_offset" : 212,
      "end_offset" : 214,
      "type" : "CN_WORD",
      "position" : 118
      "token" : "车",
      "start_offset" : 214,
      "end_offset" : 215,
      "type" : "CN_CHAR",
      "position" : 119
      "token" : "势力",
      "start_offset" : 216,
      "end_offset" : 218,
      "type" : "CN_WORD",
      "position" : 120
      "token" : "的",
      "start_offset" : 218,
      "end_offset" : 219,
      "type" : "CN_CHAR",
      "position" : 121
      "token" : "超长",
      "start_offset" : 219,
      "end_offset" : 221,
      "type" : "CN_WORD",
      "position" : 122
      "token" : "超",
      "start_offset" : 219,
      "end_offset" : 220,
      "type" : "CN_CHAR",
      "position" : 123
      "token" : "长途",
      "start_offset" : 220,
      "end_offset" : 222,
      "type" : "CN_WORD",
      "position" : 124
      "token" : "自驾",
      "start_offset" : 222,
      "end_offset" : 224,
      "type" : "CN_WORD",
      "position" : 125
      "token" : "第一",
      "start_offset" : 224,
      "end_offset" : 226,
      "type" : "CN_WORD",
      "position" : 126
      "token" : "第",
      "start_offset" : 224,
      "end_offset" : 225,
      "type" : "CN_CHAR",
      "position" : 127
      "token" : "一测",
      "start_offset" : 225,
      "end_offset" : 227,
      "type" : "CN_WORD",
      "position" : 128
      "token" : "一",
      "start_offset" : 225,
      "end_offset" : 226,
      "type" : "TYPE_CNUM",
      "position" : 129
      "token" : "测",
      "start_offset" : 226,
      "end_offset" : 227,
      "type" : "CN_CHAR",
      "position" : 130
      "token" : "视频",
      "start_offset" : 228,
      "end_offset" : 230,
      "type" : "CN_WORD",
      "position" : 131
      "token" : "分别",
      "start_offset" : 230,
      "end_offset" : 232,
      "type" : "CN_WORD",
      "position" : 132
      "token" : "于",
      "start_offset" : 232,
      "end_offset" : 233,
      "type" : "CN_CHAR",
      "position" : 133
      "token" : "小",
      "start_offset" : 233,
      "end_offset" : 234,
      "type" : "CN_CHAR",
      "position" : 134
      "token" : "鹏",
      "start_offset" : 234,
      "end_offset" : 235,
      "type" : "CN_CHAR",
      "position" : 135
      "token" : "汽车品牌",
      "start_offset" : 235,
      "end_offset" : 239,
      "type" : "CN_WORD",
      "position" : 136
      "token" : "汽车",
      "start_offset" : 235,
      "end_offset" : 237,
      "type" : "CN_WORD",
      "position" : 137
      "token" : "品牌",
      "start_offset" : 237,
      "end_offset" : 239,
      "type" : "CN_WORD",
      "position" : 138
      "token" : "媒体",
      "start_offset" : 239,
      "end_offset" : 241,
      "type" : "CN_WORD",
      "position" : 139
      "token" : "发布会",
      "start_offset" : 241,
      "end_offset" : 244,
      "type" : "CN_WORD",
      "position" : 140
      "token" : "发布",
      "start_offset" : 241,
      "end_offset" : 243,
      "type" : "CN_WORD",
      "position" : 141
      "token" : "会",
      "start_offset" : 243,
      "end_offset" : 244,
      "type" : "CN_CHAR",
      "position" : 142
      "token" : "g3",
      "start_offset" : 245,
      "end_offset" : 247,
      "type" : "LETTER",
      "position" : 143
      "token" : "g",
      "start_offset" : 245,
      "end_offset" : 246,
      "type" : "ENGLISH",
      "position" : 144
      "token" : "3",
      "start_offset" : 246,
      "end_offset" : 247,
      "type" : "ARABIC",
      "position" : 145
      "token" : "发布会",
      "start_offset" : 248,
      "end_offset" : 251,
      "type" : "CN_WORD",
      "position" : 146
      "token" : "发布",
      "start_offset" : 248,
      "end_offset" : 250,
      "type" : "CN_WORD",
      "position" : 147
      "token" : "会",
      "start_offset" : 250,
      "end_offset" : 251,
      "type" : "CN_CHAR",
      "position" : 148
      "token" : "何",
      "start_offset" : 251,
      "end_offset" : 252,
      "type" : "CN_CHAR",
      "position" : 149
      "token" : "小",
      "start_offset" : 252,
      "end_offset" : 253,
      "type" : "CN_CHAR",
      "position" : 150
      "token" : "鹏",
      "start_offset" : 253,
      "end_offset" : 254,
      "type" : "CN_CHAR",
      "position" : 151
      "token" : "讲演",
      "start_offset" : 254,
      "end_offset" : 256,
      "type" : "CN_WORD",
      "position" : 152
      "token" : "前",
      "start_offset" : 256,
      "end_offset" : 257,
      "type" : "CN_CHAR",
      "position" : 153
      "token" : "及",
      "start_offset" : 257,
      "end_offset" : 258,
      "type" : "CN_CHAR",
      "position" : 154
      "token" : "小",
      "start_offset" : 258,
      "end_offset" : 259,
      "type" : "CN_CHAR",
      "position" : 155
      "token" : "鹏",
      "start_offset" : 259,
      "end_offset" : 260,
      "type" : "CN_CHAR",
      "position" : 156
      "token" : "汽车",
      "start_offset" : 260,
      "end_offset" : 262,
      "type" : "CN_WORD",
      "position" : 157
      "token" : "各城市",
      "start_offset" : 262,
      "end_offset" : 265,
      "type" : "CN_WORD",
      "position" : 158
      "token" : "各城",
      "start_offset" : 262,
      "end_offset" : 264,
      "type" : "CN_WORD",
      "position" : 159
      "token" : "城市",
      "start_offset" : 263,
      "end_offset" : 265,
      "type" : "CN_WORD",
      "position" : 160
      "token" : "端",
      "start_offset" : 265,
      "end_offset" : 266,
      "type" : "CN_CHAR",
      "position" : 161
      "token" : "展厅",
      "start_offset" : 266,
      "end_offset" : 268,
      "type" : "CN_WORD",
      "position" : 162
      "token" : "播放",
      "start_offset" : 268,
      "end_offset" : 270,
      "type" : "CN_WORD",
      "position" : 163
      "token" : "获得",
      "start_offset" : 271,
      "end_offset" : 273,
      "type" : "CN_WORD",
      "position" : 164
      "token" : "行业",
      "start_offset" : 273,
      "end_offset" : 275,
      "type" : "CN_WORD",
      "position" : 165
      "token" : "及",
      "start_offset" : 275,
      "end_offset" : 276,
      "type" : "CN_CHAR",
      "position" : 166
      "token" : "公司内部",
      "start_offset" : 276,
      "end_offset" : 280,
      "type" : "CN_WORD",
      "position" : 167
      "token" : "公司",
      "start_offset" : 276,
      "end_offset" : 278,
      "type" : "CN_WORD",
      "position" : 168
      "token" : "内部",
      "start_offset" : 278,
      "end_offset" : 280,
      "type" : "CN_WORD",
      "position" : 169
      "token" : "好评",
      "start_offset" : 280,
      "end_offset" : 282,
      "type" : "CN_WORD",
      "position" : 170
      "token" : "相关",
      "start_offset" : 283,
      "end_offset" : 285,
      "type" : "CN_WORD",
      "position" : 171
      "token" : "链接",
      "start_offset" : 285,
      "end_offset" : 287,
      "type" : "CN_WORD",
      "position" : 172
      "token" : "br",
      "start_offset" : 290,
      "end_offset" : 292,
      "type" : "ENGLISH",
      "position" : 173
      "token" : "外部",
      "start_offset" : 295,
      "end_offset" : 297,
      "type" : "CN_WORD",
      "position" : 174
      "token" : "视频",
      "start_offset" : 297,
      "end_offset" : 299,
      "type" : "CN_WORD",
      "position" : 175
      "token" : "链接",
      "start_offset" : 299,
      "end_offset" : 301,
      "type" : "CN_WORD",
      "position" : 176
      "token" : "https",
      "start_offset" : 302,
      "end_offset" : 307,
      "type" : "ENGLISH",
      "position" : 177
      "token" : "chejiahao.autohome.com.cn",
      "start_offset" : 310,
      "end_offset" : 335,
      "type" : "LETTER",
      "position" : 178
      "token" : "chejiahao",
      "start_offset" : 310,
      "end_offset" : 319,
      "type" : "ENGLISH",
      "position" : 179
      "token" : "autohome",
      "start_offset" : 320,
      "end_offset" : 328,
      "type" : "ENGLISH",
      "position" : 180
      "token" : "com",
      "start_offset" : 329,
      "end_offset" : 332,
      "type" : "ENGLISH",
      "position" : 181
      "token" : "cn",
      "start_offset" : 333,
      "end_offset" : 335,
      "type" : "ENGLISH",
      "position" : 182
      "token" : "info",
      "start_offset" : 336,
      "end_offset" : 340,
      "type" : "ENGLISH",
      "position" : 183
      "token" : "2290990",
      "start_offset" : 341,
      "end_offset" : 348,
      "type" : "ARABIC",
      "position" : 184
      "token" : "br",
      "start_offset" : 350,
      "end_offset" : 352,
      "type" : "ENGLISH",
      "position" : 185
      "token" : "b",
      "start_offset" : 353,
      "end_offset" : 354,
      "type" : "ENGLISH",
      "position" : 186
      "token" : "内部",
      "start_offset" : 355,
      "end_offset" : 357,
      "type" : "CN_WORD",
      "position" : 187
      "token" : "报道",
      "start_offset" : 357,
      "end_offset" : 359,
      "type" : "CN_WORD",
      "position" : 188
      "token" : "链接",
      "start_offset" : 359,
      "end_offset" : 361,
      "type" : "CN_WORD",
      "position" : 189
      "token" : "https",
      "start_offset" : 362,
      "end_offset" : 367,
      "type" : "ENGLISH",
      "position" : 190
      "token" : "mp.weixin.qq.com",
      "start_offset" : 370,
      "end_offset" : 386,
      "type" : "LETTER",
      "position" : 191
      "token" : "mp",
      "start_offset" : 370,
      "end_offset" : 372,
      "type" : "ENGLISH",
      "position" : 192
      "token" : "weixin",
      "start_offset" : 373,
      "end_offset" : 379,
      "type" : "ENGLISH",
      "position" : 193
      "token" : "qq",
      "start_offset" : 380,
      "end_offset" : 382,
      "type" : "ENGLISH",
      "position" : 194
      "token" : "com",
      "start_offset" : 383,
      "end_offset" : 386,
      "type" : "ENGLISH",
      "position" : 195
      "token" : "s",
      "start_offset" : 387,
      "end_offset" : 388,
      "type" : "ENGLISH",
      "position" : 196
      "token" : "a2afghyrj8hc2gmmr_c8dq",
      "start_offset" : 389,
      "end_offset" : 411,
      "type" : "LETTER",
      "position" : 197
      "token" : "2",
      "start_offset" : 390,
      "end_offset" : 391,
      "type" : "ARABIC",
      "position" : 198
      "token" : "afghyrj",
      "start_offset" : 391,
      "end_offset" : 398,
      "type" : "ENGLISH",
      "position" : 199
      "token" : "8",
      "start_offset" : 398,
      "end_offset" : 399,
      "type" : "ARABIC",
      "position" : 200
      "token" : "hc",
      "start_offset" : 399,
      "end_offset" : 401,
      "type" : "ENGLISH",
      "position" : 201
      "token" : "2",
      "start_offset" : 401,
      "end_offset" : 402,
      "type" : "ARABIC",
      "position" : 202
      "token" : "gmmr",
      "start_offset" : 402,
      "end_offset" : 406,
      "type" : "ENGLISH",
      "position" : 203
      "token" : "c",
      "start_offset" : 407,
      "end_offset" : 408,
      "type" : "ENGLISH",
      "position" : 204
      "token" : "8",
      "start_offset" : 408,
      "end_offset" : 409,
      "type" : "ARABIC",
      "position" : 205
      "token" : "dq",
      "start_offset" : 409,
      "end_offset" : 411,
      "type" : "ENGLISH",
      "position" : 206
      "token" : "br",
      "start_offset" : 413,
      "end_offset" : 415,
      "type" : "ENGLISH",
      "position" : 207
      "token" : "br",
      "start_offset" : 418,
      "end_offset" : 420,
      "type" : "ENGLISH",
      "position" : 208
      "token" : "2",
      "start_offset" : 421,
      "end_offset" : 422,
      "type" : "ARABIC",
      "position" : 209
      "token" : "配合",
      "start_offset" : 423,
      "end_offset" : 425,
      "type" : "CN_WORD",
      "position" : 210
      "token" : "g3",
      "start_offset" : 426,
      "end_offset" : 428,
      "type" : "LETTER",
      "position" : 211
      "token" : "g",
      "start_offset" : 426,
      "end_offset" : 427,
      "type" : "ENGLISH",
      "position" : 212
      "token" : "3",
      "start_offset" : 427,
      "end_offset" : 428,
      "type" : "ARABIC",
      "position" : 213
      "token" : "发布会",
      "start_offset" : 429,
      "end_offset" : 432,
      "type" : "CN_WORD",
      "position" : 214
      "token" : "发布",
      "start_offset" : 429,
      "end_offset" : 431,
      "type" : "CN_WORD",
      "position" : 215
      "token" : "会",
      "start_offset" : 431,
      "end_offset" : 432,
      "type" : "CN_CHAR",
      "position" : 216
      "token" : "节奏",
      "start_offset" : 432,
      "end_offset" : 434,
      "type" : "CN_WORD",
      "position" : 217
      "token" : "联合",
      "start_offset" : 435,
      "end_offset" : 437,
      "type" : "CN_WORD",
      "position" : 218
      "token" : "汽车",
      "start_offset" : 437,
      "end_offset" : 439,
      "type" : "CN_WORD",
      "position" : 219
      "token" : "之家",
      "start_offset" : 439,
      "end_offset" : 441,
      "type" : "CN_WORD",
      "position" : 220
      "token" : "之",
      "start_offset" : 439,
      "end_offset" : 440,
      "type" : "CN_CHAR",
      "position" : 221
      "token" : "家资",
      "start_offset" : 440,
      "end_offset" : 442,
      "type" : "CN_WORD",
      "position" : 222
      "token" : "资源",
      "start_offset" : 441,
      "end_offset" : 443,
      "type" : "CN_WORD",
      "position" : 223
      "token" : "组织",
      "start_offset" : 443,
      "end_offset" : 445,
      "type" : "CN_WORD",
      "position" : 224
      "token" : "策划",
      "start_offset" : 445,
      "end_offset" : 447,
      "type" : "CN_WORD",
      "position" : 225
      "token" : "g3",
      "start_offset" : 448,
      "end_offset" : 450,
      "type" : "LETTER",
      "position" : 226
      "token" : "g",
      "start_offset" : 448,
      "end_offset" : 449,
      "type" : "ENGLISH",
      "position" : 227
      "token" : "3",
      "start_offset" : 449,
      "end_offset" : 450,
      "type" : "ARABIC",
      "position" : 228
      "token" : "价格",
      "start_offset" : 451,
      "end_offset" : 453,
      "type" : "CN_WORD",
      "position" : 229
      "token" : "竞猜",
      "start_offset" : 453,
      "end_offset" : 455,
      "type" : "CN_WORD",
      "position" : 230
      "token" : "项目",
      "start_offset" : 456,
      "end_offset" : 458,
      "type" : "CN_WORD",
      "position" : 231
      "token" : "该项",
      "start_offset" : 459,
      "end_offset" : 461,
      "type" : "CN_WORD",
      "position" : 232
      "token" : "该",
      "start_offset" : 459,
      "end_offset" : 460,
      "type" : "CN_CHAR",
      "position" : 233
      "token" : "项目",
      "start_offset" : 460,
      "end_offset" : 462,
      "type" : "CN_WORD",
      "position" : 234
      "token" : "以",
      "start_offset" : 462,
      "end_offset" : 463,
      "type" : "CN_CHAR",
      "position" : 235
      "token" : "极",
      "start_offset" : 463,
      "end_offset" : 464,
      "type" : "CN_CHAR",
      "position" : 236
      "token" : "底",
      "start_offset" : 464,
      "end_offset" : 465,
      "type" : "CN_CHAR",
      "position" : 237
      "token" : "成本",
      "start_offset" : 465,
      "end_offset" : 467,
      "type" : "CN_WORD",
      "position" : 238
      "token" : "获得",
      "start_offset" : 468,
      "end_offset" : 470,
      "type" : "CN_WORD",
      "position" : 239
      "token" : "较好",
      "start_offset" : 470,
      "end_offset" : 472,
      "type" : "CN_WORD",
      "position" : 240
      "token" : "的",
      "start_offset" : 472,
      "end_offset" : 473,
      "type" : "CN_CHAR",
      "position" : 241
      "token" : "曝光",
      "start_offset" : 473,
      "end_offset" : 475,
      "type" : "CN_WORD",
      "position" : 242
      "token" : "声",
      "start_offset" : 475,
      "end_offset" : 476,
      "type" : "CN_CHAR",
      "position" : 243
      "token" : "量",
      "start_offset" : 476,
      "end_offset" : 477,
      "type" : "CN_CHAR",
      "position" : 244
      "token" : "及",
      "start_offset" : 477,
      "end_offset" : 478,
      "type" : "CN_CHAR",
      "position" : 245
      "token" : "活动",
      "start_offset" : 478,
      "end_offset" : 480,
      "type" : "CN_WORD",
      "position" : 246
      "token" : "参与",
      "start_offset" : 480,
      "end_offset" : 482,
      "type" : "CN_WORD",
      "position" : 247
      "token" : "效果",
      "start_offset" : 482,
      "end_offset" : 484,
      "type" : "CN_WORD",
      "position" : 248
      "token" : "项目",
      "start_offset" : 485,
      "end_offset" : 487,
      "type" : "CN_WORD",
      "position" : 249
      "token" : "外部",
      "start_offset" : 487,
      "end_offset" : 489,
      "type" : "CN_WORD",
      "position" : 250
      "token" : "论坛",
      "start_offset" : 489,
      "end_offset" : 491,
      "type" : "CN_WORD",
      "position" : 251
      "token" : "总",
      "start_offset" : 491,
      "end_offset" : 492,
      "type" : "CN_CHAR",
      "position" : 252
      "token" : "点击",
      "start_offset" : 492,
      "end_offset" : 494,
      "type" : "CN_WORD",
      "position" : 253
      "token" : "量",
      "start_offset" : 494,
      "end_offset" : 495,
      "type" : "CN_CHAR",
      "position" : 254
      "token" : "环",
      "start_offset" : 495,
      "end_offset" : 496,
      "type" : "CN_CHAR",
      "position" : 255
      "token" : "比",
      "start_offset" : 496,
      "end_offset" : 497,
      "type" : "CN_CHAR",
      "position" : 256
      "token" : "增长",
      "start_offset" : 498,
      "end_offset" : 500,
      "type" : "CN_WORD",
      "position" : 257
      "token" : "108.9",
      "start_offset" : 501,
      "end_offset" : 506,
      "type" : "ARABIC",
      "position" : 258
      "token" : "总",
      "start_offset" : 508,
      "end_offset" : 509,
      "type" : "CN_CHAR",
      "position" : 259
      "token" : "回复数",
      "start_offset" : 509,
      "end_offset" : 512,
      "type" : "CN_WORD",
      "position" : 260
      "token" : "回复",
      "start_offset" : 509,
      "end_offset" : 511,
      "type" : "CN_WORD",
      "position" : 261
      "token" : "复数",
      "start_offset" : 510,
      "end_offset" : 512,
      "type" : "CN_WORD",
      "position" : 262
      "token" : "环",
      "start_offset" : 512,
      "end_offset" : 513,
      "type" : "CN_CHAR",
      "position" : 263
      "token" : "比增",
      "start_offset" : 513,
      "end_offset" : 515,
      "type" : "CN_WORD",
      "position" : 264
      "token" : "比",
      "start_offset" : 513,
      "end_offset" : 514,
      "type" : "CN_CHAR",
      "position" : 265
      "token" : "增长",
      "start_offset" : 514,
      "end_offset" : 516,
      "type" : "CN_WORD",
      "position" : 266
      "token" : "97.3",
      "start_offset" : 517,
      "end_offset" : 521,
      "type" : "ARABIC",
      "position" : 267
      "token" : "成果",
      "start_offset" : 524,
      "end_offset" : 526,
      "type" : "CN_WORD",
      "position" : 268
      "token" : "成",
      "start_offset" : 524,
      "end_offset" : 525,
      "type" : "CN_CHAR",
      "position" : 269
      "token" : "果如",
      "start_offset" : 525,
      "end_offset" : 527,
      "type" : "CN_WORD",
      "position" : 270
      "token" : "如下",
      "start_offset" : 526,
      "end_offset" : 528,
      "type" : "CN_WORD",
      "position" : 271
      "token" : "br",
      "start_offset" : 530,
      "end_offset" : 532,
      "type" : "ENGLISH",
      "position" : 272
      "token" : "新闻",
      "start_offset" : 535,
      "end_offset" : 537,
      "type" : "CN_WORD",
      "position" : 273
      "token" : "曝光",
      "start_offset" : 537,
      "end_offset" : 539,
      "type" : "CN_WORD",
      "position" : 274
      "token" : "举例",
      "start_offset" : 539,
      "end_offset" : 541,
      "type" : "CN_WORD",
      "position" : 275
      "token" : "https",
      "start_offset" : 542,
      "end_offset" : 547,
      "type" : "ENGLISH",
      "position" : 276
      "token" : "www.autohome.com.cn",
      "start_offset" : 550,
      "end_offset" : 569,
      "type" : "LETTER",
      "position" : 277
      "token" : "www",
      "start_offset" : 550,
      "end_offset" : 553,
      "type" : "ENGLISH",
      "position" : 278
      "token" : "autohome",
      "start_offset" : 554,
      "end_offset" : 562,
      "type" : "ENGLISH",
      "position" : 279
      "token" : "com",
      "start_offset" : 563,
      "end_offset" : 566,
      "type" : "ENGLISH",
      "position" : 280
      "token" : "cn",
      "start_offset" : 567,
      "end_offset" : 569,
      "type" : "ENGLISH",
      "position" : 281
      "token" : "news",
      "start_offset" : 570,
      "end_offset" : 574,
      "type" : "ENGLISH",
      "position" : 282
      "token" : "201804",
      "start_offset" : 575,
      "end_offset" : 581,
      "type" : "ARABIC",
      "position" : 283
      "token" : "915688.html",
      "start_offset" : 582,
      "end_offset" : 593,
      "type" : "LETTER",
      "position" : 284
      "token" : "915688",
      "start_offset" : 582,
      "end_offset" : 588,
      "type" : "ARABIC",
      "position" : 285
      "token" : "html",
      "start_offset" : 589,
      "end_offset" : 593,
      "type" : "ENGLISH",
      "position" : 286
      "token" : "br",
      "start_offset" : 595,
      "end_offset" : 597,
      "type" : "ENGLISH",
      "position" : 287
      "token" : "b",
      "start_offset" : 598,
      "end_offset" : 599,
      "type" : "ENGLISH",
      "position" : 288
      "token" : "论坛",
      "start_offset" : 600,
      "end_offset" : 602,
      "type" : "CN_WORD",
      "position" : 289
      "token" : "活动",
      "start_offset" : 602,
      "end_offset" : 604,
      "type" : "CN_WORD",
      "position" : 290
      "token" : "活",
      "start_offset" : 602,
      "end_offset" : 603,
      "type" : "CN_CHAR",
      "position" : 291
      "token" : "动地",
      "start_offset" : 603,
      "end_offset" : 605,
      "type" : "CN_WORD",
      "position" : 292
      "token" : "地址",
      "start_offset" : 604,
      "end_offset" : 606,
      "type" : "CN_WORD",
      "position" : 293
      "token" : "https",
      "start_offset" : 608,
      "end_offset" : 613,
      "type" : "ENGLISH",
      "position" : 294
      "token" : "club.autohome.com.cn",
      "start_offset" : 616,
      "end_offset" : 636,
      "type" : "LETTER",
      "position" : 295
      "token" : "club",
      "start_offset" : 616,
      "end_offset" : 620,
      "type" : "ENGLISH",
      "position" : 296
      "token" : "autohome",
      "start_offset" : 621,
      "end_offset" : 629,
      "type" : "ENGLISH",
      "position" : 297
      "token" : "com",
      "start_offset" : 630,
      "end_offset" : 633,
      "type" : "ENGLISH",
      "position" : 298
      "token" : "cn",
      "start_offset" : 634,
      "end_offset" : 636,
      "type" : "ENGLISH",
      "position" : 299
      "token" : "bbs",
      "start_offset" : 637,
      "end_offset" : 640,
      "type" : "ENGLISH",
      "position" : 300
      "token" : "thread",
      "start_offset" : 641,
      "end_offset" : 647,
      "type" : "ENGLISH",
      "position" : 301
      "token" : "7fc721404489d922",
      "start_offset" : 648,
      "end_offset" : 664,
      "type" : "LETTER",
      "position" : 302
      "token" : "7",
      "start_offset" : 648,
      "end_offset" : 649,
      "type" : "ARABIC",
      "position" : 303
      "token" : "fc",
      "start_offset" : 649,
      "end_offset" : 651,
      "type" : "ENGLISH",
      "position" : 304
      "token" : "721404489",
      "start_offset" : 651,
      "end_offset" : 660,
      "type" : "ARABIC",
      "position" : 305
      "token" : "d",
      "start_offset" : 660,
      "end_offset" : 661,
      "type" : "ENGLISH",
      "position" : 306
      "token" : "922",
      "start_offset" : 661,
      "end_offset" : 664,
      "type" : "ARABIC",
      "position" : 307
      "token" : "72434411-1.html",
      "start_offset" : 665,
      "end_offset" : 680,
      "type" : "LETTER",
      "position" : 308
      "token" : "72434411",
      "start_offset" : 665,
      "end_offset" : 673,
      "type" : "ARABIC",
      "position" : 309
      "token" : "1",
      "start_offset" : 674,
      "end_offset" : 675,
      "type" : "ARABIC",
      "position" : 310
      "token" : "html",
      "start_offset" : 676,
      "end_offset" : 680,
      "type" : "ENGLISH",
      "position" : 311
      "token" : "br",
      "start_offset" : 682,
      "end_offset" : 684,
      "type" : "ENGLISH",
      "position" : 312
      "token" : "br",
      "start_offset" : 687,
      "end_offset" : 689,
      "type" : "ENGLISH",
      "position" : 313
      "token" : "3",
      "start_offset" : 690,
      "end_offset" : 691,
      "type" : "ARABIC",
      "position" : 314
      "token" : "撰写",
      "start_offset" : 692,
      "end_offset" : 694,
      "type" : "CN_WORD",
      "position" : 315
      "token" : "原创",
      "start_offset" : 694,
      "end_offset" : 696,
      "type" : "CN_WORD",
      "position" : 316
      "token" : "内容",
      "start_offset" : 696,
      "end_offset" : 698,
      "type" : "CN_WORD",
      "position" : 317
      "token" : "作品",
      "start_offset" : 698,
      "end_offset" : 700,
      "type" : "CN_WORD",
      "position" : 318
      "token" : "抢占",
      "start_offset" : 701,
      "end_offset" : 703,
      "type" : "CN_WORD",
      "position" : 319
      "token" : "汽车",
      "start_offset" : 703,
      "end_offset" : 705,
      "type" : "CN_WORD",
      "position" : 320
      "token" : "之家",
      "start_offset" : 705,
      "end_offset" : 707,
      "type" : "CN_WORD",
      "position" : 321
      "token" : "首页",
      "start_offset" : 707,
      "end_offset" : 709,
      "type" : "CN_WORD",
      "position" : 322
      "token" : "首",
      "start_offset" : 709,
      "end_offset" : 710,
      "type" : "CN_CHAR",
      "position" : 323
      "token" : "屏",
      "start_offset" : 710,
      "end_offset" : 711,
      "type" : "CN_CHAR",
      "position" : 324
      "token" : "文字",
      "start_offset" : 711,
      "end_offset" : 713,
      "type" : "CN_WORD",
      "position" : 325
      "token" : "链",
      "start_offset" : 713,
      "end_offset" : 714,
      "type" : "CN_CHAR",
      "position" : 326
      "token" : "和",
      "start_offset" : 714,
      "end_offset" : 715,
      "type" : "CN_CHAR",
      "position" : 327
      "token" : "论坛",
      "start_offset" : 715,
      "end_offset" : 717,
      "type" : "CN_WORD",
      "position" : 328
      "token" : "相关",
      "start_offset" : 717,
      "end_offset" : 719,
      "type" : "CN_WORD",
      "position" : 329
      "token" : "位置",
      "start_offset" : 719,
      "end_offset" : 721,
      "type" : "CN_WORD",
      "position" : 330
      "token" : "争取",
      "start_offset" : 722,
      "end_offset" : 724,
      "type" : "CN_WORD",
      "position" : 331
      "token" : "免费",
      "start_offset" : 724,
      "end_offset" : 726,
      "type" : "CN_WORD",
      "position" : 332
      "token" : "露出",
      "start_offset" : 726,
      "end_offset" : 728,
      "type" : "CN_WORD",
      "position" : 333
      "token" : "露",
      "start_offset" : 726,
      "end_offset" : 727,
      "type" : "CN_CHAR",
      "position" : 334
      "token" : "出位",
      "start_offset" : 727,
      "end_offset" : 729,
      "type" : "CN_WORD",
      "position" : 335
      "token" : "br",
      "start_offset" : 731,
      "end_offset" : 733,
      "type" : "ENGLISH",
      "position" : 336
      "token" : "小",
      "start_offset" : 736,
      "end_offset" : 737,
      "type" : "CN_CHAR",
      "position" : 337
      "token" : "鹏",
      "start_offset" : 737,
      "end_offset" : 738,
      "type" : "CN_CHAR",
      "position" : 338
      "token" : "汽车",
      "start_offset" : 738,
      "end_offset" : 740,
      "type" : "CN_WORD",
      "position" : 339
      "token" : "1.0",
      "start_offset" : 741,
      "end_offset" : 744,
      "type" : "ARABIC",
      "position" : 340
      "token" : "评测",
      "start_offset" : 745,
      "end_offset" : 747,
      "type" : "CN_WORD",
      "position" : 341
      "token" : "宝剑锋从磨砺出",
      "start_offset" : 748,
      "end_offset" : 755,
      "type" : "CN_WORD",
      "position" : 342
      "token" : "宝剑锋",
      "start_offset" : 748,
      "end_offset" : 751,
      "type" : "CN_WORD",
      "position" : 343
      "token" : "宝剑",
      "start_offset" : 748,
      "end_offset" : 750,
      "type" : "CN_WORD",
      "position" : 344
      "token" : "从",
      "start_offset" : 751,
      "end_offset" : 752,
      "type" : "CN_CHAR",
      "position" : 345
      "token" : "锋",
      "start_offset" : 750,
      "end_offset" : 751,
      "type" : "CN_CHAR",
      "position" : 346
      "token" : "从",
      "start_offset" : 751,
      "end_offset" : 752,
      "type" : "CN_CHAR",
      "position" : 347
      "token" : "磨砺",
      "start_offset" : 752,
      "end_offset" : 754,
      "type" : "CN_WORD",
      "position" : 348
      "token" : "出",
      "start_offset" : 754,
      "end_offset" : 755,
      "type" : "CN_CHAR",
      "position" : 349
      "token" : "https",
      "start_offset" : 756,
      "end_offset" : 761,
      "type" : "ENGLISH",
      "position" : 350
      "token" : "chejiahao.autohome.com.cn",
      "start_offset" : 764,
      "end_offset" : 789,
      "type" : "LETTER",
      "position" : 351
      "token" : "chejiahao",
      "start_offset" : 764,
      "end_offset" : 773,
      "type" : "ENGLISH",
      "position" : 352
      "token" : "autohome",
      "start_offset" : 774,
      "end_offset" : 782,
      "type" : "ENGLISH",
      "position" : 353
      "token" : "com",
      "start_offset" : 783,
      "end_offset" : 786,
      "type" : "ENGLISH",
      "position" : 354
      "token" : "cn",
      "start_offset" : 787,
      "end_offset" : 789,
      "type" : "ENGLISH",
      "position" : 355
      "token" : "info",
      "start_offset" : 790,
      "end_offset" : 794,
      "type" : "ENGLISH",
      "position" : 356
      "token" : "2239269",
      "start_offset" : 795,
      "end_offset" : 802,
      "type" : "ARABIC",
      "position" : 357
      "token" : "br",
      "start_offset" : 804,
      "end_offset" : 806,
      "type" : "ENGLISH",
      "position" : 358
      "token" : "b",
      "start_offset" : 807,
      "end_offset" : 808,
      "type" : "ENGLISH",
      "position" : 359
      "token" : "一路",
      "start_offset" : 809,
      "end_offset" : 811,
      "type" : "CN_WORD",
      "position" : 360
      "token" : "一",
      "start_offset" : 809,
      "end_offset" : 810,
      "type" : "TYPE_CNUM",
      "position" : 361
      "token" : "路向",
      "start_offset" : 810,
      "end_offset" : 812,
      "type" : "CN_WORD",
      "position" : 362
      "token" : "路",
      "start_offset" : 810,
      "end_offset" : 811,
      "type" : "COUNT",
      "position" : 363
      "token" : "向北",
      "start_offset" : 811,
      "end_offset" : 813,
      "type" : "CN_WORD",
      "position" : 364
      "token" : "小",
      "start_offset" : 814,
      "end_offset" : 815,
      "type" : "CN_CHAR",
      "position" : 365
      "token" : "鹏",
      "start_offset" : 815,
      "end_offset" : 816,
      "type" : "CN_CHAR",
      "position" : 366
      "token" : "汽车",
      "start_offset" : 816,
      "end_offset" : 818,
      "type" : "CN_WORD",
      "position" : 367
      "token" : "广州",
      "start_offset" : 818,
      "end_offset" : 820,
      "type" : "CN_WORD",
      "position" : 368
      "token" : "北京",
      "start_offset" : 821,
      "end_offset" : 823,
      "type" : "CN_WORD",
      "position" : 369
      "token" : "自驾游",
      "start_offset" : 823,
      "end_offset" : 826,
      "type" : "CN_WORD",
      "position" : 370
      "token" : "自驾",
      "start_offset" : 823,
      "end_offset" : 825,
      "type" : "CN_WORD",
      "position" : 371
      "token" : "游记",
      "start_offset" : 825,
      "end_offset" : 827,
      "type" : "CN_WORD",
      "position" : 372
      "token" : "https",
      "start_offset" : 828,
      "end_offset" : 833,
      "type" : "ENGLISH",
      "position" : 373
      "token" : "chejiahao.autohome.com.cn",
      "start_offset" : 836,
      "end_offset" : 861,
      "type" : "LETTER",
      "position" : 374
      "token" : "chejiahao",
      "start_offset" : 836,
      "end_offset" : 845,
      "type" : "ENGLISH",
      "position" : 375
      "token" : "autohome",
      "start_offset" : 846,
      "end_offset" : 854,
      "type" : "ENGLISH",
      "position" : 376
      "token" : "com",
      "start_offset" : 855,
      "end_offset" : 858,
      "type" : "ENGLISH",
      "position" : 377
      "token" : "cn",
      "start_offset" : 859,
      "end_offset" : 861,
      "type" : "ENGLISH",
      "position" : 378
      "token" : "info",
      "start_offset" : 862,
      "end_offset" : 866,
      "type" : "ENGLISH",
      "position" : 379
      "token" : "2290598",
      "start_offset" : 867,
      "end_offset" : 874,
      "type" : "ARABIC",
      "position" : 380
      "token" : "span",
      "start_offset" : 877,
      "end_offset" : 881,
      "type" : "ENGLISH",
      "position" : 381






POST test_2022/_analyze
{
  "field": "workContent.chinese",
  "text": "宝剑锋从磨砺出"
}


  "tokens" : [
      "token" : "宝剑锋从磨砺出",
      "start_offset" : 0,
      "end_offset" : 7,
      "type" : "CN_WORD",
      "position" : 0
      "token" : "宝剑锋",
      "start_offset" : 0,
      "end_offset" : 3,
      "type" : "CN_WORD",
      "position" : 1
      "token" : "宝剑",
      "start_offset" : 0,
      "end_offset" : 2,
      "type" : "CN_WORD",
      "position" : 2
      "token" : "从",
      "start_offset" : 3,
      "end_offset" : 4,
      "type" : "CN_CHAR",
      "position" : 3
      "token" : "锋",
      "start_offset" : 2,
      "end_offset" : 3,
      "type" : "CN_CHAR",
      "position" : 4
      "token" : "从",
      "start_offset" : 3,
      "end_offset" : 4,
      "type" : "CN_CHAR",
      "position" : 5
      "token" : "磨砺",
      "start_offset" : 4,
      "end_offset" : 6,
      "type" : "CN_WORD",
      "position" : 6
      "token" : "出",
      "start_offset" : 6,
      "end_offset" : 7,
      "type" : "CN_CHAR",
      "position" : 7



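测试发现,"宝剑锋从磨砺出"已经是能触发该异常的最短文本了。从上面的分词结果可以看到:"从"(start_offset 为 3)被重复输出了一次,并且其中一次排在"锋"(start_offset 为 2)之前,也就是说 token 的 start_offset 出现了回退。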



方案1 上传自定义的分词库

将"宝剑锋从磨砺出"设置为独立的分词,不再智能拆分。

新建一个名为 extend.dic 的文本文件,在里面添加一行内容:宝剑锋从磨砺出。
将该文件放入 ES 安装目录,并修改 IKAnalyzer.cfg.xml 文件,增加扩展字典配置,然后重启 ES 实例即可。

如果是阿里云的 ES 服务器,则直接使用阿里云的热更新即可(IK 热更新首次会触发重启,之后更新同名词典时底层不会再触发重启)。


POST test_2022/_analyze
{
  "field": "workContent.chinese",
  "text": "宝剑锋从磨砺出"
}


  "tokens" : [
      "token" : "宝剑锋从磨砺出",
      "start_offset" : 0,
      "end_offset" : 7,
      "type" : "CN_WORD",
      "position" : 0
      "token" : "宝剑锋",
      "start_offset" : 0,
      "end_offset" : 3,
      "type" : "CN_WORD",
      "position" : 1
      "token" : "宝剑",
      "start_offset" : 0,
      "end_offset" : 2,
      "type" : "CN_WORD",
      "position" : 2
      "token" : "锋",
      "start_offset" : 2,
      "end_offset" : 3,
      "type" : "CN_CHAR",
      "position" : 3
      "token" : "从",
      "start_offset" : 3,
      "end_offset" : 4,
      "type" : "CN_CHAR",
      "position" : 4
      "token" : "磨砺",
      "start_offset" : 4,
      "end_offset" : 6,
      "type" : "CN_WORD",
      "position" : 5
      "token" : "出",
      "start_offset" : 6,
      "end_offset" : 7,
      "type" : "CN_CHAR",
      "position" : 6





今天我想和大家分享的是一套商业开源的 MES制造执行管理系统。对于制造业而言,MES 是一个至关重要的系统,它可以帮助企业提高生产效率、优化资源利用、提高产品质量,从而增强市场竞争力。什么是 MES? MES 是指通过计算机技术、自动…

uniapp开发 如何获取IP地址?

一、需求 使用uniapp开发小程序时,需要调取【记录日活动统计】的接口,而这个接口需要传递一个ip给后台, 那么前端如何获取ip呢?下面代码里可以实现 二、代码实现 1.在项目的manifest.json中配置一下网络权限: "…




简介: CSDN博客专家,专注Android/Linux系统,分享多mic语音方案、音视频、编解码等技术,与大家一起成长! 优质专栏:Audio工程师进阶系列【原创干货持续更新中……】🚀 优质专栏:多媒…


HTML(HyperText Markup Language)是一种用于创建网页的标准标记语言。它是构建和设计网页及应用的基础,通过定义各种元素和属性,HTML使得开发者能够组织和格式化文本、图像、链接等内容。 HTML的基本结构 文档类型声明…

Adobe Premiere 2020 下载地址及安装教程

Premiere是一款专业的视频编辑软件,由Adobe Systems开发。它为用户提供了丰富的视频编辑工具和创意效果,可用于电影、电视节目、广告和其他多媒体项目的制作。 Premiere具有直观的用户界面和强大的功能,使得编辑和处理视频变得简单而高效。它…


<iframe style="width: 100%; height: 100%;" src="{{vm.previewUrl}}"></iframe> 出现报错信息:Can't interpolate: {{vm.previewUrl}} 在ctrl文件中信任该文件就可以了 vm.trustUrl = $sce.trustAsResourceUrl(vm.previewUrl);//信任…


博主介绍:✌程序员徐师兄、7年大厂程序员经历。全网粉丝12w、csdn博客专家、掘金/华为云/阿里云/InfoQ等平台优质作者、专注于Java技术领域和毕业项目实战✌ 🍅文末获取源码联系🍅 👇🏻 精彩专栏推荐订阅👇…


参考: 对XML文件读取和编辑2-QXmlStreamReader读取 - 知乎 https://zhuanlan.zhihu.com/p/358862429 本地环境: win10专业版,64位,Qt 5.12 代码已测试通过。 问题描述 需要按字节读取一个文档,解析其中具有xml格式的…


前言1. 数组2. 一维数组2.1 一维数组的创建2.2 一维数组的初始化2.3 一维数组的使用2.3.1 一维数组的下标2.3.2 一维数组的输入和输出 2.4 一维数组在内存中的存储 3. 二维数组3.1 二维数组的创建3.2 二维数组的初始化3.3 二维数组的使用3.3.1 二维数组的下标3.3.2 二维数组的输…

ELK日志收集和备份填坑实战 (滞后8个小时等时区问题)

ES的备份:ES快照备份 根据时间,每天零点在Linux机器crontab来调用api接口实现快照备份,通过快照备份,可以定准恢复到某一天的日志。 现象:(坑:但是恢复某一天日志,发现会少8小时的日…


1 re模块 re 模块是 Python 中用于正则表达式操作的模块。正则表达式(Regular Expression)是一种强大的文本处理工具,它使用一种特殊的字符序列来表示字符串中的模式,并可以通过模式匹配、查找、替换等操作对文本进行高效处理。 …


对话框 一.消息对话框(QMessageBox)1.自己构建2.使用静态函数构建 二.颜色对话框(QDialog)三.文件对话框(QFileDialog)四.字体对话框(QFontDialog)五.输入对话框(QInputD…



PgSQL之WITH Queries/Statement

PostgreSQL WITH 子句 在 PostgreSQL 中,WITH 子句提供了一种编写辅助语句的方法,以便在更大的查询中使用。 WITH 子句有助于将复杂的大型查询分解为更简单的表单,便于阅读。这些语句通常称为通用表表达式(Common Table Express…

《Kubernetes证书篇:基于Kylin V10+ARM架构CPU修改K8S 1.26.15版本证书时间限制》

一、背景 Kubernetes 默认的证书有效期只有1年,因此需要每年手动更新一次节点上面的证书,特别麻烦而且更新过程中可能会出现问题,因此我们要对 Kubernetes 的 SSL 证书有效期进行修改,这里将证书的时间限制修改为100年。 环境信息…


总体介绍 GB/T28181协议,全名叫《安全防范视频监控联网系统信息传输、交换、控制技术要求》,是由中国国家标准委员会发布的一种国家级的标准。它主要对视频监控系统的各个方面做了明确的规定,使得不同厂商生产的视频监控设备能够相互连通&…


文章的更新路线:JavaScript基础知识-Vue2基础知识-Vue3基础知识-TypeScript基础知识-网络基础知识-浏览器基础知识-项目优化知识-项目实战经验-前端温习题(HTML基础知识和CSS基础知识已经更新完毕) 正文 Proxy是JavaScript中的一个强大而灵活…

linux 自定义快捷指令(docker)

vi /root/.bashrc alias dis='docker images' alias dps='docker ps --format "table {{.ID}}\t{{.Image}}\t{{.Ports}}\t{{.Status}}\t{{.Names}}"' 保存退出后使用 source /root/.bashrc 让其立即生效 source /root/.bashrc

【C 数据结构】栈

文章目录 【 1. 基本原理 】栈的分类 【 2. 动态链表栈 】2.1 双结构体实现2.1.0 栈的节点设计2.1.1 入栈2.1.2 出栈2.1.3 遍历2.1.4 实例 2.2 单结构体实现2.2.0 栈的节点设计2.2.1 入栈2.2.2 出栈2.2.3 实例 【 3. 顺序栈 】3.1 入栈3.2 出栈3.3 实例 【 1. 基本原理 】 栈&…