Leon's Blogging

Coding blogging for hackers.

Getting Started With Elasticsearch

| Comments

Elasticsearch is a distributed, RESTful search and analytics engine capable of solving a growing number of use cases. As the heart of the Elastic Stack, it centrally stores your data so you can discover the expected and uncover the unexpected.

Elasticsearch vs RDBMS

  • Node - Server
  • Indices - Databases
  • Types - Tables
  • Documents - Rows
  • Fields - Columns

Since version 6.0, every request that sends a body must also add -H 'Content-Type: application/json'

Install

java

1
2
brew cask install java
brew cask install homebrew/cask-versions/java8

elasticsearch

1
brew install elasticsearch24

Check

1
curl localhost:9200
1
2
3
4
5
6
7
8
9
10
11
12
13
{
  "name" : "Crimson Daffodil",
  "cluster_name" : "elasticsearch",
  "cluster_uuid" : "_1W7Qb8WSByw_AoBtx3V9g",
  "version" : {
    "number" : "2.4.6",
    "build_hash" : "5376dca9f70f3abef96a77f4bb22720ace8240fd",
    "build_timestamp" : "2017-07-18T12:17:44Z",
    "build_snapshot" : false,
    "lucene_version" : "5.5.4"
  },
  "tagline" : "You Know, for Search"
}

health

1
curl localhost:9200/_cluster/health?pretty=true
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
{
  "cluster_name" : "elasticsearch_leon",
  "status" : "yellow",
  "timed_out" : false,
  "number_of_nodes" : 1,
  "number_of_data_nodes" : 1,
  "active_primary_shards" : 10,
  "active_shards" : 10,
  "relocating_shards" : 0,
  "initializing_shards" : 0,
  "unassigned_shards" : 10,
  "delayed_unassigned_shards" : 0,
  "number_of_pending_tasks" : 0,
  "number_of_in_flight_fetch" : 0,
  "task_max_waiting_in_queue_millis" : 0,
  "active_shards_percent_as_number" : 50.0
}

explain

Explains how each hit in the result was matched and scored

1
localhost:9200/_search?explain

Count

1
2
3
4
5
6
7
curl -X GET 'http://localhost:9200/_count?pretty' -d '
{
    "query": {
        "match_all": {}
    }
}
'
1
2
3
4
5
6
7
8
{
  "count" : 0,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  }
}

Show all indices

1
curl -X GET 'http://localhost:9200/_cat/indices?v'
1
2
health status index    pri rep docs.count docs.deleted store.size pri.store.size
yellow open   accounts   5   1          1            0      4.1kb          4.1kb

Show the mappings (types) of all indices

1
curl 'localhost:9200/_mapping?pretty=true'
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
{
  "accounts" : {
    "mappings" : {
      "person" : {
        "properties" : {
          "desc" : {
            "type" : "string"
          },
          "title" : {
            "type" : "string"
          },
          "user" : {
            "type" : "string"
          }
        }
      }
    }
  }
}

Create Index

1
curl -X PUT 'localhost:9200/weather'
1
2
3
{
  "acknowledged":true
}

Delete Index

1
2
3
4
5
curl -X DELETE 'localhost:9200/weather'
curl -X DELETE 'localhost:9200/_all'
curl -X DELETE 'localhost:9200/*'
curl -X DELETE 'localhost:9200/index_*'
curl -X DELETE 'localhost:9200/A,B'
1
2
3
{
  "acknowledged":true
}

Setting Analyzer

elasticsearch-analysis-ik

index type

  • analyzed - analyze the string first, then index it
  • not_analyzed - index the value as-is, without analyzing it
  • no - do not index the field at all (it is not searchable)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
$ curl -X PUT 'localhost:9200/accounts' -d '
{
  "mappings": {
    "person": {
      "properties": {
        "user": {
          "type": "text",
          "analyzer": "ik_max_word",
          "search_analyzer": "ik_max_word"
        },
        "title": {
          "type": "text",
          "analyzer": "ik_max_word",
          "search_analyzer": "ik_max_word"
        },
        "desc": {
          "type": "text",
          "analyzer": "ik_max_word",
          "search_analyzer": "ik_max_word"
        },
        "categories": {
           "type": "nested",
           "properties": {
              "name": {
                "type": "string"
              }
           }
        }
      }
    }
  }
}'

update analyzer

1
2
3
4
5
6
7
8
9
$ curl -X PUT 'localhost:9200/accounts/_mapping/person' -d '
{
  "properties" : {
    "tag" : {
      "type" :    "string",
      "index":    "not_analyzed"
    }
  }
}'

show mapping

1
$ curl 'localhost:9200/accounts/_mapping/person'

Understand how Elasticsearch analyzes text

1
2
3
4
5
curl 'localhost:9200/_analyze' -d '
{
  "analyzer": "standard",
  "text": "Text to analyze"
}'

Create Document

With an explicit id (the id can also be a string)

1
2
3
4
5
6
7
8
// the id 1 can be replaced with a string such as first
curl -X PUT 'localhost:9200/accounts/person/1' -d '
{
  "user": "leon",
  "title": "工程師",
  "desc": "數據庫管理",
  "age": 18
}'
1
2
3
4
5
6
7
8
9
10
11
12
13
{
  "_index": "accounts",
  "_type": "person",
  "_id": "1",
  "_version": 1,
  "_score": 1,
  "_source": {
    "user": "leon",
    "title": "工程師",
    "desc": "數據庫管理",
    "age": 18
  }
}

Without an id, Elasticsearch automatically generates a random unique id

1
2
3
4
5
6
7
curl -X POST 'localhost:9200/accounts/person' -d '
{
  "user": "mark",
  "title": "工程師",
  "desc": "系統管理",
  "age": 28
}'

Show

1
2
3
4
curl 'localhost:9200/accounts/person/1?pretty=true'

// _source without other data
curl 'localhost:9200/accounts/person/1/_source?pretty=true'
1
2
3
4
5
6
7
8
9
10
11
12
13
{
  "_index": "accounts",
  "_type": "person",
  "_id": "AWpP4wxUKXQ2yoYJjuiA",
  "_version": 1,
  "found": true,
    "_source": {
      "user": "leon",
      "title": "工程師",
      "desc": "數據庫管理",
      "age": 20
    }
}

not found

1
curl 'localhost:9200/weather/beijing/abc?pretty=true'
1
2
3
4
5
6
{
  "_index" : "weather",
  "_type" : "beijing",
  "_id" : "abc",
  "found" : false
}

Delete

1
curl -X DELETE 'localhost:9200/accounts/person/1'

Reindex (not Update)

Elasticsearch does not update a document in place: a PUT to an existing id reindexes the whole document as a new one and increases the version number

1
2
3
4
5
6
7
curl -X PUT 'localhost:9200/accounts/person/1' -d '
{
    "user" : "leon",
    "title" : "工程師",
    "desc" : "數據庫管理,軟件開發",
    "age": 20
}'

The version has changed to 2

1
2
3
4
5
6
7
8
9
10
11
12
{
  "_index": "accounts",
  "_type": "person",
  "_id": "1",
  "_version": 2,
  "_shards": {
    "total": 2,
    "successful": 1,
    "failed": 0
  },
  "created": false
}

Search keyword

Search all documents (empty search)

  • /_search - all index, all type
  • /gb/_search - gb index, all type
  • /gb,us/_search - gb and us index, all type
  • /g*,u*/_search - all indices whose names start with g or u, all type
  • /gb/user/_search - gb index, user type
  • /gb,us/user,tweet/_search - gb or us index, user and tweet type
  • /_all/user,tweet/_search - all index, user and tweet type
1
2
3
4
5
6
7
8
9
10
curl 'localhost:9200/accounts/person/_search?pretty=true'


// an empty body is equivalent to sending:

{
    "query": {
        "match_all": {}
    }
}
  • took - time the search took, in milliseconds
  • hits - the matching documents (hits.total is how many records matched)
  • _score - relevance score of each match
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
{
  "took" : 50,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 2,
    "max_score" : 1.0,
    "hits" : [ {
      "_index" : "accounts",
      "_type" : "person",
      "_id" : "2",
      "_score" : 1.0,
      "_source" : {
        "user" : "leon",
        "title" : "工程師",
        "desc" : "數據庫管理",
        "age": 20
      }
    }, {
      "_index" : "accounts",
      "_type" : "person",
      "_id" : "1",
      "_score" : 1.0,
      "_source" : {
        "user" : "mark",
        "title" : "工程師",
        "desc" : "數據庫管理,軟件開發",
        "age": 28
      }
    } ]
  }
}

match & multi_match Search

The query text is analyzed, and each resulting keyword is searched for

1
2
3
4
5
6
7
8
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
    "query" : {
        "match" : {
            "user" : "leon"
        }
    }
}'
1
2
3
4
5
6
7
8
9
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
    "query" : {
        "multi_match" : {
           "query":    "數據庫",
          "fields":   [ "title", "desc" ]
        }
    }
}'

filtered & range Search

The filtered query is deprecated since 2.0 and removed in 5.0; use a bool query with a filter clause instead

  • gt - >
  • gte - >=
  • lt - <
  • lte - <=
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
    "query" : {
        "filtered" : {
            "filter" : {
                "range" : {
                    "age" : { "gt" : 25 }
                }
            },
            "query" : {
                "match" : {
                    "user" : "leon"
                }
            }
        }
    }
}'

term & terms Search

Exact match: the search term is not analyzed

1
2
3
4
5
6
7
8
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
    "query" : {
        "term" : {
            "user": "leon"
        }
    }
}'
1
2
3
4
5
6
7
8
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
    "query" : {
        "terms" : {
            "user": ["leon", "mark"]
        }
    }
}'

exists & missing Search

Find documents where a field IS NOT NULL (exists) or IS NULL (missing)

1
2
3
4
5
6
7
8
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
    "query" : {
        "exists" : {
            "field": "title"
        }
    }
}'

Global search

Match Query

1
2
3
4
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
  "query" : { "match" : { "desc" : "軟件" }}
}'
1
curl 'localhost:9200/accounts/person/_search?pretty=true&q=user:leon'

Use size to limit the number of records returned

1
2
3
4
5
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
  "query" : { "match" : { "desc" : "管理" }},
  "size": 1
}'

Use from to set the offset of the first record returned

1
2
3
4
5
6
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
  "query" : { "match" : { "desc" : "管理" }},
  "from": 1,
  "size": 1
}'

or Search

“軟件 系統” -> “軟件” or “系統”

1
2
3
4
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
  "query" : { "match" : { "desc" : "軟件 系統" }}
}'

Bool Search (and)

should use Bool Query

must have “軟件” AND “系統”

1
2
3
4
5
6
7
8
9
10
11
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
  "query": {
    "bool": {
      "must": [
        { "match": { "desc": "軟件" } },
        { "match": { "desc": "系統" } }
      ]
    }
  }
}'

match_phrase Search

must be “軟件 系統”

1
2
3
4
5
6
7
8
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
    "query" : {
        "match_phrase" : {
            "desc" : "軟件 系統"
        }
    }
}'

highlight Search

Return search results with the matched text highlighted in <em></em>

1
2
3
4
5
6
7
8
9
10
11
12
13
14
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
  "query":  {
        "match_phrase" : {
            "user" : "leon"
        }
    },
    "highlight": {
        "fields" : {
            "user" : {}
        }
    }
}
'
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
{
 "_index": "accounts",
 "_type": "person",
 "_id": "1",
 "_score": 0.30685282,
 "_source": {
   "user": "leon",
   "title": "工程師",
   "desc": "數據庫管理,軟件開發",
   "age": 18
 },
 "highlight": {
    "user": [
      "<em>leon</em>"
    ]
  }
}

aggregations Search like SQL GROUP BY

count each user

1
2
3
4
5
6
7
8
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
  "aggs": {
    "all_user": {
      "terms": { "field": "user" }
    }
  }
}'
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
//...
"aggregations" : {
    "all_user" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [ {
        "key" : "leon",
        "doc_count" : 2
      }, {
        "key" : "mark",
        "doc_count" : 1
      } ]
    }
  }
//...

count each user + avg age

1
2
3
4
5
6
7
8
9
10
11
12
13
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
    "aggs" : {
        "all_user" : {
            "terms" : { "field" : "user" },
            "aggs" : {
                "avg_age" : {
                    "avg" : { "field" : "age" }
                }
            }
        }
    }
}'
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// ...
  "aggregations" : {
    "all_user" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [ {
        "key" : "leon",
        "doc_count" : 2,
        "avg_age" : {
          "value" : 28.0
        }
      }, {
        "key" : "leo2222n",
        "doc_count" : 1,
        "avg_age" : {
          "value" : 38.0
        }
      }, {
        "key" : "mark",
        "doc_count" : 1,
        "avg_age" : {
          "value" : 28.0
        }
      } ]
    }
  }
// ...

mix query

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
  "query": {
      "match": {
          "user": "leon"
      }
  },
    "aggs" : {
        "all_user" : {
            "terms" : { "field" : "user" },
            "aggs" : {
                "avg_age" : {
                    "avg" : { "field" : "age" }
                }
            }
        }
    }
}'
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
{
    "took": 50,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "failed": 0
    },
    "hits": {
        "total": 2,
        "max_score": 1.4054651,
        "hits": [
            {
                "_index": "accounts",
                "_type": "person",
                "_id": "8",
                "_score": 1.4054651,
                "_source": {
                    "user": "leon",
                    "title": "工程師",
                    "desc": "數據庫管理",
                    "age": 38
                }
            },
            {
                "_index": "accounts",
                "_type": "person",
                "_id": "1",
                "_score": 0.30685282,
                "_source": {
                    "user": "leon",
                    "title": "工程師",
                    "desc": "數據庫管理,軟件開發",
                    "age": 18
                }
            }
        ]
    },
    "aggregations": {
        "all_user": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "leon",
                    "doc_count": 2,
                    "avg_age": {
                        "value": 28
                    }
                }
            ]
        }
    }
}

Sort

  • ASC
  • DESC
1
2
3
4
5
6
7
8
9
10
11
12
13
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
    "query" : {
        "match" : {
            "user" : "leon"
        }
    },
    "sort": {
          "age": {
              "order": "asc"
          }
      }
}'

max_score and _score are null because the score is not computed when it is not used for sorting

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// ...
"hits": {
  "total": 2,
  "max_score": null,
  "hits": [
      {
          "_index": "accounts",
          "_type": "person",
          "_id": "1",
          "_score": null,
          "_source": {
              "user": "leon",
              "title": "工程師",
              "desc": "數據庫管理,軟件開發",
              "age": 18
          },
          "sort": [
              18
          ]
      }
      // ...
  ]
}
// ...

multiple sort

The second-level sort is applied when the first-level sort finds documents with the same age

1
2
3
4
5
6
7
8
9
10
11
12
curl 'localhost:9200/accounts/person/_search?pretty=true'  -d '
{
    "query" : {
        "match" : {
            "user" : "leon"
        }
    },
    "sort": [
        { "age":   { "order": "asc" }},
        { "_score": { "order": "desc" }}
    ]
}'

Reference

Comments