I've been trying to build an index in ES and add the initial items to it (around 350k), using PHP.
I tried all kinds of batch sizes (from 10 items to 1k), checked the count, and checked the threshold, but for some reason it doesn't index every item.
It just skips over some random items, without any errors in the batch result response. I feel like I've tried everything and I have no idea what to do next.
I'm using Amazon OpenSearch with the latest supported ES (7.10).
The index looks like this:
{
"wonder-search": {
"aliases": {},
"mappings": {
"properties": {
"address": {
"type": "text"
},
"city": {
"type": "text"
},
"city_id": {
"type": "integer"
},
"duration": {
"type": "integer"
},
"filename": {
"type": "text"
},
"geo_point": {
"type": "geo_point"
},
"icon": {
"type": "keyword"
},
"is_sandbox": {
"type": "integer"
},
"item_id": {
"type": "integer"
},
"item_label": {
"type": "keyword"
},
"latitude": {
"type": "float"
},
"longitude": {
"type": "float"
},
"search_text_caption_json": {
"type": "text",
"index_phrases": true
},
"search_text_city_json": {
"type": "text",
"index_phrases": true
},
"search_text_completion": {
"type": "completion",
"analyzer": "simple",
"preserve_separators": true,
"preserve_position_increments": true,
"max_input_length": 50,
"contexts": [
{
"name": "type",
"type": "CATEGORY"
}
]
},
"search_text_country_json": {
"type": "text",
"index_phrases": true
},
"search_text_cuisine_name_json": {
"type": "text",
"index_phrases": true
},
"search_text_location_name_json": {
"type": "text",
"index_phrases": true
},
"search_text_state_json": {
"type": "text",
"index_phrases": true
},
"search_text_tag_name_json": {
"type": "text",
"index_phrases": true
},
"search_text_username_json": {
"type": "text",
"index_phrases": true
},
"sort": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"sort_score": {
"type": "double"
},
"type": {
"type": "text"
},
"user_icon": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user_id": {
"type": "integer"
},
"username": {
"type": "keyword"
},
"vanity_url": {
"type": "keyword"
},
"video_count": {
"type": "integer"
}
}
},
"settings": {
"index": {
"routing": {
"allocation": {
"include": {
"_tier_preference": "data_content"
}
}
},
"mapping": {
"ignore_malformed": "true"
},
"number_of_shards": "1",
"provided_name": "wonder-search",
"creation_date": "1671003076106",
"number_of_replicas": "1",
"uuid": "YQh1q40WTneLE4MWDWhArw",
"version": {
"created": "7100199"
}
}
}
}
}
and one item looks like this:
{
"_index": "wonder-search",
"_type": "_doc",
"_id": "wq2LD4UBUAuy7FQPhtZh",
"_version": 1,
"_seq_no": 2003,
"_primary_term": 1,
"found": true,
"_source": {
"sort": "4004",
"item_id": "4934",
"user_id": "434",
"user_icon": "/site-content/avatars/Sp8AXjTJvMbRao2oZbuiUuSVH042-1597776099045.jpeg",
"username": "chuurros",
"item_label": "Kyoto Katsugyu【京都勝牛】",
"search_text_username_json": [
"chuurros"
],
"search_text_caption_json": [
"Absolutely love their gyukatsu (beef katsu) here! Delicious and will keep you wanting more! 🥰"
],
"search_text_city_json": [
"Toronto"
],
"search_text_state_json": [
"Ontario"
],
"search_text_country_json": [
"Canada"
],
"search_text_location_name_json": [
"Kyoto Katsugyu【京都勝牛】"
],
"search_text_tag_name_json": [
"japanese",
"restaurant",
"asian",
"dining",
"topcollection-4934"
],
"search_text_cuisine_name_json": [],
"type": "video",
"vanity_url": "",
"icon": "",
"city": "Toronto",
"city_id": "439",
"latitude": "43.65682410",
"longitude": "-79.37617410",
"address": "134 Dundas St E",
"duration": "9.57",
"video_count": "0",
"sort_score": "43",
"filename": "373d75fd-4292-4e5b-a239-4b1c39ffc86c.MOV",
"is_sandbox": "0",
"geo_point": {
"lat": "43.65682410",
"lon": "-79.37617410"
},
"search_text_completion": {
"input": [
"Kyoto Katsugyu【京都勝牛】"
],
"contexts": {
"type": [
"video"
]
}
}
}
}
Any ideas why it works like this?
Here is the JSON response:
{
"id": "1234567890123456789",
"creation_date": 12345678,
"event": "WAITING_PAYMENT",
"version": "2.0.0",
"data": {
"product": {
"id": 213344,
"name": "Product Name",
"has_co_production": false
},
"affiliates": [
{
"name": "Affiliate name"
}
],
"buyer": {
"email": "buyer@email.com"
},
"producer": {
"name": "Producer Name"
},
"commissions": [
{
"value": 0.65,
"source": "MARKETPLACE"
},
{
"value": 3.10,
"source": "PRODUCER"
}
],
"purchase": {
"approved_date": 1231241434453,
"full_price": {
"value": 134.0
},
"original_offer_price": {
"currency_value": "EUR",
"value": 100.78
},
"price": {
"value": 150.6
},
"order_date": "123243546",
"status": "STARTED",
"transaction": "HP02316330308193",
"payment": {
"billet_barcode": "03399.33335 33823.303087 198801027 2 876300015000",
"billet_url": "https://billet-link.com/bHP023163303193"
}
},
"subscription": {
"status": "ACTIVE",
"plan": {
"name": "plan name"
},
"subscriber": {
"code": "12133421"
}
}
}
}
My question is: how do I extract data["buyer"]["email"] in PHP?
I only need to extract the email from the buyer object inside the data object.
First, you need to decode the JSON into a PHP array (or an object); then you can access the requested information from the decoded data. Note that buyer is nested under the top-level data key, so that key must be part of the path.
$data = json_decode('the json string must be placed here', true);
$email = $data['data']['buyer']['email'];
Place your JSON string in the first argument of the json_decode() function.
Error
[body] => {"error":{"root_cause":[{"type":"mapper_parsing_exception","reason":"failed to parse"}],"type":"mapper_parsing_exception","reason":"failed to
parse","caused_by":{"type":"not_x_content_exception","reason":"Compressor detection can
only be called on some xcontent bytes or compressed xcontent bytes"}},"status":400}
I'm getting an error while adding the documents to my index.
http://localhost:9595/patient_trimester
{
"patient_trimester": {
"aliases": {
},
"mappings": {
"_default_": {
"_all": {
"enabled": true
},
"dynamic_templates": [
{
"string_fields": {
"mapping": {
"index": "not_analyzed",
"omit_norms": true,
"type": "string"
},
"match": "*",
"match_mapping_type": "string"
}
}
],
"properties": {
"@version": {
"type": "string",
"index": "not_analyzed"
}
}
},
"patient_trimester": {
"_all": {
"enabled": true
},
"dynamic_templates": [
{
"string_fields": {
"mapping": {
"index": "not_analyzed",
"omit_norms": true,
"type": "string"
},
"match": "*",
"match_mapping_type": "string"
}
}
],
"properties": {
"@timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"@version": {
"type": "string",
"index": "not_analyzed"
},
"last_consult_by": {
"type": "string",
"index": "not_analyzed"
},
"mpi": {
"type": "string",
"index": "not_analyzed"
},
"bill_id": {
"type": "integer"
},
"bill_date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"site": {
"type": "string",
"index": "not_analyzed"
},
"effective_edd": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"is_converted": {
"type": "integer"
},
"admitting_physician": {
"type": "string",
"index": "not_analyzed"
},
"days": {
"type": "integer"
},
"trim": {
"type": "string",
"index": "not_analyzed"
},
"tags": {
"type": "string",
"index": "not_analyzed"
}
}
}
},
"warmers": {
}
}
}
This is how I created the index through Postman.
In the $result variable I'm sending:
(
[last_consult_by] => xxxxxx
[mpi] => xxxxxxxx
[bill_id] => 176073
[bill_date] => 2018-07-12 12:00:00
[site] => xxx
[effective_edd] => 2018-07-28 12:00:00
[is_converted] => 0
[admitting_physician] => xxxxxxxxx
[days] => 16
[trim] => Array
(
[trim3] => 1
)
)
$params = [
'index' => 'patient_trimester',
'type' => 'patient_trimester',
'body' => $result
];
$res = $client->index($params);
print_r($res); exit;
I don't understand why the mapper_parsing_exception is happening.
Is it because of my datatype mappings? Are the mappings given for effective_edd, bill_date and trim correct?
Please help me resolve this issue.
I have a mapping like this
{
"settings": {
"analysis": {
"filter": {
"nGramFilter": {
"type": "nGram",
"min_gram": 3,
"max_gram": 20,
"token_chars": [
"letter",
"digit",
"punctuation",
"symbol"
]
},
"email" : {
"type" : "pattern_capture",
"preserve_original" : 1,
"patterns" : [
"([^@]+)",
"(\\p{L}+)",
"(\\d+)",
"@(.+)"
]
},
"number" : {
"type" : "pattern_capture",
"preserve_original" : 1,
"patterns" : [
"([^+-]+)",
"(\\d+)"
]
},
"edgeNGramFilter": {
"type": "nGram",
"min_gram": 1,
"max_gram": 10,
"token_chars": [
"letter",
"digit",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"nGramAnalyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"nGramFilter"
]
},
"whitespaceAnalyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase"
]
},
"email" : {
"tokenizer" : "uax_url_email",
"filter" : [
"email",
"lowercase",
"unique"
]
},
"number" : {
"tokenizer" : "whitespace",
"filter" : [ "number", "unique" ]
},
"edgeNGramAnalyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"edgeNGramFilter"
]
}
}
}
},
"users": {
"mappings": {
"user_profiles": {
"properties": {
"firstName": {
"type": "string",
"analyzer": "nGramAnalyzer",
"search_analyzer": "whitespaceAnalyzer"
},
"lastName": {
"type": "string",
"analyzer": "nGramAnalyzer",
"search_analyzer": "whitespaceAnalyzer"
},
"email": {
"type": "string",
"analyzer": "email",
"search_analyzer": "whitespaceAnalyzer"
},
"score" : {
"type": "string"
},
"homeLandline": {
"type": "string",
"analyzer": "number",
"search_analyzer": "whitespaceAnalyzer"
},
"dob": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"mobile": {
"type": "integer"
},
"residenceCity": {
"type": "string",
"analyzer": "edgeNGramAnalyzer",
"search_analyzer": "whitespaceAnalyzer"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
}
}
The score can arrive either as an integer or as "NA", so I mapped the type as string, but while posting data to the index I am getting a NumberFormatException.
For example:
if I post the first document with an integer score, followed by one with "NA", I get this exception.
While checking my log file I see this error:
[2016-08-29 15:19:01] elasticlog.WARNING: Response ["{\"error\":{\"root_cause\":[{\"type\":\"mapper_parsing_exception\",\"reason\":\"failed
to parse
[score]\"}],\"type\":\"mapper_parsing_exception\",\"reason\":\"failed
to parse
[score]\",\"caused_by\":{\"type\":\"number_format_exception\",\"reason\":\"For
input string: \"NH\"\"}},\"status\":400}"] []
Your mapping is incorrect. Assuming users is the index name and user_profiles is the type, it should be:
{
"users": {
"mappings": {
"user_profiles": {
"properties": {
"score": {
"type": "string"
}
}
}
}
}
}
You are missing mappings before user_profiles.
I have a dynamic field named *_value. The field os_value contains values like android 5, android 5.1, android 6, etc.
When faceting on the field os_value, the values are tokenized: android is counted as 3, 5 as 1, 5.1 as 1 and 6 as 1.
The mapping for the index is shown below.
{
"test_prod": {
"aliases": {},
"mappings": {
"products": {
"properties": {
"*_capacity": {
"type": "string",
"index": "not_analyzed"
},
"*_value": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
},
"*_rating": {
"type": "double"
},
"*_value": {
"type": "string",
"index": "not_analyzed"
},
"attribute_set": {
"type": "string",
"index": "not_analyzed"
},
"availability": {
"type": "integer"
},
"battery_capacity": {
"type": "string"
},
"battery_capacity_value": {
"type": "long"
},
"battery_life_rating": {
"type": "long"
},
"brand": {
"type": "string",
"index": "not_analyzed"
},
"brand_label": {
"type": "string"
},
"camera_rating": {
"type": "long"
},
"capacity": {
"type": "long"
},
"category": {
"type": "string",
"index": "not_analyzed"
},
"class": {
"type": "string",
"index": "not_analyzed"
},
"color": {
"type": "string",
"index": "not_analyzed"
},
"configuration": {
"type": "string",
"index": "not_analyzed"
},
"connectivity": {
"type": "string",
"index": "not_analyzed"
},
"created_at": {
"type": "integer"
},
"description": {
"type": "string"
},
"design_rating": {
"type": "long"
},
"designed_for": {
"type": "string",
"index": "not_analyzed"
},
"discount": {
"type": "double"
},
"display_rating": {
"type": "long"
},
"features": {
"type": "string",
"index": "not_analyzed"
},
"front_camera_resolution_range": {
"type": "string",
"index": "not_analyzed"
},
"front_camera_resolution_value": {
"type": "long"
},
"graphics_memory_capacity": {
"type": "string"
},
"hard_disk_capacity": {
"type": "string"
},
"headset_design": {
"type": "string",
"index": "not_analyzed"
},
"headset_type": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "integer"
},
"image_big": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
},
"image_slider": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
},
"image_thumb": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
},
"interface": {
"type": "string",
"index": "not_analyzed"
},
"internal_storage": {
"type": "string",
"index": "not_analyzed"
},
"is_default": {
"type": "integer"
},
"is_exclusive": {
"type": "integer"
},
"key": {
"type": "string"
},
"last_update": {
"type": "date",
"format": "Y-m-d H:m:s"
},
"material": {
"type": "string"
},
"model": {
"type": "string"
},
"mrp": {
"type": "double"
},
"ndtv_rating": {
"type": "long"
},
"network_type": {
"type": "string",
"index": "not_analyzed"
},
"os": {
"type": "string",
"index": "not_analyzed"
},
"os_label": {
"type": "string"
},
"performance_rating": {
"type": "long"
},
"popularity": {
"type": "integer"
},
"processor_core": {
"type": "string",
"index": "not_analyzed"
},
"processor_name": {
"type": "string",
"index": "not_analyzed"
},
"product_id": {
"type": "long"
},
"product_specs": {
"type": "string"
},
"promo_label": {
"type": "string"
},
"pros_cons": {
"type": "string"
},
"ram_range": {
"type": "string"
},
"ram_value": {
"type": "long"
},
"rear_camera_resolution_range": {
"type": "string",
"index": "not_analyzed"
},
"rear_camera_resolution_value": {
"type": "long"
},
"register_mode": {
"type": "string"
},
"related_sku": {
"type": "string"
},
"release_priority": {
"type": "long"
},
"review_url": {
"type": "string"
},
"screen_size": {
"type": "string",
"index": "not_analyzed"
},
"screen_size_value": {
"type": "double"
},
"selling_price": {
"type": "double"
},
"shop_url": {
"type": "string"
},
"sim3g": {
"type": "long"
},
"sim4g": {
"type": "long"
},
"sim_type": {
"type": "string"
},
"sku": {
"type": "string"
},
"slug": {
"type": "string"
},
"software_rating": {
"type": "long"
},
"source": {
"type": "string"
},
"ssd_capacity": {
"type": "string"
},
"stock": {
"type": "string"
},
"subtitle": {
"type": "string"
},
"system_memory": {
"type": "string"
},
"tags": {
"type": "string"
},
"theme": {
"type": "string",
"index": "not_analyzed"
},
"title": {
"type": "string"
},
"title_raw": {
"type": "string",
"index": "not_analyzed"
},
"title_suggest": {
"type": "string",
"analyzer": "autocomplete_analyzer",
"search_analyzer": "standard"
},
"type": {
"type": "string",
"index": "not_analyzed"
},
"value_for_money_rating": {
"type": "long"
},
"variant_id": {
"type": "integer"
},
"voice_calling": {
"type": "integer"
},
"wifi": {
"type": "integer"
},
"wired_or_wireless": {
"type": "string",
"index": "not_analyzed"
}
}
}
},
"settings": {
"index": {
"creation_date": "1467010796904",
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"autocomplete_analyzer": {
"filter": ["lowercase", "autocomplete_filter"],
"type": "custom",
"tokenizer": "standard"
}
}
},
"number_of_shards": "5",
"number_of_replicas": "1",
"uuid": "BJbw5tD-assad",
"version": {
"created": "2030399"
}
}
},
"warmers": {}
}
}
Also, the values are converted to lowercase while faceting. Am I doing anything wrong? Please help.
Ok, I see what you're trying to achieve. What you actually need are dynamic templates. You'll need to delete your index and recreate it like this:
POST test_prod
{
"mappings": {
"products": {
"dynamic_templates": [
{
"capacities": {
"match_mapping_type": "string",
"match": "*_capacity",
"mapping": {
"type": "string",
"index": "not_analyzed"
}
}
},
{
"values": {
"match_mapping_type": "string",
"match": "*_value",
"mapping": {
"type": "string",
"index": "not_analyzed"
}
}
},
{
"ratings": {
"match": "*_rating",
"mapping": {
"type": "double"
}
}
}
],
"properties": {
"attribute_set": {
"type": "string",
"index": "not_analyzed"
},
"availability": {
"type": "integer"
},
"battery_capacity": {
"type": "string"
},
"battery_capacity_value": {
"type": "long"
},
"battery_life_rating": {
"type": "long"
},
"brand": {
"type": "string",
"index": "not_analyzed"
},
"brand_label": {
"type": "string"
},
"camera_rating": {
"type": "long"
},
"capacity": {
"type": "long"
},
"category": {
"type": "string",
"index": "not_analyzed"
},
"class": {
"type": "string",
"index": "not_analyzed"
},
"color": {
"type": "string",
"index": "not_analyzed"
},
"configuration": {
"type": "string",
"index": "not_analyzed"
},
"connectivity": {
"type": "string",
"index": "not_analyzed"
},
"created_at": {
"type": "integer"
},
"description": {
"type": "string"
},
"design_rating": {
"type": "long"
},
"designed_for": {
"type": "string",
"index": "not_analyzed"
},
"discount": {
"type": "double"
},
"display_rating": {
"type": "long"
},
"features": {
"type": "string",
"index": "not_analyzed"
},
"front_camera_resolution_range": {
"type": "string",
"index": "not_analyzed"
},
"front_camera_resolution_value": {
"type": "long"
},
"graphics_memory_capacity": {
"type": "string"
},
"hard_disk_capacity": {
"type": "string"
},
"headset_design": {
"type": "string",
"index": "not_analyzed"
},
"headset_type": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "integer"
},
"image_big": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
},
"image_slider": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
},
"image_thumb": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
},
"interface": {
"type": "string",
"index": "not_analyzed"
},
"internal_storage": {
"type": "string",
"index": "not_analyzed"
},
"is_default": {
"type": "integer"
},
"is_exclusive": {
"type": "integer"
},
"key": {
"type": "string"
},
"last_update": {
"type": "date",
"format": "Y-m-d H:m:s"
},
"material": {
"type": "string"
},
"model": {
"type": "string"
},
"mrp": {
"type": "double"
},
"ndtv_rating": {
"type": "long"
},
"network_type": {
"type": "string",
"index": "not_analyzed"
},
"os": {
"type": "string",
"index": "not_analyzed"
},
"os_label": {
"type": "string"
},
"performance_rating": {
"type": "long"
},
"popularity": {
"type": "integer"
},
"processor_core": {
"type": "string",
"index": "not_analyzed"
},
"processor_name": {
"type": "string",
"index": "not_analyzed"
},
"product_id": {
"type": "long"
},
"product_specs": {
"type": "string"
},
"promo_label": {
"type": "string"
},
"pros_cons": {
"type": "string"
},
"ram_range": {
"type": "string"
},
"ram_value": {
"type": "long"
},
"rear_camera_resolution_range": {
"type": "string",
"index": "not_analyzed"
},
"rear_camera_resolution_value": {
"type": "long"
},
"register_mode": {
"type": "string"
},
"related_sku": {
"type": "string"
},
"release_priority": {
"type": "long"
},
"review_url": {
"type": "string"
},
"screen_size": {
"type": "string",
"index": "not_analyzed"
},
"screen_size_value": {
"type": "double"
},
"selling_price": {
"type": "double"
},
"shop_url": {
"type": "string"
},
"sim3g": {
"type": "long"
},
"sim4g": {
"type": "long"
},
"sim_type": {
"type": "string"
},
"sku": {
"type": "string"
},
"slug": {
"type": "string"
},
"software_rating": {
"type": "long"
},
"source": {
"type": "string"
},
"ssd_capacity": {
"type": "string"
},
"stock": {
"type": "string"
},
"subtitle": {
"type": "string"
},
"system_memory": {
"type": "string"
},
"tags": {
"type": "string"
},
"theme": {
"type": "string",
"index": "not_analyzed"
},
"title": {
"type": "string"
},
"title_raw": {
"type": "string",
"index": "not_analyzed"
},
"title_suggest": {
"type": "string",
"analyzer": "autocomplete_analyzer",
"search_analyzer": "standard"
},
"type": {
"type": "string",
"index": "not_analyzed"
},
"value_for_money_rating": {
"type": "long"
},
"variant_id": {
"type": "integer"
},
"voice_calling": {
"type": "integer"
},
"wifi": {
"type": "integer"
},
"wired_or_wireless": {
"type": "string",
"index": "not_analyzed"
}
}
}
},
"settings": {
"index": {
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"autocomplete_analyzer": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
}
}
},
"number_of_shards": "5",
"number_of_replicas": "1"
}
}
}