Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 : Main metadata server / Spotlight routines / Elasticsearch backend
4 :
5 : Copyright (C) Ralph Boehme 2019
6 :
7 : This program is free software; you can redistribute it and/or modify
8 : it under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3 of the License, or
10 : (at your option) any later version.
11 :
12 : This program is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : GNU General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with this program. If not, see <http://www.gnu.org/licenses/>.
19 : */
20 :
21 : #include "includes.h"
22 : #include "es_mapping.h"
23 :
24 : /*
25 : * Escaping of special characters in Lucene query syntax across HTTP and JSON
26 : * ==========================================================================
27 : *
28 : * These characters in Lucene queries need escaping [1]:
29 : *
30 : * + - & | ! ( ) { } [ ] ^ " ~ * ? : \ /
31 : *
32 : * Additionally JSON requires escaping of:
33 : *
34 : * " \
35 : *
36 : * Characters already escaped by the mdssvc client:
37 : *
38 : * * " \
39 : *
40 : * The following table contains the resulting escaped strings, beginning with the
41 : * search term, the corresponding Spotlight query and the final string that gets
42 : * sent to the target Elasticsearch server.
43 : *
44 : * string | mdfind | http
45 : * -------+--------+------
46 : * x!x x!x x\\!x
47 : * x&x x&x x\\&x
48 : * x+x x+x x\\+x
49 : * x-x x-x x\\-x
50 : * x.x x.x x\\.x
51 : * x<x x<x x\\<x
52 : * x>x x>x x\\>x
53 : * x=x x=x x\\=x
54 : * x?x x?x x\\?x
55 : * x[x x[x x\\[x
56 : * x]x x]x x\\]x
57 : * x^x x^x x\\^x
58 : * x{x x{x x\\{x
59 : * x}x x}x x\\}x
60 : * x|x x|x x\\|x
61 : * x x x x x\\ x
62 : * x*x x\*x x\\*x
63 : * x\x x\\x x\\\\x
64 : * x"x x\"x x\\\"x
65 : *
66 : * Special cases:
67 : * x y It's not possible to search for terms including spaces, Spotlight
68 : * will search for x OR y.
69 : * x(x Search for terms including ( and ) does not work with Spotlight.
70 : *
71 : * [1] <http://lucene.apache.org/core/8_2_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters>
72 : */
73 :
74 0 : static char *escape_str(TALLOC_CTX *mem_ctx,
75 : const char *in,
76 : const char *escape_list,
77 : const char *escape_exceptions)
78 : {
79 0 : char *out = NULL;
80 : size_t in_len;
81 : size_t new_len;
82 : size_t in_pos;
83 0 : size_t out_pos = 0;
84 :
85 0 : if (in == NULL) {
86 0 : return NULL;
87 : }
88 0 : in_len = strlen(in);
89 :
90 0 : if (escape_list == NULL) {
91 0 : escape_list = "";
92 : }
93 0 : if (escape_exceptions == NULL) {
94 0 : escape_exceptions = "";
95 : }
96 :
97 : /*
98 : * Allocate enough space for the worst case: every char needs to be
99 : * escaped and requires an additional char.
100 : */
101 0 : new_len = (in_len * 2) + 1;
102 0 : if (new_len <= in_len) {
103 0 : return NULL;
104 : }
105 :
106 0 : out = talloc_zero_array(mem_ctx, char, new_len);
107 0 : if (out == NULL) {
108 0 : return NULL;
109 : }
110 :
111 0 : for (in_pos = 0, out_pos = 0; in_pos < in_len; in_pos++, out_pos++) {
112 0 : if (strchr(escape_list, in[in_pos]) != NULL &&
113 0 : strchr(escape_exceptions, in[in_pos]) == NULL)
114 : {
115 0 : out[out_pos++] = '\\';
116 : }
117 0 : out[out_pos] = in[in_pos];
118 : }
119 :
120 0 : return out;
121 : }
122 :
123 0 : char *es_escape_str(TALLOC_CTX *mem_ctx,
124 : const char *in,
125 : const char *exceptions)
126 : {
127 0 : const char *lucene_escape_list = "+-&|!(){}[]^\"~*?:\\/ ";
128 0 : const char *json_escape_list = "\\\"";
129 0 : char *lucene_escaped = NULL;
130 0 : char *full_escaped = NULL;
131 :
132 0 : lucene_escaped = escape_str(mem_ctx,
133 : in,
134 : lucene_escape_list,
135 : exceptions);
136 0 : if (lucene_escaped == NULL) {
137 0 : return NULL;
138 : }
139 :
140 0 : full_escaped = escape_str(mem_ctx,
141 : lucene_escaped,
142 : json_escape_list,
143 : NULL);
144 0 : TALLOC_FREE(lucene_escaped);
145 0 : return full_escaped;
146 : }
147 :
148 0 : struct es_attr_map *es_map_sl_attr(TALLOC_CTX *mem_ctx,
149 : json_t *kmd_map,
150 : const char *sl_attr)
151 : {
152 0 : struct es_attr_map *es_map = NULL;
153 0 : const char *typestr = NULL;
154 : enum ssm_type type;
155 0 : char *es_attr = NULL;
156 : size_t i;
157 : int cmp;
158 : int ret;
159 :
160 : static struct {
161 : const char *typestr;
162 : enum ssm_type typeval;
163 : } ssmt_type_map[] = {
164 : {"bool", ssmt_bool},
165 : {"num", ssmt_num},
166 : {"str", ssmt_str},
167 : {"fts", ssmt_fts},
168 : {"date", ssmt_date},
169 : {"type", ssmt_type},
170 : };
171 :
172 0 : if (sl_attr == NULL) {
173 0 : return NULL;
174 : }
175 :
176 0 : ret = json_unpack(kmd_map,
177 : "{s: {s: s}}",
178 : sl_attr,
179 : "type",
180 : &typestr);
181 0 : if (ret != 0) {
182 0 : DBG_DEBUG("No JSON type mapping for [%s]\n", sl_attr);
183 0 : return NULL;
184 : }
185 :
186 0 : ret = json_unpack(kmd_map,
187 : "{s: {s: s}}",
188 : sl_attr,
189 : "attribute",
190 : &es_attr);
191 0 : if (ret != 0) {
192 0 : DBG_ERR("No JSON attribute mapping for [%s]\n", sl_attr);
193 0 : return NULL;
194 : }
195 :
196 0 : for (i = 0; i < ARRAY_SIZE(ssmt_type_map); i++) {
197 0 : cmp = strcmp(typestr, ssmt_type_map[i].typestr);
198 0 : if (cmp == 0) {
199 0 : type = ssmt_type_map[i].typeval;
200 0 : break;
201 : }
202 : }
203 0 : if (i == ARRAY_SIZE(ssmt_type_map)) {
204 0 : return NULL;
205 : }
206 :
207 0 : es_map = talloc_zero(mem_ctx, struct es_attr_map);
208 0 : if (es_map == NULL) {
209 0 : return NULL;
210 : }
211 0 : es_map->type = type;
212 :
213 0 : es_map->name = es_escape_str(es_map, es_attr, NULL);
214 0 : if (es_map->name == NULL) {
215 0 : TALLOC_FREE(es_map);
216 0 : return false;
217 : }
218 :
219 0 : return es_map;
220 : }
221 :
222 0 : const char *es_map_sl_type(json_t *mime_map,
223 : const char *sl_type)
224 : {
225 0 : const char *mime_type = NULL;
226 : int ret;
227 :
228 0 : if (sl_type == NULL) {
229 0 : return NULL;
230 : }
231 :
232 0 : ret = json_unpack(mime_map,
233 : "{s: s}",
234 : sl_type,
235 : &mime_type);
236 0 : if (ret != 0) {
237 0 : return NULL;
238 : }
239 :
240 0 : return mime_type;
241 : }
|