Tracker’s original SPARQL regex support uses a custom SQLite function. Back when we wrote it, we didn’t yet think much about optimizing. As a result, we were using g_regex_match_simple, which recompiles the regular expression on every call.
Today Jürg and I found out about sqlite3_get_auxdata and sqlite3_set_auxdata, which allow us to cache a compiled value for a specific custom SQLite function for the duration of the query.
This is much better:
/*
 * Auxdata destructor with the exact signature sqlite3_set_auxdata() expects.
 * Using a wrapper avoids calling g_regex_unref() through a cast to an
 * incompatible function pointer type.
 */
static void
function_sparql_regex_free (void *data)
{
	g_regex_unref (data);
}

/*
 * SQLite custom function backing SPARQL regex(text, pattern, flags).
 *
 * argv[0] is the text to test, argv[1] the regular expression and argv[2]
 * the SPARQL flag string ('s', 'm', 'i', 'x').  The result is the integer
 * 1 on a match and 0 otherwise.  An SQL error is raised for a wrong argument
 * count, an unknown flag character, or a pattern that fails to compile.
 *
 * The compiled GRegex is stored as SQLite auxiliary data on argument 1 so
 * that it is not recompiled on every invocation.
 *
 * NOTE(review): the cache is attached to the pattern argument only and the
 * flags are parsed only when compiling; this presumably relies on the flags
 * being constant whenever the pattern is -- confirm against the callers.
 */
static void
function_sparql_regex (sqlite3_context *context,
                       int              argc,
                       sqlite3_value   *argv[])
{
	const gchar *text;
	GRegex *regex;
	gboolean newly_compiled = FALSE;
	gboolean ret;

	if (argc != 3) {
		sqlite3_result_error (context, "Invalid argument count", -1);
		return;
	}

	regex = sqlite3_get_auxdata (context, 1);

	if (regex == NULL) {
		const gchar *pattern, *flags;
		GRegexCompileFlags regex_flags = 0;
		GError *error = NULL;

		/* sqlite3_value_text() returns unsigned char and yields NULL for
		 * SQL NULL values, so cast explicitly and guard before use. */
		pattern = (const gchar *) sqlite3_value_text (argv[1]);
		flags = (const gchar *) sqlite3_value_text (argv[2]);

		/* A NULL flags argument is treated as "no flags". */
		for (; flags != NULL && *flags != '\0'; flags++) {
			switch (*flags) {
			case 's':
				regex_flags |= G_REGEX_DOTALL;
				break;
			case 'm':
				regex_flags |= G_REGEX_MULTILINE;
				break;
			case 'i':
				regex_flags |= G_REGEX_CASELESS;
				break;
			case 'x':
				regex_flags |= G_REGEX_EXTENDED;
				break;
			default: {
				gchar *err_str;

				err_str = g_strdup_printf ("Invalid SPARQL regex flag '%c'", *flags);
				sqlite3_result_error (context, err_str, -1);
				g_free (err_str);
				return;
			}
			}
		}

		if (pattern == NULL) {
			/* Nothing to compile: report "no match" instead of handing
			 * NULL to g_regex_new(). */
			sqlite3_result_int (context, FALSE);
			return;
		}

		regex = g_regex_new (pattern, regex_flags, 0, &error);

		if (regex == NULL) {
			/* The third argument of sqlite3_result_error() is the message
			 * length in bytes, not an error code; -1 means "up to the
			 * terminating NUL". */
			sqlite3_result_error (context,
			                      error != NULL ? error->message : "Invalid regular expression",
			                      -1);
			g_clear_error (&error);
			return;
		}

		newly_compiled = TRUE;
	}

	text = (const gchar *) sqlite3_value_text (argv[0]);

	/* A NULL text never matches. */
	ret = (text != NULL) && g_regex_match (regex, text, 0, NULL);

	if (newly_compiled) {
		/* Hand ownership of the regex to SQLite only after its last use:
		 * SQLite documents that the destructor may run before
		 * sqlite3_set_auxdata() returns. */
		sqlite3_set_auxdata (context, 1, regex, function_sparql_regex_free);
	}

	sqlite3_result_int (context, ret);
}
Before (this was a test on a huge number of resources):
$ time tracker-sparql -q "select ?u { ?u a rdfs:Resource . FILTER (regex(?u, '^titl', 'i')) }"
real 0m3.337s
user 0m0.004s
sys 0m0.008s
After:
$ time tracker-sparql -q "select ?u { ?u a rdfs:Resource . FILTER (regex(?u, '^titl', 'i')) }"
real 0m1.887s
user 0m0.008s
sys 0m0.008s
This will hit Tracker’s master today or tomorrow.