Tracker’s original SPARQL regex support uses a custom SQLite function. Back when we wrote it, we didn’t yet think much about optimizing. As a result, we were using g_regex_match_simple, which recompiles the regular expression on every call.
Today Jürg and I found out about sqlite3_get_auxdata and sqlite3_set_auxdata, which allow us to cache a compiled value for a specific custom SQLite function for the duration of the query.
This is much better:
/*
 * function_sparql_regex:
 * @context: SQLite function context; receives the result or the error.
 * @argc: number of arguments, must be 3.
 * @argv: argv[0] is the text to test, argv[1] the regular expression
 *        pattern, argv[2] the flag string (any of 's', 'm', 'i', 'x').
 *
 * SQLite custom function backing SPARQL regex(). Sets the result to 1 when
 * the text matches the pattern and to 0 otherwise. A SQL NULL text or
 * pattern yields 0; a SQL NULL flag string is treated as "no flags".
 * An invalid argument count, an unknown flag or a pattern that fails to
 * compile is reported through sqlite3_result_error().
 *
 * The compiled GRegex is stored as auxiliary data on argument 1 so that it
 * is compiled only when sqlite3_get_auxdata() has nothing cached.
 *
 * NOTE(review): the cache is attached to the pattern argument only; this
 * presumably relies on the flags being constant whenever the pattern is --
 * confirm against the SPARQL-to-SQL translation.
 */
static void
function_sparql_regex (sqlite3_context *context,
                       int              argc,
                       sqlite3_value   *argv[])
{
	const gchar *text;
	GRegex *regex;
	gboolean compiled_here = FALSE;

	if (argc != 3) {
		sqlite3_result_error (context, "Invalid argument count", -1);
		return;
	}

	regex = sqlite3_get_auxdata (context, 1);

	if (regex == NULL) {
		const gchar *pattern, *flags;
		GRegexCompileFlags regex_flags = 0;
		GError *error = NULL;

		/* sqlite3_value_text() returns unsigned char and may be NULL */
		pattern = (const gchar *) sqlite3_value_text (argv[1]);
		flags = (const gchar *) sqlite3_value_text (argv[2]);

		for (; flags != NULL && *flags != '\0'; flags++) {
			switch (*flags) {
			case 's':
				regex_flags |= G_REGEX_DOTALL;
				break;
			case 'm':
				regex_flags |= G_REGEX_MULTILINE;
				break;
			case 'i':
				regex_flags |= G_REGEX_CASELESS;
				break;
			case 'x':
				regex_flags |= G_REGEX_EXTENDED;
				break;
			default: {
				gchar *err_str;

				err_str = g_strdup_printf ("Invalid SPARQL regex flag '%c'", *flags);
				sqlite3_result_error (context, err_str, -1);
				g_free (err_str);
				return;
			}
			}
		}

		if (pattern == NULL) {
			/* Nothing to compile: a NULL pattern never matches */
			sqlite3_result_int (context, FALSE);
			return;
		}

		regex = g_regex_new (pattern, regex_flags, 0, &error);

		if (error != NULL) {
			/* Third argument is the message length in bytes, not an
			 * error code; -1 means "up to the NUL terminator". */
			sqlite3_result_error (context, error->message, -1);
			g_clear_error (&error);
			return;
		}

		compiled_here = TRUE;
	}

	text = (const gchar *) sqlite3_value_text (argv[0]);
	sqlite3_result_int (context,
	                    text != NULL && g_regex_match (regex, text, 0, NULL));

	if (compiled_here) {
		/* Register the cache last: SQLite may invoke the destructor
		 * before sqlite3_set_auxdata() returns, so the regex must not
		 * be touched after this call. */
		sqlite3_set_auxdata (context, 1, regex, (void (*) (void *)) g_regex_unref);
	}
}
Before (this was a test on a huge number of resources):
$ time tracker-sparql -q "select ?u { ?u a rdfs:Resource . FILTER (regex(?u, '^titl', 'i')) }" real 0m3.337s user 0m0.004s sys 0m0.008s
After:
$ time tracker-sparql -q "select ?u { ?u a rdfs:Resource . FILTER (regex(?u, '^titl', 'i')) }" real 0m1.887s user 0m0.008s sys 0m0.008s
This will hit Tracker’s master today or tomorrow.