Case insensitive search
Case insensitive search is another thing that many people would expect a database to support. There is a common misunderstanding of indexedDB where people seem to believe that they need to double-store their strings in lowercase in order to be able to do case insensitive search. Here's why this is NOT needed!
Dexie.js implements case insensitive searching using a similar IDBCursor.continue(key) method as with the SQL 'IN' algorithm shown above, however a little more complexity in the algorithm is needed.
Let's say we need to find "david" in table "friends" no matter its casing. "David" or "david" should both be found. Obviously, we could create an array containing all possible case combinations of "david" and then use the SQL 'IN' algorithms above. However, the number of combinations would increase exponentially with the length of the key we're looking for. But there is a trick we can use; since cursor.continue() will land on the next record in sort order, that will reveal what combinations we can skip when landing on the key that is not a match.
What we do is to start searching for the lowest possible value of "David", which would be "DAVID" since uppercase unicode characters have a lesser value than their corresponding lowercase version. If there is no "DAVID" in db, we will land on the least possible key yet greater than "DAVID". Now, instead of testing the next combination of davids (which would be "DAVId"), we first inspect the key we landed on and from that key, we derive what would be the next "david" variant to search for.
Here's the Dexie algorithm for this:
function findIgnoreCaseAlgorithm(needle, onfound, onfinish) {
var upperNeedle = needle.toUpperCase();
var lowerNeedle = needle.toLowerCase();
var cursorReq = index.openCursor();
cursorReq.onsuccess = function (event) {
var cursor = event.target.result;
if (!cursor) {
onfinish();
return;
}
var key = cursor.key;
if (typeof (key) !== 'string') {
cursor.continue();
return;
}
var lowerKey = key.toLowerCase();
if (lowerKey === lowerNeedle) {
onfound(cursor.value);
cursor.continue();
} else {
var nextNeedle = nextCasing(key, lowerKey, upperNeedle, lowerNeedle);
if (nextNeedle) {
cursor.continue(nextNeedle);
} else {
onfinish();
}
}
};
function nextCasing(key, lowerKey) {
var length = Math.min(key.length, lowerNeedle.length);
var llp = -1;
for (var i = 0; i < length; ++i) {
var lwrKeyChar = lowerKey[i];
if (lwrKeyChar !== lowerNeedle[i]) {
if (key[i] < upperNeedle[i]) {
return key.substr(0, i) + upperNeedle[i] + upperNeedle.substr(i + 1);
}
if (key[i] < lowerNeedle[i]) {
return key.substr(0, i) + lowerNeedle[i] + upperNeedle.substr(i + 1);
}
if (llp >= 0) {
return key.substr(0, llp) + lowerKey[llp] + upperNeedle.substr(llp + 1)
}
return null;
}
if (key[i] < lwrKeyChar) {
llp = i;
}
if (length < lowerNeedle.length) {
return key + upperNeedle.substr(key.length);
}
if (llp < 0) {
return null;
} else {
return key.substr(0, llp) + lowerNeedle[llp] + upperNeedle.substr(llp + 1);
}
}
}
Search for strings that start with X, case insensitive.
In Dexie, a case insensitive LIKE '%str' query is written using the followin straight-forward syntax:
db.friends.where("name").startsWithIgnoreCase("da")
The result is a Collection that would return the "David" and "Daniel" objects once executed with any of the toArray() or each() methods.
Here's how it's implemented:
- Do the same findIgnoreCaseAlgorithm as above, but instead of checking if (lowerKey === lowerNeedle) {...}, we do if (lowerKey.indexOf(lowerNeedle) == 0) { ... }
cursorReq.onsuccess = function (event) {
var cursor = event.target.result;
if (!cursor) {
onfinish();
return;
}
var key = cursor.key;
if (typeof (key) !== 'string') {
cursor.continue();
return;
}
var lowerKey = key.toLowerCase();
if (lowerKey.indexOf(lowerNeedle) === 0) {
onfound(cursor.value);
cursor.continue();
} else {
var nextNeedle = nextCasing(key, lowerKey, upperNeedle, lowerNeedle);
if (nextNeedle) {
cursor.continue(nextNeedle);
} else {
onfinish();
}
}
};
Note: The above snippet is just a part of the algorithm. See Case Insensitive Search algorithm which covers the rest.
Logical OR
IndexedDB has no support for logical or. You can only specify one keyrange at a time. However, what it does have support for, is to run several queries in parallell - even when using same transaction (as long as the queries are readonly queries). Even if the queries wouldnt run in parallell, it would still work but a little less performant. The OR operation in Dexie.js is unit tested with Chrome, IE, Firefox and Opera.
The only thing we need to do except executing both queries in parallell, is to remove any duplicates. To do that, we can use a closure-based set of found primary keys. Whenever a new record match is found on any of the parallell queries, it first checks the set if it's already included. If not, it calls onfound for the entry and sets set[primKey] = true so that if the same entry would be found on the other query, it would silently ignore to call onfound().
To access this algorithm with Dexie.js, you type something like the following:
db.friends.where('name').equalsIgnoreCase('david').or('shoeSize').above(40).toArray(fn)
Here's how it's done. The code snipped below is a simplified version only supporting the logical OR of two standard IDBKeyRange queries. With Dexie, you can do arbritary number of OR with any standard or extended operation such as equalsIgnoreCase().
function logical_or(index1, keyRange1, index2, keyRange2, onfound, onfinish) {
var openCursorRequest1 = index1.openCursor(keyRange1);
var openCursorRequest2 = index2.openCursor(keyRange2);
assert(index1.objectStore === index2.objectStore);
var primKey = index1.objectStore.keyPath;
var set = {};
var resolved = 0;
function complete() {
if (++resolved === 2) onfinish();
}
function union(item) {
var key = JSON.stringify(item[primKey]);
if (!set.hasOwnProperty(key)) {
set[key] = true;
onfound(item);
}
}
openCursorRequest1.onsuccess = function (event) {
var cursor = event.target.result;
if (cursor) {
union(cursor.value);
} else {
complete();
}
}
openCursorRequest2.onsuccess = function (event) {
var cursor = event.target.result;
if (cursor) {
union(cursor.value);
} else {
complete();
}
}
}
When using parallell OR execution, the sort order will not be valid. Partly because the two different queries execute on different indexes with different sort order, but mainly because the two queries run in parallell by the browser backround threads and we cannot know which onsuccess is called before the other. However, this can be resolved by implementing onfound to push the items to an array, and onfinish to sort it using any requested sort order.
var index1 = transaction.objectStore("friends").index("name");
var index2 = transaction.objectStire("friends").index("shoeSize");
var keyRange1 = IDBKeyRange.bound ("Da", "Da\uffff");
var keyRange2 = IDBKeyRange.lowerBound (40, true);
var result = [];
logical_or (index1, keyRange1, index2, keyRange2, function (friend) {
result.push(friend);
}, function () {
result.sort (function (a,b) { return a.shoeSize - b.shoeSize; });
});
Logical AND
Logical AND can partly be implemented in indexedDB using compound indexes. As with many other databases, indexedDB supports creating an index that is not just 'name' or 'shoeSize' but the combination of 'name' and 'shoeSize':
store.createIndex('nameAndShoeSize', ['name', 'shoeSize']);
This index will sort by name at first hand, and, if two records have the same name, the second sort order would be shoeSize. When such an index is being used, the key will be regarded as the array of the two compound keys. By using IDBKeyRange.only(['David', 40]) you could look for records where name is 'David' AND shoeSize is 40. However, it doesnt give as a generic AND - it just gives us the posibility to do AND when using exact match. You couldn't do WHERE name = 'David' AND shoeSize > 40. You could almost say that it behaves like if you had created another field with the concatenation of 'name' and 'shoeSize' into a single string. Another drawback is that not all browsers supports compound indexes - namely IE10/IE11.
There is no trick to emulate a real generic logical AND in indexedDB other that applying a JS filter to the iteration. This is the best hit you can make. And it's not that bad as long as you choose a good first-hand filter you can apply your manual filter on the iteration. Keep in mind that with indexedDB, the database operates on the same machine as the client, so there is no network consumption to optimize here. The pros with this, is that you have all the power of doing a complex filter - whatever expression possible to write in JS. Just remember to choose a good first-hand index to filter out the major results.
Consider the following SQL query:
SELECT * FROM friends WHERE name='David' AND shoeSize > 40
It's wiser to let the database filter out name='David' and our javascript filter do the (shoeSize > 40) since it's more probable that the number of friends with name 'David' are less than the number of friends with shoeSize above 40.
Here's an example using Dexie.js (including bootstraping the database):
var db = new Dexie("MyDB");
db.version(1).stores({friends: "++id,name,shoeSize"});
db.open();
db.friends.where('name')
.equalsIgnoreCase('david')
.and(function(friend) { return friend.shoeSize > 40; })
.toArray (function (friendsArray) {
console.log(JSON.stringify(friendsArray));
});
...or if you rather want to use compound indexes, you would do this in Dexie.js:
var db = new Dexie("MyDB");
db.version(1).stores({friends: "++id,name,shoeSize,[name+shoeSize]"});
db.open();
db.friends.where('[name+shoeSize]')
.equals(['David',43])
.toArray (function (friendsArray) {
console.log(JSON.stringify(friendsArray));
});
However, if you need to target IE, dont use them - no support in neither IE10 or IE11.
Search for strings that start with X ( SQL LIKE 'prefix%' )
Matching prefix of string keys is the most straight-forward trick you can do with indexedDB. It's not unique for Dexie.js as other libraries support it as well. However, for the sake of completeness, here is how it is done: Just do an IDBKeyRange.bound where lowerBound is the prefix and upperBound is a string that would include all possible continuations of the prefix. The simplest way to do this is just to append a character with highest possible value; '\uffff'.
IDBKeyRange.bound(prefix, prefix + '\uffff', false, false)
More tricks...
With inspiration from the algorithms used in this article, it is also possible to do other queries that Dexie.js supports, like:
- startsWithAnyOf([str1, str2, strN...])
- SQL IN ignoring case
- betweenAnyOfRanges([from, to], [from2, to2], ...])
- ...
Using the code
Code snippets in the article are conceptual only. The purpuse of the article is only to show simplified versions of the concept and algorithms used in Dexie.js.
Test it on Your Own Browser!
To see the algorithms in action just paste the following snippet into an empty HTML file and download Dexie.js. If using IE, make sure you run the page on an HTTP or HTTPS url. If using Safari, also download the indexedDB polyfill and include it in the HTML. Notice that when using polyfill for Safari the OR operation is not function as expected (this is something that needs to be fixed in the polyfill).
<!DOCTYPE html>
<html>
<head>
<title>Extended indexedDB queries using Dexie.js</title>
<script type="text/javascript" src="Dexie.js"></script>
<script>
var db = new Dexie("MyDB");
db.version(1).stores({
friends: "++id,name,shoeSize"
});
db.open().catch(function (e) {
log("Error opening database: " + e, "error");
});
function populateSomeData() {
log("Populating some data", "heading");
return db.transaction("rw", db.friends, function (friends) {
friends.clear();
friends.add({ name: "David", shoeSize: 43 });
friends.add({ name: "Ylva", shoeSize: 37 });
friends.add({ name: "Jon", shoeSize: 44 });
friends.add({ name: "Måns", shoeSize: 42 });
friends.add({ name: "Daniel", shoeSize: 39 });
friends.add({ name: "Nils", shoeSize: 45 });
friends.add({ name: "Zlatan", shoeSize: 47 });
friends.orderBy('name').each(function (friend) {
log(JSON.stringify(friend));
});
}).catch(function (e) {
log(e, "error");
});
}
function equalsAnyOf() {
log("db.friends.where('name').anyOf('David', 'Zlatan', 'Daniel')", "heading");
return db.friends.where('name').anyOf('David', 'Zlatan', 'Daniel')
.each(function (friend) {
log(JSON.stringify(friend));
});
}
function equalsIgnoreCase() {
log("db.friends.where('name').equalsIgnoreCase('david')", "heading");
return db.friends.where('name').equalsIgnoreCase('david')
.each(function (friend) {
log(JSON.stringify(friend));
});
}
function startsWithIgnoreCase() {
log("db.friends.where('name').startsWithIgnoreCase('da')", "heading");
return db.friends.where('name').startsWithIgnoreCase('da')
.each(function (friend) {
log(JSON.stringify(friend));
});
}
function logicalOR() {
log("db.friends.where('name').startsWithIgnoreCase('da').or('shoeSize').below(40)", "heading");
return db.friends.where('name').startsWithIgnoreCase('da')
.or('shoeSize').below(40)
.each(function (friend) {
log(JSON.stringify(friend));
});
}
function logicalAND() {
log("db.friends.where('name').startsWithIgnoreCase('da').and(function (friend) { return friend.shoeSize > 40; })", "heading");
return db.friends.where('name').startsWithIgnoreCase('da')
.and(function (friend) { return friend.shoeSize > 40; })
.each(function (friend) {
log(JSON.stringify(friend));
});
}
function log(txt, clazz) {
var li = document.createElement('li');
li.textContent = txt.toString();
if (clazz) li.className = clazz;
document.getElementById('log').appendChild(li);
}
function runSamples() {
populateSomeData()
.then(equalsAnyOf)
.then(equalsIgnoreCase)
.then(startsWithIgnoreCase)
.then(logicalOR)
.then(logicalAND)
.catch(function (e) {
log(e, "error");
});
}
</script>
<style>
li {list-style-type:none;}
.error { color: red; }
.heading { color: #808080; margin-top: 12px;}
</style>
</head>
<body onload="runSamples();">
<ul id="log"></ul>
</body>
</html>
Points of Interest
All of these algorithms was invented while I was writing the indexedDB wrapper Dexie.js. My initial intention was just to create a decent wrapper library for a product I am developing.
The first name that I gave the wrapper library was StraightForwardDB / sfdb.js - since I wanted a minimalistic API - yet capable of all the versioning and robustness of the native API. Much of the API is written with inspiration from .NET's Linq2SQL. Along the way I learned the details of how indexedDB works and as I was learning about the IDBCursor interface, I stumbled on the posibility of case insensitive searching and set matching. To my surprise I was unable to find any other implementation of case insensitive search on the net. This inspired me to write this article and show what is possible to do with indexedDB.
History
March 16: Initial draft
March 26: Article published.
March 27: Updated sample files - bugfix in Dexie.min.js