Using the SQLite JSON1 and FTS5 Extensions with Python

Author's note:

export JQLITE="$HOME/bin/jqlite"
mkdir -p $JQLITE
cd $JQLITE

fossil clone http://www.sqlite.org/cgi/src sqlite.fossil
fossil open sqlite.fossil

curl 'https://www.sqlite.org/src/tarball/sqlite.tar.gz?ci=trunk' | tar xz
mv sqlite/* .

export CFLAGS="-DSQLITE_ENABLE_COLUMN_METADATA \
-DSQLITE_ENABLE_DBSTAT_VTAB \
-DSQLITE_ENABLE_FTS3 \
-DSQLITE_ENABLE_FTS3_PARENTHESIS \
-DSQLITE_ENABLE_FTS4 \
-DSQLITE_ENABLE_FTS5 \
-DSQLITE_ENABLE_JSON1 \
-DSQLITE_ENABLE_STAT4 \
-DSQLITE_ENABLE_UPDATE_DELETE_LIMIT \
-DSQLITE_SECURE_DELETE \
-DSQLITE_SOUNDEX \
-DSQLITE_TEMP_STORE=3 \
-O2 \
-fPIC"
LIBS="-lm" ./configure --prefix=$JQLITE --enable-static --enable-shared
make
make install

git clone https://github.com/ghaering/pysqlite
cd pysqlite/
cp ../sqlite3.c .
echo -e "library_dirs=$JQLITE/lib" >> setup.cfg
echo -e "include_dirs=$JQLITE/include" >> setup.cfg
LIBS="-lm" python setup.py build_static

>>> from pysqlite2 import dbapi2 as sqlite
>>> conn = sqlite.connect(':memory:')
>>> conn.execute('CREATE VIRTUAL TABLE testing USING fts5(data);')
<pysqlite2.dbapi2.Cursor object at 0x7ff7d0a2dc60>
>>> conn.execute('SELECT json(?)', (1337,)).fetchone()
(u'1337',)

cd $JQLITE
git clone https://github.com/rogerbinns/apsw
cd apsw
cp ../sqlite3{ext.h,.h,.c} .
echo -e "library_dirs=$JQLITE/lib" >> setup.cfg
echo -e "include_dirs=$JQLITE/include" >> setup.cfg
LIBS="-lm" python setup.py build

>>> import apsw
>>> conn = apsw.Connection(':memory:')
>>> cursor = conn.cursor()
>>> cursor.execute('CREATE VIRTUAL TABLE testing USING fts5(data);')
<apsw.Cursor at 0x7fcf6b17fa80>
>>> cursor.execute('SELECT json(?)', (1337,)).fetchone()
(u'1337',)

[{
   "title": "My List of Python and SQLite Resources",
   "url": "http://charlesleifer.com/blog/my-list-of-python-and-sqlite-resources/",
   "metadata": {"tags": ["python", "sqlite"]}
 },
 {
   "title": "Using SQLite4's LSM Storage Engine as a Stand-alone NoSQL Database with Python"
   "url": "http://charlesleifer.com/blog/using-sqlite4-s-lsm-storage-engine-as-a-stand-alone-nosql-database-with-python/",
   "metadata": {"tags": ["nosql", "python", "sqlite", "cython"]}
  },
  ...
]

>>> import json, urllib2
>>> fh = urllib2.urlopen('http://media.charlesleifer.com/blog/downloads/misc/blogs.json')
>>> data = json.loads(fh.read())
>>> data[0]
{u'metadata': {u'tags': [u'python', u'sqlite']},
 u'title': u'My List of Python and SQLite Resources',
 u'url': u'http://charlesleifer.com/blog/my-list-of-python-and-sqlite-resources/'}

>>> from pysqlite2 import dbapi2 as jqlite
>>> from peewee import *
>>> from playhouse.sqlite_ext import *
>>> class JQLiteDatabase(SqliteExtDatabase):
...     def _connect(self, database, **kwargs):
...         conn = jqlite.connect(database, **kwargs)
...         conn.isolation_level = None
...         self._add_conn_hooks(conn)
...         return conn
...
>>> db = JQLiteDatabase(':memory:')

>>> class Entry(Model):
...     data = TextField()
...     class Meta:
...         database = db
...
>>> Entry.create_table()
>>> with db.atomic():
...     for entry_json in data:
...         Entry.create(data=json.dumps(entry_json))
...

>>> title = fn.json_extract(Entry.data, '$.title')
>>> query = (Entry
...          .select(title.alias('title'))
...          .order_by(title)
...          .limit(5))
...
>>> [row for row in query.dicts()]
[{'title': u'A Tour of Tagging Schemas: Many-to-many, Bitmaps and More'},
 {'title': u'Alternative Redis-Like Databases with Python'},
 {'title': u'Building the SQLite FTS5 Search Extension'},
 {'title': u'Connor Thomas Leifer'},
 {'title': u'Extending SQLite with Python'}]

SELECT json_extract("t1"."data", '$.title') AS title
FROM "entry" AS t1
ORDER BY json_extract("t1"."data", '$.title')
LIMIT 5

>>> from peewee import Entity
>>> tags_src = fn.json_each(Entry.data, '$.metadata.tags').alias('tags')
>>> tags_ref = Entity('tags')

>>> query = (Entry
...          .select(title.alias('title'))
...          .from_(Entry, tags_src)
...          .where(tags_ref.value == 'sqlite')
...          .order_by(title))
...
>>> [row for row, in query.tuples()]
[u'Building the SQLite FTS5 Search Extension',
 u'Extending SQLite with Python',
 u'Meet Scout, a Search Server Powered by SQLite',
 u'My List of Python and SQLite Resources',
 u'Querying Tree Structures in SQLite using Python and the Transitive Closure Extension',
 u"Using SQLite4's LSM Storage Engine as a Stand-alone NoSQL Database with Python",
 u'Web-based SQLite Database Browser, powered by Flask and Peewee']

SELECT json_extract("t1"."data", '$.title') AS title
FROM
    "entry" AS t1,
    json_each("t1"."data", '$.metadata.tags') AS tags
WHERE ("tags"."value" = 'sqlite')
ORDER BY json_extract("t1"."data", '$.title')

>>> query = (Entry
...          .select(
...              title.alias('title'),
...              fn.group_concat(tags_ref.value, ', ').alias('tags'))
...          .from_(Entry, tags_src)
...          .group_by(title)
...          .limit(5))
...
>>> [row for row in query.tuples()]
[(u'A Tour of Tagging Schemas: Many-to-many, Bitmaps and More',
  u'peewee, sql, python'),
 (u'Alternative Redis-Like Databases with Python',
  u'python, walrus, redis, nosql'),
 (u'Building the SQLite FTS5 Search Extension',
  u'sqlite, search, python, peewee'),
 (u'Connor Thomas Leifer', u'thoughts'),
 (u'Extending SQLite with Python', u'peewee, python, sqlite')]

SELECT
    json_extract("t1"."data", '$.title') AS title,
    group_concat("tags"."value", ', ') AS tags
FROM
    "entry" AS t1,
    json_each("t1"."data", '$.metadata.tags') AS tags
GROUP BY json_extract("t1"."data", '$.title')
LIMIT 5

>>> tree = fn.json_tree(Entry.data, '$').alias('tree')
>>> parent = fn.json_tree(Entry.data, '$').alias('parent')

>>> tree_ref = Entity('tree')
>>> parent_ref = Entity('parent')

>>> query = (Entry
...          .select(title.alias('title'))
...          .from_(Entry, tree, parent)
...          .where(
...              (tree_ref.parent == parent_ref.id) &
...              (parent_ref.key == 'tags') &
...              (tree_ref.value == 'sqlite'))
...          .order_by(title))
...
>>> [title for title, in query.tuples()]
[u'Building the SQLite FTS5 Search Extension',
 u'Extending SQLite with Python',
 u'Meet Scout, a Search Server Powered by SQLite',
 u'My List of Python and SQLite Resources',
 u'Querying Tree Structures in SQLite using Python and the Transitive Closure Extension',
 u"Using SQLite4's LSM Storage Engine as a Stand-alone NoSQL Database with Python",
 u'Web-based SQLite Database Browser, powered by Flask and Peewee']

SELECT json_extract("t1"."data", '$.title') AS title
FROM
    "entry" AS t1,
    json_tree("t1"."data", '$') AS tree,
    json_tree("t1"."data", '$') AS parent
WHERE (
    ("tree"."parent" = "parent"."id") AND
    ("parent"."key" = 'tags') AND
    ("tree"."value" = 'sqlite'))
ORDER BY json_extract("t1"."data", '$.title')

class EntryIndex(FTS5Model):
    title = SearchField()
    url = SearchField(unindexed=True)

    class Meta:
        database = db
        options = {
            'prefix': [2, 3],
            'tokenize': 'porter unicode61', 
        }

EntryIndex.create_table()

CREATE VIRTUAL TABLE "entryindex" USING fts5 (
    "title" ,
    "url"  UNINDEXED,
    prefix='2,3',
    tokenize="porter unicode61")

title = fn.json_extract(Entry.data, '$.title').alias('title')
url = fn.json_extract(Entry.data, '$.url').alias('url')
query = Entry.select(title, url).dicts()
with db.atomic():
    for entry in query:
        EntryIndex.create(**entry)

>>> query = EntryIndex.search('sqlite').limit(3)
>>> for result in query:
...     print result.title

Extending SQLite with Python
Building the SQLite FTS5 Search Extension
My List of Python and SQLite Resources

SELECT "t1"."title", "t1"."url"
FROM "entryindex" AS t1
WHERE ("entryindex" MATCH 'sqlite')
ORDER BY rank

>>> query = EntryIndex.search('sqlite AND python', with_score=True)
>>> for result in query:
...     print round(result.score, 3), result.title

-1.259 Extending SQLite with Python
-1.059 My List of Python and SQLite Resources
-0.838 Querying Tree Structures in SQLite using Python and the Transitive Closure Extension

SELECT "t1"."title", "t1"."url", rank AS score
FROM "entryindex" AS t1
WHERE ("entryindex" MATCH 'sqlite AND python')
ORDER BY rank

Using the SQLite JSON1 and FTS5 Extensions with Python

Getting started

Compiling SQLite with json1 and fts5

Building pysqlite

Building apsw

Using the JSON extension

Populating the database

JSON functions

FTS5 with Python

Thanks for reading

Comments (3)

Charles | nov 18 2015, at 11:50am

Oisin Mulvihill | nov 18 2015, at 11:46am

Anonymous | sep 17 2015, at 05:00pm