Python: Cache Functions to Disk


If you have a long-running or expensive function that ought not be re-invoked with the same arguments, then cache its output to disk.

Note: sqlite3 ships with Python's standard library, so this code has no third-party dependencies.

import functools
import hashlib
import json
import sqlite3

def memoize_to_sqlite(filename: str = "cache.db"):
    """
    Memoization decorator that caches the output of a function in a SQLite
    database.

    The database file is opened (and the ``cache`` table created if missing)
    as soon as this factory is called, i.e. at decoration time. One
    connection is shared by every call to the decorated function.

    Args:
        filename: Path of the SQLite database file used as the cache.

    Returns:
        A decorator. The decorated function looks up a SHA-256 hash of its
        name and the ``repr`` of its arguments; on a hit it returns the
        JSON-decoded stored value, otherwise it calls the function, stores
        the JSON-encoded result and returns the result.

    Notes:
        * Arguments must have a deterministic ``repr``.
        * Results must be JSON-serializable; otherwise ``json.dumps`` raises
          ``TypeError`` and nothing is cached.
        * A cache hit returns the JSON round-tripped value, so e.g. a tuple
          comes back as a list and non-string dict keys come back as strings.
        * ``f(1, 2)`` and ``f(1, b=2)`` are cached under different keys.
    """
    db_conn = sqlite3.connect(filename)
    db_conn.execute(
        "CREATE TABLE IF NOT EXISTS cache (hash TEXT PRIMARY KEY, result TEXT)"
    )

    def memoize(func):
        @functools.wraps(func)  # keep the wrapped function's name/docstring
        def wrapped(*args, **kwargs):
            # Key is <function name>:<positional args>. Keyword arguments are
            # appended only when present, so keys for positional-only calls
            # stay identical to those written by earlier versions.
            key = f"{func.__name__}:{repr(tuple(args))}"
            if kwargs:
                # Sort so the key does not depend on keyword order.
                key += f":{repr(sorted(kwargs.items()))}"
            arg_hash = hashlib.sha256(key.encode("utf-8")).hexdigest()

            # Check if the result is already cached
            row = db_conn.execute(
                "SELECT result FROM cache WHERE hash = ?", (arg_hash,)
            ).fetchone()
            if row is not None:
                print(f"Cached result found for {arg_hash}. Returning it.")
                return json.loads(row[0])

            # Compute the result and serialize it before touching the
            # database, so a non-serializable result never opens a transaction.
            result = func(*args, **kwargs)
            serialized = json.dumps(result)

            # The connection context manager commits on success and rolls
            # back on error. OR REPLACE tolerates the same key having been
            # stored (e.g. by another process) since the lookup above.
            with db_conn:
                db_conn.execute(
                    "INSERT OR REPLACE INTO cache (hash, result) VALUES (?, ?)",
                    (arg_hash, serialized),
                )

            return result

        return wrapped

    return memoize
Usage

Apply it as a decorator to any function whose results you want cached.

@memoize_to_sqlite("cache.db")
def expensive_method(s: str, n: int):
    """Stand-in for a slow computation whose result is cached in cache.db."""
    # Replace this placeholder with the long-running work.
    pass
Caveats

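Ensure your arguments have a stable __repr__: the cache key is built from repr() of the arguments, so it must be identical across runs for equal values. The default object repr includes a memory address and will never produce a cache hit in a later run.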

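Ensure your output can be serialized as JSON. Note that a cached result is returned as its JSON-decoded form, so, for example, a tuple comes back as a list.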

Back to posts
TwitterUdemyMy Twitter ProfileMy Instagram

Copyright © Kevin Katz 2023

Privacy