How do I access multiple Python shelve databases at the same time?

137 views Asked by At

I'm building a simple program that concurrently saves data to different shelve databases using multithreading, but an error occurs when 2 threads invoke shelve.open() (for different files):

import threading
import shelve
import time


def parallel_shelve(idx):  # Thread target: open the shelve file named after `idx` and store one entry in it.
    print("thread {}: start".format(idx))
    with shelve.open("cache_shelve_{}".format(idx)) as db:  # this is the call that fails in the log below
        time.sleep(4)  # hold the shelf open for a while before writing
        db["0"] = 0
        db.close()  # NOTE(review): looks redundant - the enclosing `with` should already close the shelf on exit
    print("thread {}: done".format(idx))


if __name__ == "__main__":
    # One worker per shelve file; each worker receives its own index.
    threads = [
        threading.Thread(target=parallel_shelve, args=(idx,))
        for idx in range(2)
    ]

    # Start every worker before joining any of them so that they overlap.
    for thread in threads:
        thread.start()

    # Block until all workers have finished.
    for thread in threads:
        thread.join()

Full log:

$ python multi_database.py
thread 0: start
thread 1: start
Exception in thread Thread-1:
Traceback (most recent call last):
  File "/home/blahblah/anaconda3/lib/python3.9/threading.py", line 980, in _bootstrap_inner
Exception in thread Thread-2:
Traceback (most recent call last):
  File "/home/blahblah/anaconda3/lib/python3.9/threading.py", line 980, in _bootstrap_inner
    self.run()
  File "/home/blahblah/anaconda3/lib/python3.9/threading.py", line 917, in run
    self._target(*self._args, **self._kwargs)
  File "/home/blahblah/Desktop/multi_database.py", line 8, in parallel_shelve
    with shelve.open("cache_shelve_{}".format(idx)) as db:
  File "/home/blahblah/anaconda3/lib/python3.9/shelve.py", line 243, in open
    self.run()
  File "/home/blahblah/anaconda3/lib/python3.9/threading.py", line 917, in run
    return DbfilenameShelf(filename, flag, protocol, writeback)
  File "/home/blahblah/anaconda3/lib/python3.9/shelve.py", line 227, in __init__
    Shelf.__init__(self, dbm.open(filename, flag), protocol, writeback)
  File "/home/blahblah/anaconda3/lib/python3.9/dbm/__init__.py", line 95, in open
    return mod.open(file, flag, mode)
AttributeError: module 'dbm.gnu' has no attribute 'open'
    self._target(*self._args, **self._kwargs)
  File "/home/blahblah/Desktop/multi_database.py", line 8, in parallel_shelve
    with shelve.open("cache_shelve_{}".format(idx)) as db:
  File "/home/blahblah/anaconda3/lib/python3.9/shelve.py", line 243, in open
    return DbfilenameShelf(filename, flag, protocol, writeback)
  File "/home/blahblah/anaconda3/lib/python3.9/shelve.py", line 227, in __init__
    Shelf.__init__(self, dbm.open(filename, flag), protocol, writeback)
  File "/home/blahblah/anaconda3/lib/python3.9/dbm/__init__.py", line 95, in open
    return mod.open(file, flag, mode)
AttributeError: module 'dbm.gnu' has no attribute 'open'
$ python --version
Python 3.9.13

How do I fix it to access different shelve files at the same time?

2

There are 2 answers

0
Michael Butscher On

I could only find out that dbm.open, when called for the first time, tries to find an available dbm implementation. If this happens twice in parallel, mysterious things happen (I just can't explain it), leading to the error.

A workaround is to trigger the implementation search before starting the threads with e.g.

import dbm
# Warm-up call, to be made once before any thread is started: it makes dbm
# perform its first-use search for an available implementation up front.
try:
    dbm.open(None)
except TypeError:
    # Expected failure (presumably because None is not a usable file name);
    # only the side effect of the call matters, so the error is ignored.
    pass
0
Rahn On

From experimentation, I found out that the bug is triggered only when you create a new database; reading from or writing to an existing one works fine. The easiest solution, to me, seems to be adding a lock when creating the database:

import threading
import shelve
import time
from threading import Lock


def parallel_shelve(lock, idx):
    """Create the shelve file for *idx* under *lock*, then store one entry in it.

    Args:
        lock: Lock shared by all worker threads. It is held only while the
            database file is opened for the first time (and thereby created).
        idx: Value used to build the per-worker file name
            ``cache_shelve_<idx>``.

    Raises:
        Whatever ``shelve.open`` raises if the database cannot be opened; the
        lock is released before the exception propagates.
    """
    print("thread {}: start".format(idx))

    file_name = "cache_shelve_{}".format(idx)

    # Serialize only the creating open. Using the lock as a context manager
    # releases it even when shelve.open() or clear() raises; a bare
    # acquire()/release() pair would leave it held and block every other
    # worker forever.
    with lock, shelve.open(file_name) as db:
        # NOTE: this also empties a database that already exists.
        db.clear()

    # The second open happens outside the lock so the workers' actual work
    # can overlap.
    with shelve.open(file_name) as db:
        time.sleep(1)
        db["0"] = 0

    print("thread {}: done".format(idx))


if __name__ == "__main__":
    # A single lock, shared by every worker, is passed to parallel_shelve.
    lock = Lock()
    threads = [
        threading.Thread(target=parallel_shelve, args=(lock, idx))
        for idx in range(10)
    ]

    # Start every worker before joining any of them so that they overlap.
    for thread in threads:
        thread.start()

    # Block until all workers have finished.
    for thread in threads:
        thread.join()