#!/usr/bin/env python
"""PickleShare - a small 'shelve' like datastore with concurrency support
Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike
shelve, many processes can access the database simultaneously. Changing a
value in the database is immediately visible to other processes accessing the
same database.
Concurrency is possible because the values are stored in separate files. Hence
the "database" is a directory where *all* files are governed by PickleShare.
Example usage::

    from pickleshare import *
    db = PickleShareDB('~/testpickleshare')
    db.clear()
    print("Should be empty:", db.items())
    db['hello'] = 15
    db['aku ankka'] = [1,2,313]
    db['paths/are/ok/key'] = [1,(5,46)]
    print(db.keys())
    del db['aku ankka']
This module is certainly not ZODB, but can be used for low-load
(non-mission-critical) situations where tiny code size trumps the
advanced features of a "real" object database.
Installation guide: pip install pickleshare
Author: Ville Vainio <vivainio@gmail.com>
License: MIT open source license.
"""
from __future__ import print_function
__version__ = "0.7.5"
try:
from pathlib import Path
except ImportError:
# Python 2 backport
from pathlib2 import Path
import os, stat, time
try:
import collections.abc as collections_abc
except ImportError:
import collections as collections_abc
try:
import cPickle as pickle
except ImportError:
import pickle
import errno
import sys
if sys.version_info[0] >= 3:
string_types = (str,)
else:
string_types = (str, unicode)
def gethashfile(key):
return ("%02x" % abs(hash(key) % 256))[-2:]
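# A sketch of the bucketing scheme (bucket values below are illustrative
# only: on Python 3, hash() of a str is salted per process unless
# PYTHONHASHSEED is fixed, so the actual bucket for a key varies per run):
#
#   gethashfile("hello")  ->  e.g. "1f"
#   gethashfile("world")  ->  e.g. "a3"
#
# hset()/hget() below use this to spread many small keys over at most
# 256 pickle files per hashroot, instead of one file per key.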
_sentinel = object()
class PickleShareDB(collections_abc.MutableMapping):
"""The main 'connection' object for PickleShare database"""
def __init__(self, root):
"""Return a db object that will manage the specied directory"""
if not isinstance(root, string_types):
root = str(root)
root = os.path.abspath(os.path.expanduser(root))
self.root = Path(root)
if not self.root.is_dir():
# catching the exception is necessary if multiple processes are concurrently trying to create a folder
            # the exist_ok keyword argument of mkdir does the same, but only from Python 3.5
try:
self.root.mkdir(parents=True)
except OSError as e:
if e.errno != errno.EEXIST:
raise
# cache has { 'key' : (obj, orig_mod_time) }
self.cache = {}
def __getitem__(self, key):
"""db['key'] reading"""
fil = self.root / key
try:
mtime = fil.stat()[stat.ST_MTIME]
except OSError:
raise KeyError(key)
if fil in self.cache and mtime == self.cache[fil][1]:
return self.cache[fil][0]
        try:
            # Cached value is missing or stale; (re)read the file from disk.
            with fil.open("rb") as f:
                obj = pickle.loads(f.read())
        except Exception:
            # An unreadable or partially written file counts as a missing key.
            raise KeyError(key)
self.cache[fil] = (obj, mtime)
return obj
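    # Cache-coherency note: a read is served from self.cache only while the
    # file's current mtime equals the mtime recorded at the last read/write.
    # Another process rewriting the file bumps its mtime, so our stale cache
    # entry is bypassed (and refreshed) on the next __getitem__.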
def __setitem__(self, key, value):
"""db['key'] = 5"""
fil = self.root / key
parent = fil.parent
if parent and not parent.is_dir():
parent.mkdir(parents=True)
# We specify protocol 2, so that we can mostly go between Python 2
# and Python 3. We can upgrade to protocol 3 when Python 2 is obsolete.
with fil.open("wb") as f:
pickle.dump(value, f, protocol=2)
try:
self.cache[fil] = (value, fil.stat().st_mtime)
except OSError as e:
if e.errno != errno.ENOENT:
raise
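    # Note: writes are not atomic. A concurrent reader may open the file
    # mid-write; pickle.loads then fails and __getitem__ turns that into a
    # KeyError (see waitget() for a way to ride out such transient misses).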
def hset(self, hashroot, key, value):
"""hashed set"""
hroot = self.root / hashroot
if not hroot.is_dir():
hroot.mkdir()
hfile = hroot / gethashfile(key)
d = self.get(hfile, {})
d.update({key: value})
self[hfile] = d
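    # Usage sketch for the hashed API ('scores' is an arbitrary hashroot
    # name chosen for this example):
    #
    #   db.hset('scores', 'alice', 10)   # stored in scores/<2-hex-digit-bucket>
    #   db.hget('scores', 'alice')       # -> 10
    #   db.hget('scores', 'bob', 0)      # -> 0 (default; key absent)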
def hget(self, hashroot, key, default=_sentinel, fast_only=True):
"""hashed get"""
hroot = self.root / hashroot
hfile = hroot / gethashfile(key)
d = self.get(hfile, _sentinel)
# print "got dict",d,"from",hfile
if d is _sentinel:
if fast_only:
if default is _sentinel:
raise KeyError(key)
return default
# slow mode ok, works even after hcompress()
d = self.hdict(hashroot)
return d.get(key, default)
def hdict(self, hashroot):
"""Get all data contained in hashed category 'hashroot' as dict"""
hfiles = self.keys(hashroot + "/*")
hfiles.sort()
        last = hfiles[-1] if hfiles else ""
if last.endswith("xx"):
# print "using xx"
hfiles = [last] + hfiles[:-1]
        hdata = {}
        for f in hfiles:
            # print "using",f
            try:
                hdata.update(self[f])
            except KeyError:
                print("Corrupt", f, "deleted - hset is not threadsafe!")
                del self[f]
            self.uncache(f)
        return hdata
def hcompress(self, hashroot):
"""Compress category 'hashroot', so hset is fast again
hget will fail if fast_only is True for compressed items (that were
hset before hcompress).
"""
hfiles = self.keys(hashroot + "/*")
        hdata = {}
        for f in hfiles:
            # print "using",f
            hdata.update(self[f])
            self.uncache(f)
        self[hashroot + "/xx"] = hdata
for f in hfiles:
p = self.root / f
if p.name == "xx":
continue
p.unlink()
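    # After hcompress('scores'), all buckets are merged into 'scores/xx'.
    # Values written with hset() before the compress can then only be read
    # with hget(..., fast_only=False), which falls back to hdict() and
    # merges every remaining file under the hashroot.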
def __delitem__(self, key):
"""del db["key"]"""
fil = self.root / key
self.cache.pop(fil, None)
try:
fil.unlink()
except OSError:
# notfound and permission denied are ok - we
# lost, the other process wins the conflict
pass
def _normalized(self, p):
"""Make a key suitable for user's eyes"""
return str(p.relative_to(self.root)).replace("\\", "/")
def keys(self, globpat=None):
"""All keys in DB, or all keys matching a glob"""
if globpat is None:
files = self.root.rglob("*")
else:
files = self.root.glob(globpat)
return [self._normalized(p) for p in files if p.is_file()]
def __iter__(self):
return iter(self.keys())
def __len__(self):
return len(self.keys())
def uncache(self, *items):
"""Removes all, or specified items from cache
Use this after reading a large amount of large objects
to free up memory, when you won't be needing the objects
for a while.
"""
if not items:
self.cache = {}
for it in items:
self.cache.pop(it, None)
def waitget(self, key, maxwaittime=60):
"""Wait (poll) for a key to get a value
Will wait for `maxwaittime` seconds before raising a KeyError.
The call exits normally if the `key` field in db gets a value
within the timeout period.
Use this for synchronizing different processes or for ensuring
that an unfortunately timed "db['key'] = newvalue" operation
in another process (which causes all 'get' operation to cause a
KeyError for the duration of pickling) won't screw up your program
logic.
"""
wtimes = [0.2] * 3 + [0.5] * 2 + [1]
tries = 0
waited = 0
        while True:
try:
val = self[key]
return val
except KeyError:
pass
if waited > maxwaittime:
raise KeyError(key)
time.sleep(wtimes[tries])
waited += wtimes[tries]
if tries < len(wtimes) - 1:
tries += 1
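    # Polling sketch for waitget(): one process publishes a value while
    # another blocks until it appears ('result' is an arbitrary key and
    # compute() a placeholder for real work):
    #
    #   writer:  db['result'] = compute()
    #   reader:  value = db.waitget('result', maxwaittime=30)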
def getlink(self, folder):
"""Get a convenient link for accessing items"""
return PickleShareLink(self, folder)
def __repr__(self):
return "PickleShareDB('%s')" % self.root
class PickleShareLink:
"""A shortdand for accessing nested PickleShare data conveniently.
Created through PickleShareDB.getlink(), example::
lnk = db.getlink('myobjects/test')
lnk.foo = 2
lnk.bar = lnk.foo + 5
"""
    def __init__(self, db, keydir):
        # Assign through __dict__ to bypass our own __setattr__ (which would
        # otherwise write these attributes into the database itself).
        self.__dict__.update(db=db, keydir=keydir)
def __getattr__(self, key):
return self.__dict__["db"][self.__dict__["keydir"] + "/" + key]
def __setattr__(self, key, val):
self.db[self.keydir + "/" + key] = val
def __repr__(self):
db = self.__dict__["db"]
keys = db.keys(self.__dict__["keydir"] + "/*")
return "<PickleShareLink '%s': %s>" % (
self.__dict__["keydir"],
";".join([Path(k).basename() for k in keys]),
)
def main():
import textwrap
usage = textwrap.dedent(
"""\
pickleshare - manage PickleShare databases
Usage:
        pickleshare dump /path/to/db > dump.txt
        pickleshare load /path/to/db < dump.txt
        pickleshare testwait /path/to/db
        pickleshare test /path/to/db
"""
)
DB = PickleShareDB
if len(sys.argv) < 2:
print(usage)
return
cmd = sys.argv[1]
args = sys.argv[2:]
if cmd == "dump":
if not args:
args = ["."]
db = DB(args[0])
import pprint
        pprint.pprint(dict(db))
elif cmd == "load":
cont = sys.stdin.read()
db = DB(args[0])
data = eval(cont)
db.clear()
        for k, v in data.items():
db[k] = v
elif cmd == "testwait":
db = DB(args[0])
db.clear()
print(db.waitget("250"))
elif cmd == "test":
test()
stress()
if __name__ == "__main__":
main()