Mercurial > danboorufs
view danboorufs.py @ 8:c93cfd58112e draft default tip
Get the tags from danbooru’s json format. Warning: breaks the compatibility with older databases!
author | Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> |
---|---|
date | Sat, 25 May 2013 18:30:25 +0200 |
parents | 09945ce42e28 |
children |
line wrap: on
line source
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
#
# Copyright © 2012 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import with_statement, unicode_literals

from errno import ENOENT, ENOTDIR
from sys import argv
from threading import Lock
from time import time
from functools import reduce

import os
import json

from fuse import FUSE, FuseOSError, Operations, LoggingMixIn


class Danbooru(LoggingMixIn, Operations):
    '''
    Represent a list of images as a filesystem tree, with nice tag filtering.

    Every tag is exposed as a directory and every image as a file (or a
    symlink to the real file).  Entering a tag directory narrows the listing
    to the images carrying that tag; a directory name prefixed with "-"
    excludes the images carrying that tag instead.
    '''

    def __init__(self, json_files, root, use_symlinks):
        '''
        Build the tag index from a list of JSON metadata files.

        Each JSON file must provide the "id", "file_ext", "tag_string" and
        "rating" keys.  The image itself is expected next to its metadata,
        under the same name with the trailing "json" replaced by the value of
        "file_ext".

        json_files   -- iterable of paths to the JSON metadata files.
        root         -- existing path whose lstat()/statvfs() results are
                        reported for the virtual directories.
        use_symlinks -- when true, images are exposed as symlinks to the real
                        files instead of being served through open()/read().
        '''
        self.paths = {}  # exposed file name -> real path of the image
        self.files = {}  # exposed file name -> list of its tags
        self.tags = {}   # tag -> list of exposed file names carrying it
        self.cache = {}  # readdir() cache key -> directory entries

        start = time()

        for json_name in json_files:
            with open(json_name, 'r') as jsonfile:
                data = json.load(jsonfile)

            # A tag becomes a directory name, so it cannot contain a real
            # slash; a look-alike character is substituted.
            tags = [tag.replace('/', '∕')
                    for tag in data['tag_string'].split()]
            tags.append('rating:' + data['rating'])

            name = '{}.{}'.format(data['id'], data['file_ext'])
            # Strip the trailing "json" (keeping the dot) and append the
            # extension of the image.
            self.paths[name] = json_name[:-4] + data['file_ext']
            self.files[name] = tags
            for tag in tags:
                self.tags.setdefault(tag, []).append(name)

        print('[%d] Index done.' % (time() - start))
        self.root = root
        self.use_symlinks = use_symlinks
        self.rwlock = Lock()

    def _split_path(self, path):
        '''
        Split a FUSE path into its tag components and the real file path.

        Returns (None, None) for the root, (components, None) when the path
        designates a tag directory, and (components, real_path) when its last
        component is an image.  The returned components keep their leading
        "-" exclusion marker.

        Raises FuseOSError(ENOENT) when a component is neither a known tag nor
        (for the last one) a known file.
        '''
        if path == '/':
            return (None, None)

        real_path = path[1:].split('/')

        # Remove the leading - of tag exclusion.  startswith() is used rather
        # than indexing so that an empty component (e.g. "//") ends up as an
        # unknown tag and yields ENOENT instead of an IndexError.
        names = [tag[1:] if tag.startswith('-') else tag for tag in real_path]

        for tag in names[:-1]:
            if tag not in self.tags:
                raise FuseOSError(ENOENT)

        if names[-1] in self.tags:
            return (real_path, None)

        # File names are looked up verbatim: the "-" marker only has a meaning
        # for tags, so "-<file>" does not designate an existing entry.
        filename = real_path[-1]
        if filename not in self.paths:
            raise FuseOSError(ENOENT)

        return (real_path[:-1], self.paths[filename])

    def access(self, path, mode):
        '''
        Check that the path exists; the requested mode itself is not checked.

        Raises FuseOSError(ENOENT) through _split_path() otherwise.
        '''
        self._split_path(path)

    def getattr(self, path, file_handle=None):
        '''
        Return the stat fields of a path as a dictionary.

        Directories report the attributes of self.root, files those of the
        real image.  In symlink mode the st_mode is overridden, and st_size
        of a file is set to the length of the link target.
        '''
        _, filename = self._split_path(path)
        path = filename if filename else self.root
        stat = os.lstat(path)
        stat = dict((key, getattr(stat, key))
                    for key in ('st_atime', 'st_ctime', 'st_gid', 'st_mode',
                                'st_mtime', 'st_nlink', 'st_size', 'st_uid'))
        if self.use_symlinks:
            # Those modes are respectively for a symlink and a directory.
            stat['st_mode'] = 0o120700 if filename else 0o40700
            if filename:
                stat['st_size'] = len(filename)
        return stat

    # Extended attributes are not supported.
    getxattr = None
    listxattr = None

    def open(self, path, flags):
        '''
        Open the real file behind the path and return its file descriptor.
        '''
        _, filename = self._split_path(path)
        return os.open(filename, flags)

    def read(self, path, size, offset, file_handle):
        '''
        Read at most size bytes at offset from an open file descriptor.
        '''
        # The seek and the read must not be interleaved with those of another
        # request using the same descriptor.
        with self.rwlock:
            os.lseek(file_handle, offset, 0)
            return os.read(file_handle, size)

    def readdir(self, path, file_handle):
        '''
        List a directory: the tags still useful for narrowing the selection,
        followed by the files matching every tag of the path.

        Raises FuseOSError(ENOTDIR) when the path designates a file.
        '''
        if path == '/':
            return (['.', '..'] + list(self.tags.keys())
                    + list(self.files.keys()))

        tags, filename = self._split_path(path)
        if filename:
            raise FuseOSError(ENOTDIR)

        tags = set(tags)

        # Sorted so that the same combination of tags always gives the same
        # key, whatever order they were given in.
        key = ' '.join(sorted(tags))
        if key in self.cache:
            return ['.', '..'] + self.cache[key]

        inclusion_tags = set(tag for tag in tags if not tag.startswith('-'))
        exclusion_tags = set(tag[1:] for tag in tags if tag.startswith('-'))

        # Get the list of the files corresponding to those tags.
        files = reduce((lambda s, t: s.intersection(self.tags[t])),
                       inclusion_tags, set(self.files))
        files -= set(f for f in files
                     if exclusion_tags.intersection(self.files[f]))

        # Those next two steps are for useless tags removal.

        # Get the tags of those files.
        taglist = reduce((lambda s, f: s.union(self.files[f])), files, set())
        taglist -= tags

        # Remove the tags carried by every remaining file: entering them
        # would not narrow the file list any further.
        remove = reduce((lambda s, f: s.intersection(self.files[f])),
                        files, taglist)
        taglist -= remove

        self.cache[key] = list(taglist) + list(files)
        return ['.', '..'] + self.cache[key]

    def readlink(self, path):
        '''
        Return the real path of the image behind the path.
        '''
        _, filename = self._split_path(path)
        return filename

    def release(self, path, file_handle):
        '''
        Close a file descriptor previously returned by open().
        '''
        return os.close(file_handle)

    def statfs(self, path):
        '''
        Return the statvfs fields of the real file, or of self.root for a
        directory, as a dictionary.
        '''
        _, filename = self._split_path(path)
        path = filename if filename else self.root
        stv = os.statvfs(path)
        return dict((key, getattr(stv, key))
                    for key in ('f_bavail', 'f_bfree', 'f_blocks', 'f_bsize',
                                'f_favail', 'f_ffree', 'f_files', 'f_flag',
                                'f_frsize', 'f_namemax'))

    utimens = os.utime


def main(args):
    '''
    Index the given directories and mount the filesystem in the foreground.

    args follows the layout of sys.argv: the program name, an optional "-n"
    or "--no-symlinks" flag, one or more directories to walk for ".json"
    files, and finally the mountpoint.
    '''
    # Work on a copy so that the list of the caller (usually sys.argv) is
    # left untouched.
    args = list(args)
    mountpoint = args.pop()
    if args[1] == '-n' or args[1] == '--no-symlinks':
        use_symlinks = False
        directories = args[2:]
    else:
        use_symlinks = True
        directories = args[1:]

    filelist = []
    start = time()
    for directory in directories:
        for (path, _, files) in os.walk(directory):
            filelist.extend(os.path.join(path, filename)
                            for filename in files
                            if filename.endswith('.json'))
    print('[%d] Walk done.' % (time() - start))

    FUSE(Danbooru(filelist, os.path.dirname(mountpoint), use_symlinks),
         mountpoint, foreground=True)


if __name__ == '__main__':
    if len(argv) < 3:
        # A single string, so that the message is identical whether print is
        # a statement or a function.
        print('USAGE: %s [-n|--no-symlinks] <directory> [<directory> ...] '
              '<mountpoint>' % argv[0])
        raise SystemExit(1)

    main(argv)