view danboorufs.py @ 8:c93cfd58112e draft default tip

Get the tags from danbooru’s json format. Warning: breaks the compatibility with older databases!
author Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
date Sat, 25 May 2013 18:30:25 +0200
parents 09945ce42e28
children
line wrap: on
line source

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
#
# Copyright © 2012 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from __future__ import with_statement, unicode_literals

from errno import ENOENT, ENOTDIR
from sys import argv
from threading import Lock
from time import time
from functools import reduce

import os
import json

from fuse import FUSE, FuseOSError, Operations, LoggingMixIn


class Danbooru(LoggingMixIn, Operations):
    '''
    Represent a list of images as a filesystem tree, with nice tag filtering.

    Every tag is exposed as a directory and every image as a file (or as a
    symlink to the real image when use_symlinks is set). Entering a tag
    directory narrows the listing to the images carrying that tag; a
    directory name prefixed with "-" excludes the images carrying the tag.
    '''

    def __init__(self, json_files, root, use_symlinks):
        '''
        Build the tag index from a list of danbooru JSON metadata files.

        Each JSON file must provide the "id", "file_ext", "tag_string" and
        "rating" keys. The image is expected next to its metadata, under the
        same name with the "json" suffix replaced by the value of "file_ext"
        (e.g. "123.json" describes "123.jpg").

        json_files: paths of the metadata files to index.
        root: an existing directory whose lstat()/statvfs() results are
            reported for the virtual directories.
        use_symlinks: expose the images as symlinks instead of regular files.
        '''
        # Virtual file name -> path of the real image.
        self.paths = {}
        # Virtual file name -> list of its tags.
        self.files = {}
        # Tag -> list of the virtual file names carrying it.
        self.tags = {}
        # Tag combination -> directory listing already computed by readdir().
        self.cache = {}

        start = time()

        for json_name in json_files:
            with open(json_name, 'r') as jsonfile:
                data = json.load(jsonfile)

            # "/" can't appear in a directory name, so it is replaced by the
            # lookalike "∕" character.
            tags = [tag.replace('/', '∕')
                    for tag in data['tag_string'].split()]
            tags.append('rating:' + data['rating'])

            name = '{}.{}'.format(data['id'], data['file_ext'])
            # Drop the trailing "json" but keep the dot before it, then
            # append the extension of the image.
            self.paths[name] = json_name[:-4] + data['file_ext']
            self.files[name] = tags
            for tag in tags:
                self.tags.setdefault(tag, []).append(name)

        print('[%d] Index done.' % (time() - start))

        self.root = root
        self.use_symlinks = use_symlinks
        # Serialises the lseek()+read() pair done in read().
        self.rwlock = Lock()

    def _split_path(self, path):
        '''
        Split a virtual path into its tag components and its target file.

        Returns (None, None) for the root, (components, None) when the path
        designates a tag directory, and (components, real_image_path) when it
        designates a file. The components keep their "-" exclusion prefix.

        Raises FuseOSError(ENOENT) when a component is neither a known tag
        nor, for the last one, a known file.
        '''
        if path == '/':
            return (None, None)

        real_path = path[1:].split('/')

        # An empty component ("//" or a trailing "/") can't name a tag or a
        # file; reject it here instead of crashing on tag[0] below.
        if not all(real_path):
            raise FuseOSError(ENOENT)

        # Remove the leading - of tag exclusion.
        stripped = [tag[1:] if tag[0] == '-' else tag for tag in real_path]

        for tag in stripped[:-1]:
            if tag not in self.tags:
                raise FuseOSError(ENOENT)

        if stripped[-1] in self.tags:
            return (real_path, None)

        # The exclusion prefix only applies to tags, so a file is looked up
        # under its literal name; the membership test and the lookup must
        # use the same key.
        name = real_path[-1]
        if name not in self.paths:
            raise FuseOSError(ENOENT)

        return (real_path[:-1], self.paths[name])

    def access(self, path, mode):
        '''
        Accept any access mode for an existing path.

        Raises FuseOSError(ENOENT) through _split_path() otherwise.
        '''
        self._split_path(path)

    def getattr(self, path, file_handle=None):
        '''
        Return the stat fields of a virtual path as a dict.

        The values come from os.lstat() on the real image for a file, and on
        self.root for a directory. When use_symlinks is set, the mode and
        the size are overridden to describe a symlink or a directory.
        '''
        _, filename = self._split_path(path)
        path = filename if filename else self.root
        stat = os.lstat(path)
        stat = dict((key, getattr(stat, key)) for key in ('st_atime',
                        'st_ctime', 'st_gid', 'st_mode', 'st_mtime',
                        'st_nlink', 'st_size', 'st_uid'))

        if self.use_symlinks:
            # Those modes are respectively for a symlink and a directory.
            stat['st_mode'] = 0o120700 if filename else 0o40700
            if filename:
                # Size of a symlink: the length of its target.
                stat['st_size'] = len(filename)

        return stat

    # NOTE(review): presumably makes fusepy treat the extended attribute
    # operations as unsupported -- confirm against the fusepy version in use.
    getxattr = None
    listxattr = None

    def open(self, path, flags):
        '''
        Open the real image behind a virtual file, return its descriptor.
        '''
        _, filename = self._split_path(path)
        return os.open(filename, flags)

    def read(self, path, size, offset, file_handle):
        '''
        Read up to size bytes at offset from an open file descriptor.
        '''
        # The seek and the read must not be interleaved with those of
        # another call working on the same descriptor.
        with self.rwlock:
            os.lseek(file_handle, offset, 0)
            return os.read(file_handle, size)

    def readdir(self, path, file_handle):
        '''
        List the entries of a virtual directory.

        The root contains every tag and every file. A tag directory contains
        the files matching all the tags of the path, plus the tags which
        would still narrow that selection down.

        Raises FuseOSError(ENOTDIR) when the path designates a file, and
        FuseOSError(ENOENT) when it doesn't exist.
        '''
        if path == '/':
            return (['.', '..'] + list(self.tags.keys())
                    + list(self.files.keys()))

        tags, filename = self._split_path(path)
        if filename:
            # The error has to be raised, returning the exception object
            # would hand it over to fusepy as if it were the listing.
            raise FuseOSError(ENOTDIR)

        tags = set(tags)

        # Sorted so that a given combination of tags always yields the same
        # key, whatever the iteration order of the set is.
        key = ' '.join(sorted(tags))
        if key in self.cache:
            return ['.', '..'] + self.cache[key]

        inclusion_tags = set(tag for tag in tags if tag[0] != '-')
        exclusion_tags = set(tag[1:] for tag in tags if tag[0] == '-')

        # Get the list of the files corresponding to those tags.
        files = reduce((lambda s, t: s.intersection(self.tags[t])),
                       inclusion_tags, set(self.files))
        files -= set([f for f in files
                      if exclusion_tags.intersection(self.files[f])])

        # Those next two steps are for useless tags removal.

        # Get the tags of those files.
        taglist = reduce((lambda s, f: s.union(self.files[f])), files, set())
        taglist -= tags

        # Remove the tags shared by every remaining file: selecting one of
        # them wouldn't narrow the file list down any further.
        remove = reduce((lambda s, f: s.intersection(self.files[f])), files,
                        taglist)
        taglist -= remove

        self.cache[key] = list(taglist) + list(files)
        return ['.', '..'] + self.cache[key]

    def readlink(self, path):
        '''
        Return the path of the real image a virtual file points to.
        '''
        _, filename = self._split_path(path)
        return filename

    def release(self, path, file_handle):
        '''
        Close a file descriptor previously returned by open().
        '''
        return os.close(file_handle)

    def statfs(self, path):
        '''
        Return the statvfs fields of the filesystem holding the real image,
        or self.root for a directory, as a dict.
        '''
        _, filename = self._split_path(path)
        path = filename if filename else self.root
        stv = os.statvfs(path)
        return dict((key, getattr(stv, key)) for key in ('f_bavail', 'f_bfree',
            'f_blocks', 'f_bsize', 'f_favail', 'f_ffree', 'f_files', 'f_flag',
            'f_frsize', 'f_namemax'))

    # NOTE(review): os.utime receives the virtual path untranslated here, so
    # it presumably targets the real filesystem rather than the image behind
    # the path -- confirm whether this passthrough is intended.
    utimens = os.utime


def main(args):
    '''
    Index the given directories and mount the tag filesystem.

    args is an argv-style list: the program name, an optional "-n" or
    "--no-symlinks" flag, one or more directories to scan recursively for
    ".json" metadata files, and finally the mountpoint. The call blocks
    until the filesystem is unmounted, as it is mounted in the foreground.
    '''
    # Work on a copy, to leave the list of the caller (usually sys.argv)
    # untouched.
    args = list(args)
    mountpoint = args.pop()

    if len(args) > 1 and args[1] in ('-n', '--no-symlinks'):
        use_symlinks = False
        directories = args[2:]
    else:
        use_symlinks = True
        directories = args[1:]

    filelist = []
    start = time()
    for directory in directories:
        for (path, _, files) in os.walk(directory):
            filelist.extend(os.path.join(path, filename) for filename in files
                            if filename.endswith('.json'))
    print('[%d] Walk done.' % (time() - start))

    # The parent of the mountpoint provides the attributes of the virtual
    # directories. The path is normalised first: dirname() alone gives ''
    # for a bare relative name like "mnt", and the mountpoint itself for
    # "mnt/".
    root = os.path.dirname(os.path.abspath(mountpoint))

    FUSE(Danbooru(filelist, root, use_symlinks),
         mountpoint, foreground=True)


if __name__ == '__main__':
    # A mountpoint and at least one directory are required; the optional
    # flag doesn't count as a directory.
    has_flag = len(argv) > 1 and argv[1] in ('-n', '--no-symlinks')
    if len(argv) < (4 if has_flag else 3):
        print('USAGE: %s' % argv[0], '[-n|--no-symlinks]',
              '<directory> [<directory> ...]', '<mountpoint>')
        # exit() is a helper installed by the site module and may be
        # missing; raising SystemExit always works.
        raise SystemExit(1)

    main(argv)