view danboorufs.py @ 6:2c81cc41de2d draft

Allow only a base directory for the tags, which will be recursively walked to find the actual tags files.
author Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
date Sun, 02 Dec 2012 19:15:17 +0100
parents a422e75bf464
children 09945ce42e28
line wrap: on
line source

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
#
# Copyright © 2012 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from __future__ import with_statement, unicode_literals

from errno import ENOENT, ENOTDIR
from sys import argv
from threading import Lock
from time import time
from functools import reduce

import os

from fuse import FUSE, FuseOSError, Operations, LoggingMixIn


class Danbooru(LoggingMixIn, Operations):
    '''
    Represent a list of images as a filesystem tree, with nice tag filtering.

    Every path component except possibly the last one is a tag; a component
    starting with "-" excludes that tag instead of requiring it. The last
    component is either another tag (the path is then a directory) or the
    basename of an indexed image.
    '''

    def __init__(self, tagfiles, root, use_symlinks):
        '''
        Build the tag index.

        tagfiles is an iterable of paths to the files containing the tags.
        They have to be named as the image, with ".tags" at the end.
        root is the path whose stat results are reported for directories.
        use_symlinks selects whether images are exposed as symlinks to the
        real files (True) or with the stat results of the real files (False).
        '''
        self.paths = {}  # image basename -> path of the real image
        self.files = {}  # image basename -> list of its tags
        self.tags = {}   # tag -> list of the image basenames carrying it
        self.cache = {}  # readdir() results, keyed by the sorted tag set

        start = time()

        suffix = '.tags'
        for name in tagfiles:
            # Strip the trailing extension only: str.replace() would also
            # drop a ".tags" appearing anywhere else in the path.
            if name.endswith(suffix):
                filename = name[:-len(suffix)]
            else:
                filename = name
            basename = os.path.basename(filename)
            self.paths[basename] = filename
            tags = []
            self.files[basename] = tags
            with open(name, 'rb') as tagfile:
                for line in tagfile:
                    for tag in line.split():
                        try:
                            tag = tag.decode('UTF-8')
                        except UnicodeDecodeError:
                            # Tags that are not valid UTF-8 are ignored.
                            continue
                        except AttributeError:
                            # The tag has no decode() method, keep it as is.
                            pass
                        # A tag becomes a path component, so it must not
                        # contain a slash.
                        # NOTE(review): the replacement literal reads as
                        # U+FFFD here; confirm which character was intended.
                        tag = tag.replace('/', '�') #XXX
                        tags.append(tag)
                        self.tags.setdefault(tag, []).append(basename)

        print('[%d] Index done.' % (time() - start))

        self.root = root
        self.use_symlinks = use_symlinks
        # Serialises the lseek()/read() pair done in read().
        self.rwlock = Lock()

    def _split_path(self, path):
        '''
        Split a path into its tag components and the real image path.

        Returns (None, None) for the root, (components, None) when the path
        ends with a tag, and (components without the last one, real path)
        when it ends with an image name. The returned components keep their
        leading "-", if any.

        Raises FuseOSError(ENOENT) when a component is neither a known tag
        nor, for the last one, a known image.
        '''
        if path == '/':
            return (None, None)

        real_path = path[1:].split('/')

        # Remove the leading - of tag exclusion. startswith() also copes with
        # the empty components produced by "//" or a trailing "/".
        names = [name[1:] if name.startswith('-') else name
                 for name in real_path]

        for tag in names[:-1]:
            if tag not in self.tags:
                raise FuseOSError(ENOENT)

        if names[-1] in self.tags:
            return (real_path, None)

        # Image names are matched literally, leading "-" included, so that
        # the membership test and the lookup use the same key.
        basename = real_path[-1]
        if basename not in self.paths:
            raise FuseOSError(ENOENT)

        return (real_path[:-1], self.paths[basename])

    def access(self, path, mode):
        '''
        Accept any path that resolves; mode is not checked.
        '''
        self._split_path(path)

    def getattr(self, path, file_handle=None):
        '''
        Return the stat dictionary of a path.

        Images use the lstat() of the real file, directories the one of
        self.root. With symlinks enabled, the mode is overridden and the size
        of an image is the length of the path it points to.
        '''
        _, filename = self._split_path(path)
        path = filename if filename else self.root
        stat = os.lstat(path)
        stat = dict((key, getattr(stat, key)) for key in ('st_atime',
                        'st_ctime', 'st_gid', 'st_mode', 'st_mtime',
                        'st_nlink', 'st_size', 'st_uid'))

        if self.use_symlinks:
            # Those modes are respectively for a symlink and a directory.
            stat['st_mode'] = 0o120700 if filename else 0o40700
            if filename:
                stat['st_size'] = len(filename)

        return stat

    getxattr = None
    listxattr = None

    def open(self, path, flags):
        '''
        Open the real file behind path and return its file descriptor.
        '''
        _, filename = self._split_path(path)
        return os.open(filename, flags)

    def read(self, path, size, offset, file_handle):
        '''
        Read size bytes at offset from an open file descriptor.
        '''
        # Seeking and reading must not be interleaved with another read.
        with self.rwlock:
            os.lseek(file_handle, offset, 0)
            return os.read(file_handle, size)

    def readdir(self, path, file_handle):
        '''
        List a directory.

        The root contains every tag and every image. Any other directory
        contains the images selected by the tags of its path, and the tags
        that can still narrow that selection.

        Raises FuseOSError(ENOTDIR) when path designates an image.
        '''
        if path == '/':
            return (['.', '..'] + list(self.tags.keys())
                    + list(self.files.keys()))

        tags, filename = self._split_path(path)
        if filename:
            # The exception has to be raised, not returned as the listing.
            raise FuseOSError(ENOTDIR)

        tags = set(tags)

        # Sorted, so that a given set of tags always maps to the same key
        # whatever their order in the path.
        key = ' '.join(sorted(tags))
        if key in self.cache:
            return ['.', '..'] + self.cache[key]

        inclusion_tags = set(tag for tag in tags if tag[0] != '-')
        exclusion_tags = set(tag[1:] for tag in tags if tag[0] == '-')

        # Get the list of the files corresponding to those tags.
        files = reduce((lambda s, t: s.intersection(self.tags[t])),
                       inclusion_tags, set(self.files))
        files -= set(f for f in files
                     if exclusion_tags.intersection(self.files[f]))

        # Those next two steps are for useless tags removal.

        # Get the tags of those files, minus the ones already in the path.
        taglist = reduce((lambda s, f: s.union(self.files[f])), files, set())
        taglist -= tags

        # Remove the tags shared by every remaining file: they cannot narrow
        # the file list any further.
        remove = reduce((lambda s, f: s.intersection(self.files[f])), files,
                        taglist)
        taglist -= remove

        self.cache[key] = list(taglist) + list(files)
        return ['.', '..'] + self.cache[key]

    def readlink(self, path):
        '''
        Return the path of the real image behind path.
        '''
        _, filename = self._split_path(path)
        return filename

    def release(self, path, file_handle):
        '''
        Close a file descriptor returned by open().
        '''
        return os.close(file_handle)

    def statfs(self, path):
        '''
        Return the statvfs dictionary of the real image, or of self.root for
        a directory.
        '''
        _, filename = self._split_path(path)
        path = filename if filename else self.root
        stv = os.statvfs(path)
        return dict((key, getattr(stv, key)) for key in ('f_bavail', 'f_bfree',
            'f_blocks', 'f_bsize', 'f_favail', 'f_ffree', 'f_files', 'f_flag',
            'f_frsize', 'f_namemax'))

    utimens = os.utime


def main(args):
    '''
    Index a directory of tags files and mount the resulting filesystem.

    args is the whole argument vector: the program name, an optional "-n" or
    "--no-symlinks", the directory walked recursively for ".tags" files, and
    the mountpoint as last element. The list is left untouched.

    Raises SystemExit with a usage message when the directory or the
    mountpoint is missing.
    '''
    # The option, when present, shifts the tags directory by one position.
    if len(args) > 1 and args[1] in ('-n', '--no-symlinks'):
        use_symlinks = False
        position = 2
    else:
        use_symlinks = True
        position = 1

    # The mountpoint is the last argument and has to come after the directory.
    if len(args) < position + 2:
        raise SystemExit('USAGE: %s [-n|--no-symlinks] <tags directory> '
                         '<mountpoint>' % (args[0] if args else 'danboorufs'))

    directory = args[position]
    mountpoint = args[-1]

    filelist = []
    start = time()
    for (path, _, files) in os.walk(directory):
        filelist.extend(os.path.join(path, filename) for filename in files
                        if filename.endswith('.tags'))
    print('[%d] Walk done.' % (time() - start))

    # Directories are stat()ed through the parent of the mountpoint. Resolve
    # it from the absolute path: a relative "mnt" would otherwise give an
    # empty string, and "mnt/" the mountpoint itself.
    root = os.path.dirname(os.path.abspath(mountpoint))

    FUSE(Danbooru(filelist, root, use_symlinks), mountpoint, foreground=True)


if __name__ == '__main__':
    # The optional flag shifts the positional arguments, so one more argument
    # is needed when it is given.
    if len(argv) > 1 and argv[1] in ('-n', '--no-symlinks'):
        expected = 4
    else:
        expected = 3

    if len(argv) < expected:
        # A single formatted string prints the same text whether print is a
        # statement or a function.
        print('USAGE: %s [-n|--no-symlinks] <tags directory> <mountpoint>'
              % argv[0])
        # Unlike exit(), SystemExit does not depend on the site module.
        raise SystemExit(1)

    main(argv)