From 4cb613a5c912e16aedc8114df6bd519cae72e06b Mon Sep 17 00:00:00 2001 From: rr- Date: Sat, 7 Jan 2017 10:44:01 +0100 Subject: [PATCH] server/posts: change reverse image search API Add exact duplicates search; refactor to use classes over dictionaries --- API.md | 28 ++++++++++++++++------------ server/.pylintrc | 1 + server/szurubooru/api/post_api.py | 17 ++++++++++------- server/szurubooru/func/image_hash.py | 20 +++++++++++++------- server/szurubooru/func/posts.py | 23 ++++++++++++++++++----- 5 files changed, 58 insertions(+), 31 deletions(-) diff --git a/API.md b/API.md index 2b38172..43c6dd9 100644 --- a/API.md +++ b/API.md @@ -1070,17 +1070,15 @@ data. - **Output** - A list of [image search results](#image-search-result). + An [image search result](#image-search-result). - **Errors** - - input file is not an image - privileges are too low - **Description** - Retrieves posts that look like the input image. Works only on images and - animations, i.e. does not work for videos and Flash movies. + Retrieves posts that look like the input image. ## Listing comments - **Request** @@ -2152,14 +2150,15 @@ A result of reverse image search operation. ```json5 { - "results": [ + "exactPost": , + "similarPosts": [ { - "dist": , - "post": + "distance": , + "post": }, { - "dist": , - "post": + "distance": , + "post": }, ... ] @@ -2167,9 +2166,14 @@ A result of reverse image search operation. ``` **Field meaning** -- ``: distance from the original image (0..1). The lower this value is, the more similar the -post is. -- ``: a [post resource](#post). +- `exact-post`: a [post resource](#post) that is exact byte-to-byte duplicate + of the input file. May be `null`. +- ``: a [post resource](#post) that isn't exact duplicate, but + visually resembles the input file. Works only on images and animations, i.e. + does not work for videos and Flash movies. For non-images and corrupted + images, this list is empty. +- ``: distance from the original image (0..1). The lower this value + is, the more similar the post is. # Search diff --git a/server/.pylintrc b/server/.pylintrc index 2ff7cc8..846bac6 100644 --- a/server/.pylintrc +++ b/server/.pylintrc @@ -15,6 +15,7 @@ reports=no disable= # we're not java missing-docstring, + broad-except, # covered better by pycodestyle bad-continuation, diff --git a/server/szurubooru/api/post_api.py b/server/szurubooru/api/post_api.py index abc832e..5b38b18 100644 --- a/server/szurubooru/api/post_api.py +++ b/server/szurubooru/api/post_api.py @@ -212,11 +212,14 @@ def get_posts_by_image(ctx, _params=None): auth.verify_privilege(ctx.user, 'posts:reverse_search') content = ctx.get_file('content', required=True) return { - 'results': [ - { - 'dist': item['dist'], - 'post': _serialize_post(ctx, item['post']), - } - for item in posts.search_by_image(content) - ], + 'exactPost': + _serialize_post(ctx, posts.search_by_image_exact(content)), + 'similarPosts': + [ + { + 'distance': lookalike.distance, + 'post': _serialize_post(ctx, lookalike.post), + } + for lookalike in posts.search_by_image(content) + ], } diff --git a/server/szurubooru/func/image_hash.py b/server/szurubooru/func/image_hash.py index 00626d3..ebd96cd 100644 --- a/server/szurubooru/func/image_hash.py +++ b/server/szurubooru/func/image_hash.py @@ -1,7 +1,7 @@ import elasticsearch import elasticsearch_dsl from image_match.elasticsearch_driver import SignatureES -from szurubooru import config, errors +from szurubooru import config # pylint: disable=invalid-name @@ -12,6 +12,13 @@ es = elasticsearch.Elasticsearch([{ session = SignatureES(es, index='szurubooru') +class Lookalike: + def __init__(self, score, distance, path): + self.score = score + self.distance = distance + self.path = path + + def add_image(path, image_content): if not path or not image_content: return @@ -35,15 +42,14 @@ def search_by_image(image_content): for result in session.search_image( path=image_content, # sic bytestream=True): - yield { - 'score': result['score'], - 'dist': result['dist'], - 'path': result['path'], - } + yield Lookalike( + score=result['score'], + distance=result['dist'], + path=result['path']) except elasticsearch.exceptions.ElasticsearchException: raise except Exception: - raise errors.SearchError('Error searching (invalid input?)') + yield from [] def purge(): diff --git a/server/szurubooru/func/posts.py b/server/szurubooru/func/posts.py index d190b8d..755090d 100644 --- a/server/szurubooru/func/posts.py +++ b/server/szurubooru/func/posts.py @@ -57,6 +57,12 @@ class InvalidPostFlagError(errors.ValidationError): pass +class PostLookalike(image_hash.Lookalike): + def __init__(self, score, distance, post): + super().__init__(score, distance, post.post_id) + self.post = post + + SAFETY_MAP = { db.Post.SAFETY_SAFE: 'safe', db.Post.SAFETY_SKETCHY: 'sketchy', @@ -534,13 +540,20 @@ def merge_posts(source_post, target_post, replace_content): update_post_content(target_post, content) +def search_by_image_exact(image_content): + checksum = util.get_sha1(image_content) + return db.session \ + .query(db.Post) \ + .filter(db.Post.checksum == checksum) \ + .one_or_none() + + def search_by_image(image_content): for result in image_hash.search_by_image(image_content): - yield { - 'score': result['score'], - 'dist': result['dist'], - 'post': get_post_by_id(result['path']) - } + yield PostLookalike( + score=result.score, + distance=result.distance, + post=get_post_by_id(result.path)) def populate_reverse_search():