server/posts: change reverse image search API

Add exact duplicates search; refactor to use classes over dictionaries
This commit is contained in:
rr- 2017-01-07 10:44:01 +01:00
parent 04b820c730
commit 4cb613a5c9
5 changed files with 58 additions and 31 deletions

28
API.md
View file

@ -1070,17 +1070,15 @@ data.
- **Output**
A list of [image search results](#image-search-result).
An [image search result](#image-search-result).
- **Errors**
- input file is not an image
- privileges are too low
- **Description**
Retrieves posts that look like the input image. Works only on images and
animations, i.e. does not work for videos and Flash movies.
Retrieves posts that look like the input image.
## Listing comments
- **Request**
@ -2152,14 +2150,15 @@ A result of reverse image search operation.
```json5
{
"results": [
"exactPost": <exact-post>,
"similarPosts": [
{
"dist": <distance>,
"post": <post>
"distance": <distance>,
"post": <similar-post>
},
{
"dist": <distance>,
"post": <post>
"distance": <distance>,
"post": <similar-post>
},
...
]
@ -2167,9 +2166,14 @@ A result of reverse image search operation.
```
**Field meaning**
- `<dist>`: distance from the original image (0..1). The lower this value is, the more similar the
post is.
- `<post>`: a [post resource](#post).
- `<exact-post>`: a [post resource](#post) that is an exact byte-for-byte
duplicate of the input file. May be `null`.
- `<similar-post>`: a [post resource](#post) that isn't an exact duplicate, but
visually resembles the input file. Works only on images and animations, i.e.
does not work for videos and Flash movies. For non-images and corrupted
images, the `similarPosts` list is empty.
- `<distance>`: distance from the original image (0..1). The lower this value
is, the more similar the post is.
# Search

View file

@ -15,6 +15,7 @@ reports=no
disable=
# we're not java
missing-docstring,
broad-except,
# covered better by pycodestyle
bad-continuation,

View file

@ -212,11 +212,14 @@ def get_posts_by_image(ctx, _params=None):
auth.verify_privilege(ctx.user, 'posts:reverse_search')
content = ctx.get_file('content', required=True)
return {
'results': [
{
'dist': item['dist'],
'post': _serialize_post(ctx, item['post']),
}
for item in posts.search_by_image(content)
],
'exactPost':
_serialize_post(ctx, posts.search_by_image_exact(content)),
'similarPosts':
[
{
'distance': lookalike.distance,
'post': _serialize_post(ctx, lookalike.post),
}
for lookalike in posts.search_by_image(content)
],
}

View file

@ -1,7 +1,7 @@
import elasticsearch
import elasticsearch_dsl
from image_match.elasticsearch_driver import SignatureES
from szurubooru import config, errors
from szurubooru import config
# pylint: disable=invalid-name
@ -12,6 +12,13 @@ es = elasticsearch.Elasticsearch([{
session = SignatureES(es, index='szurubooru')
class Lookalike:
    """A single match produced by a reverse image search.

    Attributes:
        score: the ``score`` value of the search backend's result.
        distance: the ``dist`` value of the search backend's result; the API
            documentation describes it as lying in 0..1, lower meaning more
            similar.
        path: the ``path`` value of the search backend's result.
    """

    def __init__(self, score, distance, path):
        self.score = score
        self.distance = distance
        self.path = path

    def __repr__(self):
        # Use the runtime class name so that subclasses get a sensible repr
        # without having to override this method.
        return '{}(score={!r}, distance={!r}, path={!r})'.format(
            type(self).__name__, self.score, self.distance, self.path)
def add_image(path, image_content):
if not path or not image_content:
return
@ -35,15 +42,14 @@ def search_by_image(image_content):
for result in session.search_image(
path=image_content, # sic
bytestream=True):
yield {
'score': result['score'],
'dist': result['dist'],
'path': result['path'],
}
yield Lookalike(
score=result['score'],
distance=result['dist'],
path=result['path'])
except elasticsearch.exceptions.ElasticsearchException:
raise
except Exception:
raise errors.SearchError('Error searching (invalid input?)')
yield from []
def purge():

View file

@ -57,6 +57,12 @@ class InvalidPostFlagError(errors.ValidationError):
pass
class PostLookalike(image_hash.Lookalike):
    """Reverse image search match that also carries the matched post.

    The post's ``post_id`` is handed to the base class as its ``path``, and
    the post object itself is kept on the ``post`` attribute.
    """

    def __init__(self, score, distance, post):
        super().__init__(
            score=score,
            distance=distance,
            path=post.post_id)
        self.post = post
SAFETY_MAP = {
db.Post.SAFETY_SAFE: 'safe',
db.Post.SAFETY_SKETCHY: 'sketchy',
@ -534,13 +540,20 @@ def merge_posts(source_post, target_post, replace_content):
update_post_content(target_post, content)
def search_by_image_exact(image_content):
    """Find the post whose stored checksum matches the given content.

    The content is digested with ``util.get_sha1`` and compared against
    ``db.Post.checksum``.

    Returns whatever ``one_or_none()`` yields for that query: presumably the
    single matching post, or ``None`` when nothing matches (SQLAlchemy
    semantics; confirm against the ``db`` session in use).
    """
    digest = util.get_sha1(image_content)
    matching_posts = db.session.query(db.Post).filter(
        db.Post.checksum == digest)
    return matching_posts.one_or_none()
def search_by_image(image_content):
for result in image_hash.search_by_image(image_content):
yield {
'score': result['score'],
'dist': result['dist'],
'post': get_post_by_id(result['path'])
}
yield PostLookalike(
score=result.score,
distance=result.distance,
post=get_post_by_id(result.path))
def populate_reverse_search():