server/posts: change reverse image search API
Add exact duplicates search; refactor to use classes over dictionaries
This commit is contained in:
parent
04b820c730
commit
4cb613a5c9
5 changed files with 58 additions and 31 deletions
28
API.md
28
API.md
|
@ -1070,17 +1070,15 @@ data.
|
|||
|
||||
- **Output**
|
||||
|
||||
A list of [image search results](#image-search-result).
|
||||
An [image search result](#image-search-result).
|
||||
|
||||
- **Errors**
|
||||
|
||||
- input file is not an image
|
||||
- privileges are too low
|
||||
|
||||
- **Description**
|
||||
|
||||
Retrieves posts that look like the input image. Works only on images and
|
||||
animations, i.e. does not work for videos and Flash movies.
|
||||
Retrieves posts that look like the input image.
|
||||
|
||||
## Listing comments
|
||||
- **Request**
|
||||
|
@ -2152,14 +2150,15 @@ A result of reverse image search operation.
|
|||
|
||||
```json5
|
||||
{
|
||||
"results": [
|
||||
"exactPost": <exact-post>,
|
||||
"similarPosts": [
|
||||
{
|
||||
"dist": <distance>,
|
||||
"post": <post>
|
||||
"distance": <distance>,
|
||||
"post": <similar-post>
|
||||
},
|
||||
{
|
||||
"dist": <distance>,
|
||||
"post": <post>
|
||||
"distance": <distance>,
|
||||
"post": <similar-post>
|
||||
},
|
||||
...
|
||||
]
|
||||
|
@ -2167,9 +2166,14 @@ A result of reverse image search operation.
|
|||
```
|
||||
|
||||
**Field meaning**
|
||||
- `<dist>`: distance from the original image (0..1). The lower this value is, the more similar the
|
||||
post is.
|
||||
- `<post>`: a [post resource](#post).
|
||||
- `<exact-post>`: a [post resource](#post) that is an exact byte-for-byte duplicate
|
||||
of the input file. May be `null`.
|
||||
- `<similar-post>`: a [post resource](#post) that isn't an exact duplicate, but
|
||||
visually resembles the input file. Works only on images and animations, i.e.
|
||||
does not work for videos and Flash movies. For non-images and corrupted
|
||||
images, the `similarPosts` list is empty.
|
||||
- `<distance>`: distance from the original image (0..1). The lower this value
|
||||
is, the more similar the post is.
|
||||
|
||||
# Search
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@ reports=no
|
|||
disable=
|
||||
# we're not java
|
||||
missing-docstring,
|
||||
broad-except,
|
||||
|
||||
# covered better by pycodestyle
|
||||
bad-continuation,
|
||||
|
|
|
@ -212,11 +212,14 @@ def get_posts_by_image(ctx, _params=None):
|
|||
auth.verify_privilege(ctx.user, 'posts:reverse_search')
|
||||
content = ctx.get_file('content', required=True)
|
||||
return {
|
||||
'results': [
|
||||
{
|
||||
'dist': item['dist'],
|
||||
'post': _serialize_post(ctx, item['post']),
|
||||
}
|
||||
for item in posts.search_by_image(content)
|
||||
],
|
||||
'exactPost':
|
||||
_serialize_post(ctx, posts.search_by_image_exact(content)),
|
||||
'similarPosts':
|
||||
[
|
||||
{
|
||||
'distance': lookalike.distance,
|
||||
'post': _serialize_post(ctx, lookalike.post),
|
||||
}
|
||||
for lookalike in posts.search_by_image(content)
|
||||
],
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import elasticsearch
|
||||
import elasticsearch_dsl
|
||||
from image_match.elasticsearch_driver import SignatureES
|
||||
from szurubooru import config, errors
|
||||
from szurubooru import config
|
||||
|
||||
|
||||
# pylint: disable=invalid-name
|
||||
|
@ -12,6 +12,13 @@ es = elasticsearch.Elasticsearch([{
|
|||
session = SignatureES(es, index='szurubooru')
|
||||
|
||||
|
||||
class Lookalike:
    """Plain value holder describing one reverse image search match.

    The constructor stores its three arguments unchanged:

    score -- match score as reported by the search backend
        (NOTE(review): scale and direction are not visible here).
    distance -- distance from the queried image; the API documentation
        describes it as 0..1, lower meaning more similar.
    path -- identifier under which the matched image was indexed.
    """

    def __init__(self, score, distance, path):
        self.score = score
        self.distance = distance
        self.path = path

    def __repr__(self):
        # Uses the runtime class name so that subclasses are labelled
        # correctly in logs and debugger output.
        return '{}(score={!r}, distance={!r}, path={!r})'.format(
            type(self).__name__, self.score, self.distance, self.path)
|
||||
|
||||
|
||||
def add_image(path, image_content):
|
||||
if not path or not image_content:
|
||||
return
|
||||
|
@ -35,15 +42,14 @@ def search_by_image(image_content):
|
|||
for result in session.search_image(
|
||||
path=image_content, # sic
|
||||
bytestream=True):
|
||||
yield {
|
||||
'score': result['score'],
|
||||
'dist': result['dist'],
|
||||
'path': result['path'],
|
||||
}
|
||||
yield Lookalike(
|
||||
score=result['score'],
|
||||
distance=result['dist'],
|
||||
path=result['path'])
|
||||
except elasticsearch.exceptions.ElasticsearchException:
|
||||
raise
|
||||
except Exception:
|
||||
raise errors.SearchError('Error searching (invalid input?)')
|
||||
yield from []
|
||||
|
||||
|
||||
def purge():
|
||||
|
|
|
@ -57,6 +57,12 @@ class InvalidPostFlagError(errors.ValidationError):
|
|||
pass
|
||||
|
||||
|
||||
class PostLookalike(image_hash.Lookalike):
    """A lookalike match that also carries the matched post object.

    The base class ``path`` is filled with ``post.post_id``; the post
    itself is kept on ``self.post``.
    """

    def __init__(self, score, distance, post):
        super().__init__(score=score, distance=distance, path=post.post_id)
        self.post = post
|
||||
|
||||
|
||||
SAFETY_MAP = {
|
||||
db.Post.SAFETY_SAFE: 'safe',
|
||||
db.Post.SAFETY_SKETCHY: 'sketchy',
|
||||
|
@ -534,13 +540,20 @@ def merge_posts(source_post, target_post, replace_content):
|
|||
update_post_content(target_post, content)
|
||||
|
||||
|
||||
def search_by_image_exact(image_content):
    """Look up the post whose stored checksum matches the given content.

    The checksum is computed with ``util.get_sha1`` and compared against
    ``db.Post.checksum``.  The result comes from ``one_or_none()``.
    NOTE(review): assuming ``db.session`` is a SQLAlchemy session, that
    yields the post, ``None`` when nothing matches, and raises when more
    than one row matches -- confirm.
    """
    digest = util.get_sha1(image_content)
    matching_posts = db.session.query(db.Post).filter(
        db.Post.checksum == digest)
    return matching_posts.one_or_none()
|
||||
|
||||
|
||||
def search_by_image(image_content):
|
||||
for result in image_hash.search_by_image(image_content):
|
||||
yield {
|
||||
'score': result['score'],
|
||||
'dist': result['dist'],
|
||||
'post': get_post_by_id(result['path'])
|
||||
}
|
||||
yield PostLookalike(
|
||||
score=result.score,
|
||||
distance=result.distance,
|
||||
post=get_post_by_id(result.path))
|
||||
|
||||
|
||||
def populate_reverse_search():
|
||||
|
|
Loading…
Reference in a new issue