mirror of
https://git.youjo.love/youjo/youjo-be.git
synced 2025-01-18 18:36:01 +01:00
Add translation module for Argos Translate (#351)
Argos Translate is a Python module for translation and can be used as a command line tool. This is also the engine for LibreTranslate, for which we already have a module. Here we can use the engine directly from our server without doing requests to a third party or having to install our own LibreTranslate webservice (obviously you do have to install Argos Translate). One thing that's currently still missing from Argos Translate is auto-detection of languages (see <https://github.com/argosopentech/argos-translate/issues/9>). For now, when no source language is provided, we just return the text unchanged, supposedly translated from the target language. That way you get a near immediate response in pleroma-fe when clicking Translate, after which you can select the source language from a dropdown. Argos Translate also doesn't seem to handle html very well. Therefore we give admins the option to strip the html before translating. I made this an option because I'm unsure if/how this will change in the future. Co-authored-by: ilja <git@ilja.space> Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/351 Co-authored-by: ilja <akkoma.dev@ilja.space> Co-committed-by: ilja <akkoma.dev@ilja.space>
This commit is contained in:
parent
233c4bb3ba
commit
c092fc9fd6
6 changed files with 241 additions and 2 deletions
|
@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|||
### Added
|
||||
- Prometheus metrics exporting from `/api/v1/akkoma/metrics`
|
||||
- Ability to alter http pool size
|
||||
- Translation of statuses via ArgosTranslate
|
||||
|
||||
### Removed
|
||||
- Non-finch HTTP adapters
|
||||
|
|
|
@ -882,6 +882,11 @@ config :pleroma, :libre_translate,
|
|||
url: "http://127.0.0.1:5000",
|
||||
api_key: nil
|
||||
|
||||
# Argos Translate is driven through its command line tools. Each command may be a
# bare name that is resolved through PATH, or the full path to the executable.
# `strip_html` removes markup from a post before it is handed to the translator.
config :pleroma, :argos_translate,
  command_argos_translate: "argos-translate",
  command_argospm: "argospm",
  strip_html: true
|
||||
|
||||
# Import environment specific config. This must remain at the bottom
|
||||
# of this file so it overrides the configuration defined above.
|
||||
import_config "#{Mix.env()}.exs"
|
||||
|
|
|
@ -3442,5 +3442,30 @@ config :pleroma, :config_description, [
|
|||
suggestion: [nil]
|
||||
}
|
||||
]
|
||||
},
|
||||
%{
|
||||
group: :pleroma,
|
||||
key: :argos_translate,
|
||||
type: :group,
|
||||
description: "ArgosTranslate Settings.",
|
||||
children: [
|
||||
%{
|
||||
key: :command_argos_translate,
|
||||
type: :string,
|
||||
description: "command for `argos-translate`. Can be the command if it's in your PATH, or the full path to the file.",
|
||||
suggestion: ["argos-translate"]
|
||||
},
|
||||
%{
|
||||
key: :command_argospm,
|
||||
type: :string,
|
||||
description: "command for `argospm`. Can be the command if it's in your PATH, or the full path to the file.",
|
||||
suggestion: ["argospm"]
|
||||
},
|
||||
%{
|
||||
key: :strip_html,
|
||||
type: :boolean,
|
||||
description: "Strip html from the post before translating it."
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
|
@ -1119,7 +1119,7 @@ Each job has these settings:
|
|||
### Translation Settings
|
||||
|
||||
Settings to automatically translate statuses for end users. Currently supported
|
||||
translation services are DeepL and LibreTranslate.
|
||||
translation services are DeepL and LibreTranslate. The supported command line tool is [Argos Translate](https://github.com/argosopentech/argos-translate).
|
||||
|
||||
Translations are available at `/api/v1/statuses/:id/translations/:language`, where
|
||||
`language` is the target language code (e.g. `en`)
|
||||
|
@ -1128,7 +1128,7 @@ Translations are available at `/api/v1/statuses/:id/translations/:language`, whe
|
|||
|
||||
- `:enabled` - enables translation
|
||||
- `:module` - Sets module to be used
|
||||
- Either `Pleroma.Akkoma.Translators.DeepL` or `Pleroma.Akkoma.Translators.LibreTranslate`
|
||||
- Either `Pleroma.Akkoma.Translators.DeepL`, `Pleroma.Akkoma.Translators.LibreTranslate`, or `Pleroma.Akkoma.Translators.ArgosTranslate`
|
||||
|
||||
### `:deepl`
|
||||
|
||||
|
@ -1140,3 +1140,9 @@ Translations are available at `/api/v1/statuses/:id/translations/:language`, whe
|
|||
|
||||
- `:url` - URL of LibreTranslate instance
|
||||
- `:api_key` - API key for LibreTranslate
|
||||
|
||||
### `:argos_translate`
|
||||
|
||||
- `:command_argos_translate` - command for `argos-translate`. Can be the command if it's in your PATH, or the full path to the file (default: `argos-translate`).
|
||||
- `:command_argospm` - command for `argospm`. Can be the command if it's in your PATH, or the full path to the file (default: `argospm`).
|
||||
- `:strip_html` - Strip html from the post before translating it (default: `true`).
|
||||
|
|
109
lib/pleroma/akkoma/translators/argos_translate.ex
Normal file
109
lib/pleroma/akkoma/translators/argos_translate.ex
Normal file
|
@ -0,0 +1,109 @@
|
|||
defmodule Pleroma.Akkoma.Translators.ArgosTranslate do
  @moduledoc """
  Translator backend that shells out to the Argos Translate command line tools.

  Settings are read at call time from `config :pleroma, :argos_translate`:

    * `:command_argos_translate` - executable used to translate a piece of text
    * `:command_argospm` - executable used (as `argospm list`) to list installed packages
    * `:strip_html` - when `true`, markup is stripped before translating and line
      breaks are turned back into `<br/>` afterwards
  """

  @behaviour Pleroma.Akkoma.Translator

  alias Pleroma.Config

  defp argos_translate, do: Config.get([:argos_translate, :command_argos_translate])

  defp argospm, do: Config.get([:argos_translate, :command_argospm])

  defp strip_html?, do: Config.get([:argos_translate, :strip_html])

  # Runs `command` with `args` and returns `{output, exit_status}`.
  # `System.cmd/3` raises when the executable cannot be started; that is reported
  # as a failed command (exit status 1) so callers only deal with one result shape.
  defp safe_cmd(command, args) do
    System.cmd(command, args, stderr_to_stdout: true, parallelism: true)
  rescue
    _ -> {"Command #{command} not found", 1}
  end

  defp safe_languages, do: safe_cmd(argospm(), ["list"])

  defp safe_translate(string, from_language, to_language) do
    safe_cmd(
      argos_translate(),
      ["--from-lang", from_language, "--to-lang", to_language, string]
    )
  end

  # Turns one line of `argospm list` output into `[{source, target}]`.
  # stderr is merged into the output, so lines that are not of the form
  # `translate-<source>_<target>` (warnings, notices, ...) yield `[]` and are skipped
  # instead of crashing the whole listing.
  defp parse_package_line(line) do
    with "translate-" <> pair <- String.trim(line),
         [source, target] <- String.split(pair, "_") do
      [{source, target}]
    else
      _ -> []
    end
  end

  defp to_language_entries(codes) do
    codes
    |> Enum.map(fn code -> %{code: code, name: code} end)
    |> Enum.uniq()
  end

  @doc """
  Lists the languages of the installed Argos Translate packages.

  Returns `{:ok, source_languages, target_languages}` where each language is a map
  `%{code: code, name: code}`, or `{:error, message}` when `argospm list` fails.
  """
  @impl Pleroma.Akkoma.Translator
  def languages do
    case safe_languages() do
      {response, 0} ->
        pairs =
          response
          |> String.split("\n", trim: true)
          |> Enum.flat_map(&parse_package_line/1)

        source_langs = pairs |> Enum.map(&elem(&1, 0)) |> to_language_entries()
        dest_langs = pairs |> Enum.map(&elem(&1, 1)) |> to_language_entries()

        {:ok, source_langs, dest_langs}

      {response, _} ->
        {:error, "ArgosTranslate failed to fetch languages (#{response})"}
    end
  end

  # Replaces block-level markup by newlines, strips the remaining tags and decodes
  # HTML entities, so that plain text is handed to the translator.
  defp clean_string(string, true) do
    string
    |> String.replace(["<p>", "</p>", "<li>"], "\n")
    # Matches `<br>`, `<br/>` and `<br />`; the latter would otherwise be removed by
    # `strip_tags/1` without leaving a line break behind.
    |> String.replace(~r/<br\s*\/?>/i, "\n")
    |> Pleroma.HTML.strip_tags()
    |> HtmlEntities.decode()
  end

  defp clean_string(string, _), do: string

  # Inverse of `clean_string/2`: re-encodes entities and restores line breaks.
  defp htmlify_response(string, true) do
    string
    |> HtmlEntities.encode()
    |> String.replace("\n", "<br/>")
  end

  defp htmlify_response(string, _), do: string

  @doc """
  Translates `string` from `from_language` to `to_language`.

  Returns `{:ok, detected_source_language, translated_text}` or `{:error, message}`.
  When `from_language` is `nil` the text is returned unchanged (see the comment on
  that clause).
  """
  @impl Pleroma.Akkoma.Translator
  def translate(string, nil, to_language) do
    # Akkoma's Pleroma-fe expects us to detect the source language automatically.
    # Argos-translate doesn't have that option (yet?)
    # see <https://github.com/argosopentech/argos-translate/issues/9>
    # For now we return the text unchanged, supposedly translated from the target language.
    # Afterwards people get the option to overwrite the source language from a dropdown.
    {:ok, to_language, string}
  end

  def translate(string, from_language, to_language) do
    # Argos Translate doesn't properly translate HTML (yet?)
    # For now we give admins the option to strip the html before translating
    # Note that we have to add some html back to the response afterwards
    #
    # The setting is read once so stripping and re-htmlifying always agree.
    strip_html = strip_html?()

    case string |> clean_string(strip_html) |> safe_translate(from_language, to_language) do
      {translated, 0} ->
        {:ok, from_language, htmlify_response(translated, strip_html)}

      {response, _} ->
        {:error, "ArgosTranslate failed to translate (#{response})"}
    end
  end
end
|
93
test/pleroma/akkoma/translators/argos_translate_test.exs
Normal file
93
test/pleroma/akkoma/translators/argos_translate_test.exs
Normal file
|
@ -0,0 +1,93 @@
|
|||
defmodule Pleroma.Akkoma.Translators.ArgosTranslateTest do
  # `with_mock/3` replaces the `System` module globally while the mocked call runs,
  # so these tests must not run concurrently with other test modules.
  use Pleroma.DataCase, async: false

  import Mock

  alias Pleroma.Akkoma.Translators.ArgosTranslate

  # Point both commands at names that only the mocks below respond to.
  setup do
    clear_config([:argos_translate, :command_argos_translate], "argos-translate_test")
    clear_config([:argos_translate, :command_argospm], "argospm_test")
  end

  test "it lists available languages" do
    languages =
      with_mock System, [:passthrough],
        cmd: fn "argospm_test", ["list"], _ ->
          {"translate-nl_en\ntranslate-en_nl\ntranslate-ja_en\n", 0}
        end do
        ArgosTranslate.languages()
      end

    assert {:ok, source_langs, dest_langs} = languages

    assert [%{code: "en", name: "en"}, %{code: "ja", name: "ja"}, %{code: "nl", name: "nl"}] =
             source_langs |> Enum.sort()

    assert [%{code: "en", name: "en"}, %{code: "nl", name: "nl"}] = dest_langs |> Enum.sort()
  end

  test "it translates from the to language when no language is set and returns the text unchanged" do
    assert {:ok, "nl", "blabla"} = ArgosTranslate.translate("blabla", nil, "nl")
  end

  test "it translates from the provided language if provided" do
    translation_response =
      with_mock System, [:passthrough],
        cmd: fn "argos-translate_test", ["--from-lang", "nl", "--to-lang", "en", "blabla"], _ ->
          {"yadayada", 0}
        end do
        ArgosTranslate.translate("blabla", "nl", "en")
      end

    assert {:ok, "nl", "yadayada"} = translation_response
  end

  test "it returns a proper error when the executable can't be found" do
    non_existing_command = "sfqsfgqsefd"
    clear_config([:argos_translate, :command_argos_translate], non_existing_command)
    clear_config([:argos_translate, :command_argospm], non_existing_command)

    # Guard against the unlikely case that such an executable exists on this machine.
    assert nil == System.find_executable(non_existing_command)

    assert {:error, "ArgosTranslate failed to fetch languages" <> _} = ArgosTranslate.languages()

    assert {:error, "ArgosTranslate failed to translate" <> _} =
             ArgosTranslate.translate("blabla", "nl", "en")
  end

  test "it can strip html" do
    content =
      ~s[<p>What's up my fellow fedizens?</p><p>So anyway</p><ul><li><a class="hashtag" data-tag="cofe" href="https://suya.space/tag/cofe">#cofe</a></li><li><a class="hashtag" data-tag="suya" href="https://cofe.space/tag/suya">#Suya</a></li></ul><p>ammiright!<br/>:ablobfoxhyper:</p>]

    stripped_content =
      "\nWhat's up my fellow fedizens?\n\nSo anyway\n\n#cofe\n#Suya\nammiright!\n:ablobfoxhyper:\n"

    expected_response_strip_html =
      "<br/>What's up my fellow fedizens?<br/><br/>So anyway<br/><br/>#cofe<br/>#Suya<br/>ammiright!<br/>:ablobfoxhyper:<br/>"

    # With stripping enabled the command must receive the plain-text version;
    # the mock echoes it back so the re-htmlified result can be checked.
    response_strip_html =
      with_mock System, [:passthrough],
        cmd: fn "argos-translate_test",
                ["--from-lang", _, "--to-lang", _, ^stripped_content],
                _ ->
          {stripped_content, 0}
        end do
        ArgosTranslate.translate(content, "nl", "en")
      end

    clear_config([:argos_translate, :strip_html], false)

    # With stripping disabled the text passes through untouched in both directions.
    response_no_strip_html =
      with_mock System, [:passthrough],
        cmd: fn "argos-translate_test", ["--from-lang", _, "--to-lang", _, string], _ ->
          {string, 0}
        end do
        ArgosTranslate.translate(content, "nl", "en")
      end

    assert {:ok, "nl", content} == response_no_strip_html

    assert {:ok, "nl", expected_response_strip_html} == response_strip_html
  end
end
|
Loading…
Reference in a new issue