From 237b2068f9bb23c590420fda3ac677badd9d839f Mon Sep 17 00:00:00 2001
From: feld <feld@feld.me>
Date: Tue, 11 Feb 2020 16:55:18 +0000
Subject: [PATCH] Revert "Merge branch 'feat/floki-fasthtml' into 'develop'"

This reverts merge request !2194
---
 config/config.exs                             |  2 --
 lib/pleroma/html.ex                           |  1 -
 .../activity_pub/mrf/anti_link_spam_policy.ex |  1 -
 lib/pleroma/web/metadata/rel_me.ex            |  6 ++---
 lib/pleroma/web/rel_me.ex                     |  5 ++---
 lib/pleroma/web/rich_media/parser.ex          |  6 ++---
 mix.exs                                       |  2 +-
 mix.lock                                      |  8 +++----
 .../rich_media/parsers/twitter_card_test.exs  | 22 +++++--------------
 9 files changed, 18 insertions(+), 35 deletions(-)

diff --git a/config/config.exs b/config/config.exs
index 364aaf776..41c1ff637 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -612,8 +612,6 @@ config :pleroma, :modules, runtime_dir: "instance/modules"
 
 config :pleroma, configurable_from_database: false
 
-config :floki, :html_parser, Floki.HTMLParser.FastHtml
-
 # Import environment specific config. This must remain at the bottom
 # of this file so it overrides the configuration defined above.
 import_config "#{Mix.env()}.exs"
diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex
index 05946aa96..11513106e 100644
--- a/lib/pleroma/html.ex
+++ b/lib/pleroma/html.ex
@@ -108,7 +108,6 @@ defmodule Pleroma.HTML do
     Cachex.fetch!(:scrubber_cache, key, fn _key ->
       result =
         content
-        |> Floki.parse_fragment!()
         |> Floki.filter_out("a.mention,a.hashtag,a[rel~=\"tag\"]")
         |> Floki.attribute("a", "href")
         |> Enum.at(0)
diff --git a/lib/pleroma/web/activity_pub/mrf/anti_link_spam_policy.ex b/lib/pleroma/web/activity_pub/mrf/anti_link_spam_policy.ex
index 802d10edc..8abe18e29 100644
--- a/lib/pleroma/web/activity_pub/mrf/anti_link_spam_policy.ex
+++ b/lib/pleroma/web/activity_pub/mrf/anti_link_spam_policy.ex
@@ -17,7 +17,6 @@ defmodule Pleroma.Web.ActivityPub.MRF.AntiLinkSpamPolicy do
   # does the post contain links?
   defp contains_links?(%{"content" => content} = _object) do
     content
-    |> Floki.parse_fragment!()
     |> Floki.filter_out("a.mention,a.hashtag,a[rel~=\"tag\"],a.zrl")
     |> Floki.attribute("a", "href")
     |> length() > 0
diff --git a/lib/pleroma/web/metadata/rel_me.ex b/lib/pleroma/web/metadata/rel_me.ex
index 86dcc1a3b..f87fc1973 100644
--- a/lib/pleroma/web/metadata/rel_me.ex
+++ b/lib/pleroma/web/metadata/rel_me.ex
@@ -8,10 +8,8 @@ defmodule Pleroma.Web.Metadata.Providers.RelMe do
 
   @impl Provider
   def build_tags(%{user: user}) do
-    bio_tree = Floki.parse_fragment!(user.bio)
-
-    (Floki.attribute(bio_tree, "link[rel~=me]", "href") ++
-       Floki.attribute(bio_tree, "a[rel~=me]", "href"))
+    (Floki.attribute(user.bio, "link[rel~=me]", "href") ++
+       Floki.attribute(user.bio, "a[rel~=me]", "href"))
     |> Enum.map(fn link ->
       {:link, [rel: "me", href: link], []}
     end)
diff --git a/lib/pleroma/web/rel_me.ex b/lib/pleroma/web/rel_me.ex
index 540fa65df..16b1a53d2 100644
--- a/lib/pleroma/web/rel_me.ex
+++ b/lib/pleroma/web/rel_me.ex
@@ -27,10 +27,9 @@ defmodule Pleroma.Web.RelMe do
   defp parse_url(url) do
     with {:ok, %Tesla.Env{body: html, status: status}} when status in 200..299 <-
            Pleroma.HTTP.get(url, [], adapter: @hackney_options),
-         {:ok, html_tree} <- Floki.parse_document(html),
          data <-
-           Floki.attribute(html_tree, "link[rel~=me]", "href") ++
-             Floki.attribute(html_tree, "a[rel~=me]", "href") do
+           Floki.attribute(html, "link[rel~=me]", "href") ++
+             Floki.attribute(html, "a[rel~=me]", "href") do
       {:ok, data}
     end
   rescue
diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex
index 9702e90f1..c06b0a0f2 100644
--- a/lib/pleroma/web/rich_media/parser.ex
+++ b/lib/pleroma/web/rich_media/parser.ex
@@ -81,18 +81,18 @@ defmodule Pleroma.Web.RichMedia.Parser do
       {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: @hackney_options)
 
       html
-      |> parse_html()
+      |> parse_html
       |> maybe_parse()
       |> Map.put(:url, url)
       |> clean_parsed_data()
       |> check_parsed_data()
     rescue
       e ->
-        {:error, "Parsing error: #{inspect(e)} #{inspect(__STACKTRACE__)}"}
+        {:error, "Parsing error: #{inspect(e)}"}
     end
   end
 
-  defp parse_html(html), do: Floki.parse_document!(html)
+  defp parse_html(html), do: Floki.parse(html)
 
   defp maybe_parse(html) do
     Enum.reduce_while(parsers(), %{}, fn parser, acc ->
diff --git a/mix.exs b/mix.exs
index 4083da01e..3e3eac521 100644
--- a/mix.exs
+++ b/mix.exs
@@ -139,7 +139,7 @@ defmodule Pleroma.Mixfile do
       {:phoenix_swoosh, "~> 0.2"},
       {:gen_smtp, "~> 0.13"},
       {:websocket_client, git: "https://github.com/jeremyong/websocket_client.git", only: :test},
-      {:floki, "~> 0.25"},
+      {:floki, "~> 0.23.0"},
       {:ex_syslogger, github: "slashmili/ex_syslogger", tag: "1.4.0"},
       {:timex, "~> 3.5"},
       {:ueberauth, "~> 0.4"},
diff --git a/mix.lock b/mix.lock
index 5c26bdd35..69eec5431 100644
--- a/mix.lock
+++ b/mix.lock
@@ -37,16 +37,16 @@
   "ex_machina": {:hex, :ex_machina, "2.3.0", "92a5ad0a8b10ea6314b876a99c8c9e3f25f4dde71a2a835845b136b9adaf199a", [:mix], [{:ecto, "~> 2.2 or ~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_sql, "~> 3.0", [hex: :ecto_sql, repo: "hexpm", optional: true]}], "hexpm"},
   "ex_syslogger": {:git, "https://github.com/slashmili/ex_syslogger.git", "f3963399047af17e038897c69e20d552e6899e1d", [tag: "1.4.0"]},
   "excoveralls": {:hex, :excoveralls, "0.12.1", "a553c59f6850d0aff3770e4729515762ba7c8e41eedde03208182a8dc9d0ce07", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm"},
-  "fast_html": {:hex, :fast_html, "1.0.2", "b2a32022741699421e90762ce904cacb4faf12c10129acc3674262dd7fa5d2b6", [:make, :mix], [], "hexpm"},
-  "fast_sanitize": {:hex, :fast_sanitize, "0.1.7", "2a7cd8734c88a2de6de55022104f8a3b87f1fdbe8bbf131d9049764b53d50d0d", [:mix], [{:fast_html, "~> 1.0", [hex: :fast_html, repo: "hexpm", optional: false]}, {:plug, "~> 1.8", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"},
+  "fast_html": {:hex, :fast_html, "0.99.4", "d80812664f0429607e1d880fba0ef04da87a2e4fa596701bcaae17953535695c", [:make, :mix], [], "hexpm"},
+  "fast_sanitize": {:hex, :fast_sanitize, "0.1.4", "6c2e7203ca2f8275527a3021ba6e9d5d4ee213a47dc214a97c128737c9e56df1", [:mix], [{:fast_html, "~> 0.99", [hex: :fast_html, repo: "hexpm", optional: false]}, {:plug, "~> 1.8", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"},
   "flake_id": {:hex, :flake_id, "0.1.0", "7716b086d2e405d09b647121a166498a0d93d1a623bead243e1f74216079ccb3", [:mix], [{:base62, "~> 1.2", [hex: :base62, repo: "hexpm", optional: false]}, {:ecto, ">= 2.0.0", [hex: :ecto, repo: "hexpm", optional: true]}], "hexpm"},
-  "floki": {:hex, :floki, "0.25.0", "b1c9ddf5f32a3a90b43b76f3386ca054325dc2478af020e87b5111c19f2284ac", [:mix], [{:html_entities, "~> 0.5.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm"},
+  "floki": {:hex, :floki, "0.23.1", "e100306ce7d8841d70a559748e5091542e2cfc67ffb3ade92b89a8435034dab1", [:mix], [{:html_entities, "~> 0.5.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm"},
   "gen_smtp": {:hex, :gen_smtp, "0.15.0", "9f51960c17769b26833b50df0b96123605a8024738b62db747fece14eb2fbfcc", [:rebar3], [], "hexpm"},
   "gen_stage": {:hex, :gen_stage, "0.14.3", "d0c66f1c87faa301c1a85a809a3ee9097a4264b2edf7644bf5c123237ef732bf", [:mix], [], "hexpm"},
   "gen_state_machine": {:hex, :gen_state_machine, "2.0.5", "9ac15ec6e66acac994cc442dcc2c6f9796cf380ec4b08267223014be1c728a95", [:mix], [], "hexpm"},
   "gettext": {:hex, :gettext, "0.17.1", "8baab33482df4907b3eae22f719da492cee3981a26e649b9c2be1c0192616962", [:mix], [], "hexpm"},
   "hackney": {:hex, :hackney, "1.15.2", "07e33c794f8f8964ee86cebec1a8ed88db5070e52e904b8f12209773c1036085", [:rebar3], [{:certifi, "2.5.1", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "6.0.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "1.0.1", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "1.1.5", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm"},
-  "html_entities": {:hex, :html_entities, "0.5.1", "1c9715058b42c35a2ab65edc5b36d0ea66dd083767bef6e3edb57870ef556549", [:mix], [], "hexpm"},
+  "html_entities": {:hex, :html_entities, "0.5.0", "40f5c5b9cbe23073b48a4e69c67b6c11974f623a76165e2b92d098c0e88ccb1d", [:mix], [], "hexpm"},
   "html_sanitize_ex": {:hex, :html_sanitize_ex, "1.3.0", "f005ad692b717691203f940c686208aa3d8ffd9dd4bb3699240096a51fa9564e", [:mix], [{:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"},
   "http_signatures": {:git, "https://git.pleroma.social/pleroma/http_signatures.git", "293d77bb6f4a67ac8bde1428735c3b42f22cbb30", [ref: "293d77bb6f4a67ac8bde1428735c3b42f22cbb30"]},
   "httpoison": {:hex, :httpoison, "1.6.1", "2ce5bf6e535cd0ab02e905ba8c276580bab80052c5c549f53ddea52d72e81f33", [:mix], [{:hackney, "~> 1.15 and >= 1.15.2", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm"},
diff --git a/test/web/rich_media/parsers/twitter_card_test.exs b/test/web/rich_media/parsers/twitter_card_test.exs
index f2ebbde7e..751ca614c 100644
--- a/test/web/rich_media/parsers/twitter_card_test.exs
+++ b/test/web/rich_media/parsers/twitter_card_test.exs
@@ -7,14 +7,11 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
   alias Pleroma.Web.RichMedia.Parsers.TwitterCard
 
   test "returns error when html not contains twitter card" do
-    assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) ==
-             {:error, "No twitter card metadata found"}
+    assert TwitterCard.parse("", %{}) == {:error, "No twitter card metadata found"}
   end
 
   test "parses twitter card with only name attributes" do
-    html =
-      File.read!("test/fixtures/nypd-facial-recognition-children-teenagers3.html")
-      |> Floki.parse_document!()
+    html = File.read!("test/fixtures/nypd-facial-recognition-children-teenagers3.html")
 
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
@@ -29,9 +26,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
   end
 
   test "parses twitter card with only property attributes" do
-    html =
-      File.read!("test/fixtures/nypd-facial-recognition-children-teenagers2.html")
-      |> Floki.parse_document!()
+    html = File.read!("test/fixtures/nypd-facial-recognition-children-teenagers2.html")
 
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
@@ -50,9 +45,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
   end
 
   test "parses twitter card with name & property attributes" do
-    html =
-      File.read!("test/fixtures/nypd-facial-recognition-children-teenagers.html")
-      |> Floki.parse_document!()
+    html = File.read!("test/fixtures/nypd-facial-recognition-children-teenagers.html")
 
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
@@ -80,8 +73,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
         "YTQ5MF9EQVIgZXhodW1hdGlvbiBvZiBNYXJnYXJldCBDb3JiaW4gZ3JhdmUgMTkyNi5qcGciXSxbInAiLCJjb252ZXJ0IiwiIl0sWyJwIiwiY29udmVydCIsIi1xdWFsaXR5IDgxIC1hdXRvLW9" <>
         "yaWVudCJdLFsicCIsInRodW1iIiwiNjAweD4iXV0/DAR%20exhumation%20of%20Margaret%20Corbin%20grave%201926.jpg"
 
-    html =
-      File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
+    html = File.read!("test/fixtures/margaret-corbin-grave-west-point.html")
 
     assert TwitterCard.parse(html, %{}) ==
              {:ok,
@@ -95,9 +87,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
   end
 
   test "takes first founded title in html head if there is html markup error" do
-    html =
-      File.read!("test/fixtures/nypd-facial-recognition-children-teenagers4.html")
-      |> Floki.parse_document!()
+    html = File.read!("test/fixtures/nypd-facial-recognition-children-teenagers4.html")
 
     assert TwitterCard.parse(html, %{}) ==
              {:ok,