diff --git a/lib/pleroma/web/plugs/static_no_content_type.ex b/lib/pleroma/web/plugs/static_no_content_type.ex new file mode 100644 index 000000000..ea00a2d5d --- /dev/null +++ b/lib/pleroma/web/plugs/static_no_content_type.ex @@ -0,0 +1,469 @@ +# This is almost identical to Plug.Static from Plug 1.15.3 (2024-01-16) +# It being copied is a temporary measure to fix an urgent bug without +# needing to wait for merge of a suitable patch upstream +# The differences are: +# - this leading comment +# - renaming of the module from 'Plug.Static' to 'Pleroma.Web.Plugs.StaticNoCT' +# - additon of set_content_type option + +defmodule Pleroma.Web.Plugs.StaticNoCT do + @moduledoc """ + A plug for serving static assets. + + It requires two options: + + * `:at` - the request path to reach for static assets. + It must be a string. + + * `:from` - the file system path to read static assets from. + It can be either: a string containing a file system path, an + atom representing the application name (where assets will + be served from `priv/static`), a tuple containing the + application name and the directory to serve assets from (besides + `priv/static`), or an MFA tuple. + + The preferred form is to use `:from` with an atom or tuple, since + it will make your application independent from the starting directory. + For example, if you pass: + + plug Plug.Static, from: "priv/app/path" + + Plug.Static will be unable to serve assets if you build releases + or if you change the current directory. Instead do: + + plug Plug.Static, from: {:app_name, "priv/app/path"} + + If a static asset cannot be found, `Plug.Static` simply forwards + the connection to the rest of the pipeline. + + ## Cache mechanisms + + `Plug.Static` uses etags for HTTP caching. This means browsers/clients + should cache assets on the first request and validate the cache on + following requests, not downloading the static asset once again if it + has not changed. The cache-control for etags is specified by the + `cache_control_for_etags` option and defaults to `"public"`. + + However, `Plug.Static` also supports direct cache control by using + versioned query strings. If the request query string starts with + "?vsn=", `Plug.Static` assumes the application is versioning assets + and does not set the `ETag` header, meaning the cache behaviour will + be specified solely by the `cache_control_for_vsn_requests` config, + which defaults to `"public, max-age=31536000"`. + + ## Options + + * `:encodings` - list of 2-ary tuples where first value is value of + the `Accept-Encoding` header and second is extension of the file to + be served if given encoding is accepted by client. Entries will be tested + in order in list, so entries higher in list will be preferred. Defaults + to: `[]`. + + In addition to setting this value directly it supports 2 additional + options for compatibility reasons: + + + `:brotli` - will append `{"br", ".br"}` to the encodings list. + + `:gzip` - will append `{"gzip", ".gz"}` to the encodings list. + + Additional options will be added in the above order (Brotli takes + preference over Gzip) to reflect older behaviour which was set due + to fact that Brotli in general provides better compression ratio than + Gzip. + + * `:cache_control_for_etags` - sets the cache header for requests + that use etags. Defaults to `"public"`. + + * `:etag_generation` - specify a `{module, function, args}` to be used + to generate an etag. The `path` of the resource will be passed to + the function, as well as the `args`. If this option is not supplied, + etags will be generated based off of file size and modification time. + Note it is [recommended for the etag value to be quoted](https://tools.ietf.org/html/rfc7232#section-2.3), + which Plug won't do automatically. + + * `:cache_control_for_vsn_requests` - sets the cache header for + requests starting with "?vsn=" in the query string. Defaults to + `"public, max-age=31536000"`. + + * `:only` - filters which requests to serve. This is useful to avoid + file system access on every request when this plug is mounted + at `"/"`. For example, if `only: ["images", "favicon.ico"]` is + specified, only files in the "images" directory and the + "favicon.ico" file will be served by `Plug.Static`. + Note that `Plug.Static` matches these filters against request + uri and not against the filesystem. When requesting + a file with name containing non-ascii or special characters, + you should use urlencoded form. For example, you should write + `only: ["file%20name"]` instead of `only: ["file name"]`. + Defaults to `nil` (no filtering). + + * `:only_matching` - a relaxed version of `:only` that will + serve any request as long as one of the given values matches the + given path. For example, `only_matching: ["images", "favicon"]` + will match any request that starts at "images" or "favicon", + be it "/images/foo.png", "/images-high/foo.png", "/favicon.ico" + or "/favicon-high.ico". Such matches are useful when serving + digested files at the root. Defaults to `nil` (no filtering). + + * `:headers` - other headers to be set when serving static assets. Specify either + an enum of key-value pairs or a `{module, function, args}` to return an enum. The + `conn` will be passed to the function, as well as the `args`. + + * `:content_types` - custom MIME type mapping. As a map with filename as key + and content type as value. For example: + `content_types: %{"apple-app-site-association" => "application/json"}`. + + * `:set_content_type` - by default Plug.Static (re)sets the content type header + using auto-detection and the `:content_types` map. But when set to `false` + no content-type header will be inserted instead retaining the original + value or lack thereof. This can be useful when custom logic for appropiate + content types is needed which cannot be reasonably expressed as a static + filename map. + + ## Examples + + This plug can be mounted in a `Plug.Builder` pipeline as follows: + + defmodule MyPlug do + use Plug.Builder + + plug Plug.Static, + at: "/public", + from: :my_app, + only: ~w(images robots.txt) + plug :not_found + + def not_found(conn, _) do + send_resp(conn, 404, "not found") + end + end + + """ + + @behaviour Plug + @allowed_methods ~w(GET HEAD) + + import Plug.Conn + alias Plug.Conn + + # In this module, the `:prim_file` Erlang module along with the `:file_info` + # record are used instead of the more common and Elixir-y `File` module and + # `File.Stat` struct, respectively. The reason behind this is performance: all + # the `File` operations pass through a single process in order to support node + # operations that we simply don't need when serving assets. + + require Record + Record.defrecordp(:file_info, Record.extract(:file_info, from_lib: "kernel/include/file.hrl")) + + defmodule InvalidPathError do + defexception message: "invalid path for static asset", plug_status: 400 + end + + @impl true + def init(opts) do + from = + case Keyword.fetch!(opts, :from) do + {_, _} = from -> from + {_, _, _} = from -> from + from when is_atom(from) -> {from, "priv/static"} + from when is_binary(from) -> from + _ -> raise ArgumentError, ":from must be an atom, a binary or a tuple" + end + + encodings = + opts + |> Keyword.get(:encodings, []) + |> maybe_add("br", ".br", Keyword.get(opts, :brotli, false)) + |> maybe_add("gzip", ".gz", Keyword.get(opts, :gzip, false)) + + %{ + encodings: encodings, + only_rules: {Keyword.get(opts, :only, []), Keyword.get(opts, :only_matching, [])}, + qs_cache: Keyword.get(opts, :cache_control_for_vsn_requests, "public, max-age=31536000"), + et_cache: Keyword.get(opts, :cache_control_for_etags, "public"), + et_generation: Keyword.get(opts, :etag_generation, nil), + headers: Keyword.get(opts, :headers, %{}), + content_types: Keyword.get(opts, :content_types, %{}), + set_content_type: Keyword.get(opts, :set_content_type, true), + from: from, + at: opts |> Keyword.fetch!(:at) |> Plug.Router.Utils.split() + } + end + + @impl true + def call( + conn = %Conn{method: meth}, + %{at: at, only_rules: only_rules, from: from, encodings: encodings} = options + ) + when meth in @allowed_methods do + segments = subset(at, conn.path_info) + + if allowed?(only_rules, segments) do + segments = Enum.map(segments, &uri_decode/1) + + if invalid_path?(segments) do + raise InvalidPathError, "invalid path for static asset: #{conn.request_path}" + end + + path = path(from, segments) + range = get_req_header(conn, "range") + encoding = file_encoding(conn, path, range, encodings) + serve_static(encoding, conn, segments, range, options) + else + conn + end + end + + def call(conn, _options) do + conn + end + + defp uri_decode(path) do + # TODO: Remove rescue as this can't fail from Elixir v1.13 + try do + URI.decode(path) + rescue + ArgumentError -> + raise InvalidPathError + end + end + + defp allowed?(_only_rules, []), do: false + defp allowed?({[], []}, _list), do: true + + defp allowed?({full, prefix}, [h | _]) do + h in full or (prefix != [] and match?({0, _}, :binary.match(h, prefix))) + end + + defp maybe_put_content_type(conn, false, _, _), do: conn + + defp maybe_put_content_type(conn, _, types, filename) do + content_type = Map.get(types, filename) || MIME.from_path(filename) + + conn + |> put_resp_header("content-type", content_type) + end + + defp serve_static({content_encoding, file_info, path}, conn, segments, range, options) do + %{ + qs_cache: qs_cache, + et_cache: et_cache, + et_generation: et_generation, + headers: headers, + content_types: types, + set_content_type: set_content_type + } = options + + case put_cache_header(conn, qs_cache, et_cache, et_generation, file_info, path) do + {:stale, conn} -> + filename = List.last(segments) + + conn + |> maybe_put_content_type(set_content_type, types, filename) + |> put_resp_header("accept-ranges", "bytes") + |> maybe_add_encoding(content_encoding) + |> merge_headers(headers) + |> serve_range(file_info, path, range, options) + + {:fresh, conn} -> + conn + |> maybe_add_vary(options) + |> send_resp(304, "") + |> halt() + end + end + + defp serve_static(:error, conn, _segments, _range, _options) do + conn + end + + defp serve_range(conn, file_info, path, [range], options) do + file_info(size: file_size) = file_info + + with %{"bytes" => bytes} <- Plug.Conn.Utils.params(range), + {range_start, range_end} <- start_and_end(bytes, file_size) do + send_range(conn, path, range_start, range_end, file_size, options) + else + _ -> send_entire_file(conn, path, options) + end + end + + defp serve_range(conn, _file_info, path, _range, options) do + send_entire_file(conn, path, options) + end + + defp start_and_end("-" <> rest, file_size) do + case Integer.parse(rest) do + {last, ""} when last > 0 and last <= file_size -> {file_size - last, file_size - 1} + _ -> :error + end + end + + defp start_and_end(range, file_size) do + case Integer.parse(range) do + {first, "-"} when first >= 0 -> + {first, file_size - 1} + + {first, "-" <> rest} when first >= 0 -> + case Integer.parse(rest) do + {last, ""} when last >= first -> {first, min(last, file_size - 1)} + _ -> :error + end + + _ -> + :error + end + end + + defp send_range(conn, path, 0, range_end, file_size, options) when range_end == file_size - 1 do + send_entire_file(conn, path, options) + end + + defp send_range(conn, path, range_start, range_end, file_size, _options) do + length = range_end - range_start + 1 + + conn + |> put_resp_header("content-range", "bytes #{range_start}-#{range_end}/#{file_size}") + |> send_file(206, path, range_start, length) + |> halt() + end + + defp send_entire_file(conn, path, options) do + conn + |> maybe_add_vary(options) + |> send_file(200, path) + |> halt() + end + + defp maybe_add_encoding(conn, nil), do: conn + defp maybe_add_encoding(conn, ce), do: put_resp_header(conn, "content-encoding", ce) + + defp maybe_add_vary(conn, %{encodings: encodings}) do + # If we serve gzip or brotli at any moment, we need to set the proper vary + # header regardless of whether we are serving gzip content right now. + # See: http://www.fastly.com/blog/best-practices-for-using-the-vary-header/ + if encodings != [] do + update_in(conn.resp_headers, &[{"vary", "Accept-Encoding"} | &1]) + else + conn + end + end + + defp put_cache_header( + %Conn{query_string: "vsn=" <> _} = conn, + qs_cache, + _et_cache, + _et_generation, + _file_info, + _path + ) + when is_binary(qs_cache) do + {:stale, put_resp_header(conn, "cache-control", qs_cache)} + end + + defp put_cache_header(conn, _qs_cache, et_cache, et_generation, file_info, path) + when is_binary(et_cache) do + etag = etag_for_path(file_info, et_generation, path) + + conn = + conn + |> put_resp_header("cache-control", et_cache) + |> put_resp_header("etag", etag) + + if etag in get_req_header(conn, "if-none-match") do + {:fresh, conn} + else + {:stale, conn} + end + end + + defp put_cache_header(conn, _, _, _, _, _) do + {:stale, conn} + end + + defp etag_for_path(file_info, et_generation, path) do + case et_generation do + {module, function, args} -> + apply(module, function, [path | args]) + + nil -> + file_info(size: size, mtime: mtime) = file_info + < :erlang.phash2() |> Integer.to_string(16)::binary, ?">> + end + end + + defp file_encoding(conn, path, [_range], _encodings) do + # We do not support compression for range queries. + file_encoding(conn, path, nil, []) + end + + defp file_encoding(conn, path, _range, encodings) do + encoded = + Enum.find_value(encodings, fn {encoding, ext} -> + if file_info = accept_encoding?(conn, encoding) && regular_file_info(path <> ext) do + {encoding, file_info, path <> ext} + end + end) + + cond do + not is_nil(encoded) -> + encoded + + file_info = regular_file_info(path) -> + {nil, file_info, path} + + true -> + :error + end + end + + defp regular_file_info(path) do + case :prim_file.read_file_info(path) do + {:ok, file_info(type: :regular) = file_info} -> + file_info + + _ -> + nil + end + end + + defp accept_encoding?(conn, encoding) do + encoding? = &String.contains?(&1, [encoding, "*"]) + + Enum.any?(get_req_header(conn, "accept-encoding"), fn accept -> + accept |> Plug.Conn.Utils.list() |> Enum.any?(encoding?) + end) + end + + defp maybe_add(list, key, value, true), do: list ++ [{key, value}] + defp maybe_add(list, _key, _value, false), do: list + + defp path({module, function, arguments}, segments) + when is_atom(module) and is_atom(function) and is_list(arguments), + do: Enum.join([apply(module, function, arguments) | segments], "/") + + defp path({app, from}, segments) when is_atom(app) and is_binary(from), + do: Enum.join([Application.app_dir(app), from | segments], "/") + + defp path(from, segments), + do: Enum.join([from | segments], "/") + + defp subset([h | expected], [h | actual]), do: subset(expected, actual) + defp subset([], actual), do: actual + defp subset(_, _), do: [] + + defp invalid_path?(list) do + invalid_path?(list, :binary.compile_pattern(["/", "\\", ":", "\0"])) + end + + defp invalid_path?([h | _], _match) when h in [".", "..", ""], do: true + defp invalid_path?([h | t], match), do: String.contains?(h, match) or invalid_path?(t) + defp invalid_path?([], _match), do: false + + defp merge_headers(conn, {module, function, args}) do + merge_headers(conn, apply(module, function, [conn | args])) + end + + defp merge_headers(conn, headers) do + merge_resp_headers(conn, headers) + end +end