parser.ex 12.6 KB
Newer Older
Stephen M. Pallen's avatar
Stephen M. Pallen committed
1 2 3 4 5 6 7 8
defmodule AutoLinker.Parser do
  @moduledoc """
  Module to handle parsing the input string.
  """

  alias AutoLinker.Builder

  @doc """
Stephen M. Pallen's avatar
Stephen M. Pallen committed
9
  Parse the given string, identifying items to link.
Stephen M. Pallen's avatar
Stephen M. Pallen committed
10

Stephen M. Pallen's avatar
Stephen M. Pallen committed
11
  Parses the string, replacing the matching urls and phone numbers with an html link.
Stephen M. Pallen's avatar
Stephen M. Pallen committed
12 13 14 15

  ## Examples

      iex> AutoLinker.Parser.parse("Check out google.com")
minibikini's avatar
minibikini committed
16
      ~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">google.com</a>}
Stephen M. Pallen's avatar
Stephen M. Pallen committed
17 18 19 20 21 22 23 24 25

      iex> AutoLinker.Parser.parse("call me at x9999", phone: true)
      ~s{call me at <a href="#" class="phone-number" data-phone="9999">x9999</a>}

      iex> AutoLinker.Parser.parse("or at home on 555.555.5555", phone: true)
      ~s{or at home on <a href="#" class="phone-number" data-phone="5555555555">555.555.5555</a>}

      iex> AutoLinker.Parser.parse(", work (555) 555-5555", phone: true)
      ~s{, work <a href="#" class="phone-number" data-phone="5555555555">(555) 555-5555</a>}
Stephen M. Pallen's avatar
Stephen M. Pallen committed
26 27
  """

Stephen M. Pallen's avatar
Stephen M. Pallen committed
28
  # Buffers are rejected outright when they contain two or more consecutive dots,
  # or when the whole buffer is two or three dot-separated digit groups
  # (e.g. "1.2" or "1.2.3").
  @invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/
Stephen M. Pallen's avatar
Stephen M. Pallen committed
29

30 31
  @match_url ~r{^[\w\.-]+(?:\.[\w\.-]+)+[\w\-\._~%:/?#[\]@!\$&'\(\)\*\+,;=.]+$}

32
  @match_scheme ~r{^(?:\W*)?(?<url>(?:\W*https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u
Stephen M. Pallen's avatar
Stephen M. Pallen committed
33

34
  @match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))"
Stephen M. Pallen's avatar
Stephen M. Pallen committed
35

36
  # Captures the host part of a buffer as the named group `host`, after an
  # optional `http://` / `https://` prefix.
  # NOTE(review): `\\w@` in the optional group matches a literal backslash followed
  # by "w@", so that group can only match input containing a backslash; `\w@` may
  # have been intended. Confirm before changing, since it alters the captured host.
  @match_hostname ~r{^(?:\W*https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)}u
minibikini's avatar
minibikini committed
37 38 39

  @match_ip ~r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"

40 41
  # @user
  # @user@example.com
42
  @match_mention ~r"^@[a-zA-Z\d_-]+@[a-zA-Z0-9_-](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*|@[a-zA-Z\d_-]+"u
43 44

  # https://www.w3.org/TR/html5/forms.html#valid-e-mail-address
45
  @match_email ~r"^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"u
46

47
  @match_hashtag ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·][[:word:]_·\p{M}]*)/u
48 49 50 51 52 53 54 55 56 57 58 59 60 61

  @prefix_extra [
    "magnet:?",
    "dweb://",
    "dat://",
    "gopher://",
    "ipfs://",
    "ipns://",
    "irc://",
    "ircs://",
    "irc6://",
    "mumble://",
    "ssb://"
  ]
Stephen M. Pallen's avatar
Stephen M. Pallen committed
62

minibikini's avatar
minibikini committed
63
  # List of known TLDs, one per line in the file. The file is read while the
  # module is being compiled, and the relative path is resolved against the
  # working directory at that moment.
  # NOTE(review): no @external_resource is visible here, so changes to tlds.txt
  # presumably do not trigger recompilation — confirm.
  @tlds "./priv/tlds.txt" |> File.read!() |> String.split("\n", trim: true)
minibikini's avatar
minibikini committed
64

65 66
  @default_opts ~w(url)a

minibikini's avatar
minibikini committed
67
  # `input` is either a binary or a `{text, user_acc}` tuple; `opts` is a map or a
  # keyword list. A binary input yields only the processed text, a tuple input
  # yields whatever the option passes return for the tuple.
  def parse(input, opts \\ %{})

  # Binary input: wrap it with a nil accumulator and unwrap the text afterwards.
  def parse(input, opts) when is_binary(input) do
    elem(parse({input, nil}, opts), 0)
  end

  # Keyword-list options are normalised to a map first.
  def parse(input, opts) when is_list(opts) do
    parse(input, Enum.into(opts, %{}))
  end

  def parse(input, opts) do
    # Application-level configuration; `:attributes` lives under its own env key.
    app_opts = Enum.into(Application.get_env(:auto_linker, :opts, []), %{})
    attributes = Application.get_env(:auto_linker, :attributes, [])
    config = Map.put(app_opts, :attributes, attributes)

    # Switch on every default option that neither the caller nor the
    # application config has set.
    opts =
      for opt <- @default_opts,
          is_nil(opts[opt]) and is_nil(config[opt]),
          into: opts do
        {opt, true}
      end

    # Caller-supplied options take precedence over the application config.
    do_parse(input, Map.merge(config, opts))
  end

minibikini's avatar
minibikini committed
93 94
  # do_parse/2 runs one pass per enabled option and removes that option from the
  # map before recursing, until no clause-specific option is left. The clause
  # order below fixes the pass order: phone, hashtag, extra, markdown, email,
  # url, mention.

  # Explicitly disabled options are simply dropped.
  defp do_parse(input, %{phone: false} = opts) do
    do_parse(input, Map.delete(opts, :phone))
  end

  defp do_parse(input, %{url: false} = opts) do
    do_parse(input, Map.delete(opts, :url))
  end

  defp do_parse(input, %{phone: _} = opts) do
    linked = do_parse(input, opts, {"", "", :parsing}, &check_and_link_phone/3)
    do_parse(linked, Map.delete(opts, :phone))
  end

  defp do_parse(input, %{hashtag: true} = opts) do
    linked = do_parse(input, opts, {"", "", :parsing}, &check_and_link_hashtag/3)
    do_parse(linked, Map.delete(opts, :hashtag))
  end

  defp do_parse(input, %{extra: true} = opts) do
    linked = do_parse(input, opts, {"", "", :parsing}, &check_and_link_extra/3)
    do_parse(linked, Map.delete(opts, :extra))
  end

  # Markdown links are produced by the builder on the whole text in one go.
  defp do_parse({text, user_acc}, %{markdown: true} = opts) do
    rendered = Builder.create_markdown_links(text, opts)
    do_parse({rendered, user_acc}, Map.delete(opts, :markdown))
  end

  defp do_parse(input, %{email: true} = opts) do
    linked = do_parse(input, opts, {"", "", :parsing}, &check_and_link_email/3)
    do_parse(linked, Map.delete(opts, :email))
  end

  # URL pass. The text is left untouched when `:exclude_patterns` is a list and
  # the text starts with one of its entries.
  defp do_parse({text, user_acc}, %{url: _} = opts) do
    exclude = Map.get(opts, :exclude_patterns)

    linked =
      if is_list(exclude) and String.starts_with?(text, exclude) do
        {text, user_acc}
      else
        do_parse({text, user_acc}, opts, {"", "", :parsing}, &check_and_link/3)
      end

    do_parse(linked, Map.delete(opts, :url))
  end

  defp do_parse(input, %{mention: true} = opts) do
    linked = do_parse(input, opts, {"", "", :parsing}, &check_and_link_mention/3)
    do_parse(linked, Map.delete(opts, :mention))
  end

  # No pass left to run.
  defp do_parse(input, _opts), do: input
Stephen M. Pallen's avatar
Stephen M. Pallen committed
153

154
  # do_parse/4 consumes the text from the front, one byte or one short literal
  # prefix per step. The third argument is `{buffer, acc, state}`:
  #   * `buffer` - the token currently being collected,
  #   * `acc`    - the output produced so far,
  #   * `state`  - `:parsing`, `:skip`, `{:open, level}`, `{:attrs, level}`,
  #                `{:html, level}` or `{:close, level}`.
  # `handler` is a 3-arity function applied to completed tokens via run_handler/4.
  # The order of the clauses is significant.

  # Input exhausted, nothing buffered.
  defp do_parse({"", user_acc}, _opts, {"", acc, _state}, _handler) do
    {acc, user_acc}
  end

  # Input exhausted with a pending token: hand it to the handler and append it.
  defp do_parse({"", user_acc}, opts, {buffer, acc, _state}, handler) do
    {linked, user_acc} = run_handler(handler, buffer, opts, user_acc)
    {acc <> linked, user_acc}
  end

  # Text starting with "<a" while parsing: flush the buffer unprocessed and
  # switch to :skip. The prefix match accepts anything beginning with these two
  # bytes, not only anchor tags.
  defp do_parse({"<a" <> rest, user_acc}, opts, {buffer, acc, :parsing}, handler) do
    do_parse({rest, user_acc}, opts, {"", acc <> buffer <> "<a", :skip}, handler)
  end

  # "</a>" while skipping: flush the buffer unprocessed and resume parsing.
  defp do_parse({"</a>" <> rest, user_acc}, opts, {buffer, acc, :skip}, handler) do
    do_parse({rest, user_acc}, opts, {"", acc <> buffer <> "</a>", :parsing}, handler)
  end

  # "<" with an empty buffer while parsing: start collecting a tag at level 1.
  defp do_parse({"<" <> rest, user_acc}, opts, {"", acc, :parsing}, handler) do
    do_parse({rest, user_acc}, opts, {"<", acc, {:open, 1}}, handler)
  end

  # ">" while in the attribute section: the opening tag is complete.
  defp do_parse({">" <> rest, user_acc}, opts, {buffer, acc, {:attrs, level}}, handler) do
    do_parse({rest, user_acc}, opts, {"", acc <> buffer <> ">", {:html, level}}, handler)
  end

  # Any other byte in the attribute section is copied straight to the output.
  defp do_parse({<<byte, rest::binary>>, user_acc}, opts, {"", acc, {:attrs, level}}, handler) do
    do_parse({rest, user_acc}, opts, {"", acc <> <<byte>>, {:attrs, level}}, handler)
  end

  # "</" inside tag content: process the pending token, then start the closing tag.
  defp do_parse({"</" <> rest, user_acc}, opts, {buffer, acc, {:html, level}}, handler) do
    {linked, user_acc} = run_handler(handler, buffer, opts, user_acc)
    do_parse({rest, user_acc}, opts, {"", acc <> linked <> "</", {:close, level}}, handler)
  end

  # ">" finishing a closing tag at level 1: back to plain parsing.
  defp do_parse({">" <> rest, user_acc}, opts, {buffer, acc, {:close, 1}}, handler) do
    do_parse({rest, user_acc}, opts, {"", acc <> buffer <> ">", :parsing}, handler)
  end

  # ">" finishing a closing tag at a deeper level: back to content, one level up.
  defp do_parse({">" <> rest, user_acc}, opts, {buffer, acc, {:close, level}}, handler) do
    do_parse({rest, user_acc}, opts, {"", acc <> buffer <> ">", {:html, level - 1}}, handler)
  end

  # Whitespace after the tag name: flush it and move on to the attribute section.
  defp do_parse(
         {<<char::binary-size(1), rest::binary>>, user_acc},
         opts,
         {buffer, acc, {:open, level}},
         handler
       )
       when char in [" ", "\r", "\n"] do
    do_parse({rest, user_acc}, opts, {"", acc <> buffer <> char, {:attrs, level}}, handler)
  end

  # default cases where state is not important

  # While the :phone option is present, spaces stay inside the buffer so that
  # the handler later sees space-separated text as a single token.
  defp do_parse({" " <> rest, user_acc}, %{phone: _} = opts, {buffer, acc, state}, handler) do
    do_parse({rest, user_acc}, opts, {buffer <> " ", acc, state}, handler)
  end

  # Whitespace ends the current token: run the handler on it and emit the result
  # followed by the whitespace byte.
  # NOTE(review): this clause matches every remaining state, including :skip, so
  # whitespace-separated text after "<a" is still passed to the handler — confirm
  # whether that is intended.
  defp do_parse(
         {<<char::binary-size(1), rest::binary>>, user_acc},
         opts,
         {buffer, acc, state},
         handler
       )
       when char in [" ", "\r", "\n"] do
    {linked, user_acc} = run_handler(handler, buffer, opts, user_acc)
    do_parse({rest, user_acc}, opts, {"", acc <> linked <> char, state}, handler)
  end

  # Final byte of the input: it completes the token, which is processed at once.
  defp do_parse({<<byte>>, user_acc}, opts, {buffer, acc, state}, handler) do
    {linked, user_acc} = run_handler(handler, buffer <> <<byte>>, opts, user_acc)
    do_parse({"", user_acc}, opts, {"", acc <> linked, state}, handler)
  end

  # Any other byte extends the current token.
  defp do_parse({<<byte, rest::binary>>, user_acc}, opts, {buffer, acc, state}, handler) do
    do_parse({rest, user_acc}, opts, {buffer <> <<byte>>, acc, state}, handler)
  end
Stephen M. Pallen's avatar
Stephen M. Pallen committed
261

262 263 264 265 266 267 268 269 270 271 272
  # URL handler used when the `:scheme` option is true. If the buffer qualifies
  # as a URL, the `url` capture of @match_scheme is linked; when the capture is
  # only part of the buffer, the surrounding characters are kept around the link.
  def check_and_link(buffer, %{scheme: true} = opts, _user_acc) do
    if is_url?(buffer, true) do
      case Regex.run(@match_scheme, buffer, capture: [:url]) do
        [url] when url == buffer -> link_url(true, url, opts)
        [url] -> String.replace(buffer, url, link_url(true, url, opts))
      end
    else
      buffer
    end
  end

  # URL handler for every other option set: link the whole buffer or leave it.
  def check_and_link(buffer, opts, _user_acc) do
    url? = is_url?(buffer, opts[:scheme])
    link_url(url?, buffer, opts)
  end

279
  # Email handler: links the buffer when is_email?/1 accepts it.
  def check_and_link_email(buffer, opts, _user_acc) do
    link_email(is_email?(buffer), buffer, opts)
  end

285
  # Phone handler: passes the matches found by match_phone/1 (or nil) to link_phone/3.
  def check_and_link_phone(buffer, opts, _user_acc) do
    matches = match_phone(buffer)
    link_phone(matches, buffer, opts)
  end

291
  # Mention handler: passes the mention found at the start of the buffer (or nil)
  # to link_mention/4 together with the caller's accumulator.
  def check_and_link_mention(buffer, opts, user_acc) do
    mention = match_mention(buffer)
    link_mention(mention, buffer, opts, user_acc)
  end

297
  # Hashtag handler: passes the hashtag found at the start of the buffer (or nil)
  # to link_hashtag/4 together with the caller's accumulator.
  def check_and_link_hashtag(buffer, opts, user_acc) do
    hashtag = match_hashtag(buffer)
    link_hashtag(hashtag, buffer, opts, user_acc)
  end

303
  # "xmpp:" buffers are linked when the part after the prefix passes is_email?/1.
  def check_and_link_extra("xmpp:" <> handle = buffer, opts, _user_acc) do
    link_extra(is_email?(handle), buffer, opts)
  end

  # Any other buffer is linked when it starts with one of the @prefix_extra entries.
  def check_and_link_extra(buffer, opts, _user_acc) do
    extra? = String.starts_with?(buffer, @prefix_extra)
    link_extra(extra?, buffer, opts)
  end

  # @doc false
Stephen M. Pallen's avatar
Stephen M. Pallen committed
316
  # With `true` as the second argument the buffer is checked against
  # @match_scheme. It must not match @invalid_url and must pass is_valid_tld?/2.
  def is_url?(buffer, true) do
    not Regex.match?(@invalid_url, buffer) and
      is_valid_tld?(Regex.match?(@match_scheme, buffer), buffer)
  end

  # Any other second argument: the same checks, using @match_url instead.
  def is_url?(buffer, _) do
    not Regex.match?(@invalid_url, buffer) and
      is_valid_tld?(Regex.match?(@match_url, buffer), buffer)
  end

332 333 334 335
  # True when the buffer does not match @invalid_url, matches @match_email and
  # passes is_valid_tld?/2.
  def is_email?(buffer) do
    cond do
      Regex.match?(@invalid_url, buffer) -> false
      true -> is_valid_tld?(Regex.match?(@match_email, buffer), buffer)
    end
  end

minibikini's avatar
minibikini committed
340 341 342 343 344 345 346
  # Second stage of URL/email validation.
  #
  # The first argument is the result of the preceding regex check. When it is
  # `true`, the host is extracted from `buffer` with @match_hostname and accepted
  # if it is an IP address (is_ip?/1) or if the text after its last "." is listed
  # in @tlds. When it is `false`, the result is `false`.
  #
  # Always returns a boolean.
  def is_valid_tld?(true, buffer) do
    case Regex.run(@match_hostname, buffer, capture: [:host]) do
      [host] ->
        is_ip?(host) or Enum.member?(@tlds, host |> String.split(".") |> List.last())

      # @match_hostname cannot match a buffer whose first character is one the
      # host group excludes (":", "#", "~", "/", "?", newline). @match_email allows
      # some of those at the start of an address (e.g. "#a@b.com"), so
      # Regex.run/3 may return nil here; treat that as "not valid" instead of
      # raising a MatchError.
      _ ->
        false
    end
  end

  def is_valid_tld?(false, _), do: false

  # True when the whole buffer matches @match_ip.
  def is_ip?(buffer), do: Regex.match?(@match_ip, buffer)
minibikini's avatar
minibikini committed
357

Stephen M. Pallen's avatar
Stephen M. Pallen committed
358 359
  @doc false
  # Returns every @match_phone match in the buffer (the list produced by
  # Regex.scan/2), or nil when there is none.
  def match_phone(buffer) do
    matches = Regex.scan(@match_phone, buffer)
    if matches == [], do: nil, else: matches
  end

366 367 368 369 370 371 372 373 374 375 376 377 378 379
  # Returns the text matched by @match_mention, or nil when the regex does not
  # match (or yields anything other than a single-element list).
  def match_mention(buffer) do
    with [mention] <- Regex.run(@match_mention, buffer) do
      mention
    else
      _ -> nil
    end
  end

  # Returns the `tag` capture of @match_hashtag, or nil when the regex does not match.
  def match_hashtag(buffer) do
    with [hashtag] <- Regex.run(@match_hashtag, buffer, capture: [:tag]) do
      hashtag
    else
      _ -> nil
    end
  end

minibikini's avatar
minibikini committed
380
  # No hashtag found: the buffer is returned unchanged.
  def link_hashtag(nil, buffer, _opts, _user_acc) do
    buffer
  end

  # A custom `:hashtag_handler` builds the replacement; it is called with the
  # hashtag, the buffer, the options and the user accumulator.
  def link_hashtag(hashtag, buffer, %{hashtag_handler: handler} = opts, user_acc) do
    result = handler.(hashtag, buffer, opts, user_acc)
    maybe_update_buffer(result, hashtag, buffer)
  end

  # Default: the builder creates the link; the accumulator is not consulted.
  def link_hashtag(hashtag, buffer, opts, _user_acc) do
    result = Builder.create_hashtag_link(hashtag, buffer, opts)
    maybe_update_buffer(result, hashtag, buffer)
  end

minibikini's avatar
minibikini committed
394
  # No mention found: buffer and accumulator are returned unchanged.
  def link_mention(nil, buffer, _opts, user_acc) do
    {buffer, user_acc}
  end

  # A custom `:mention_handler` builds the replacement; it is called with the
  # mention, the buffer, the options and the user accumulator.
  def link_mention(mention, buffer, %{mention_handler: handler} = opts, user_acc) do
    result = handler.(mention, buffer, opts, user_acc)
    maybe_update_buffer(result, mention, buffer)
  end

  # Default: the builder creates the link; the accumulator is not consulted.
  def link_mention(mention, buffer, opts, _user_acc) do
    result = Builder.create_mention_link(mention, buffer, opts)
    maybe_update_buffer(result, mention, buffer)
  end

  # Normalises a handler/builder result. A plain binary is paired with a nil
  # accumulator. When the match is only part of the buffer and the result differs
  # from the buffer, the result is substituted for the match inside the buffer;
  # otherwise the (normalised) result is returned as it is.
  # NOTE(review): pairing a binary with nil means the tuple returned here carries
  # no user accumulator; callers that pattern-match the tuple appear to replace
  # their accumulator with nil — confirm this is intended.
  defp maybe_update_buffer(out, match, buffer) do
    result = if is_binary(out), do: {out, nil}, else: out

    case result do
      {text, user_acc} when match != buffer and text != buffer ->
        {String.replace(buffer, match, text), user_acc}

      other ->
        other
    end
  end

Stephen M. Pallen's avatar
Stephen M. Pallen committed
420 421 422
  # Without matches (nil) the buffer is returned unchanged; otherwise the builder
  # creates the phone links from the match list.
  def link_phone(matches, buffer, opts) do
    if is_nil(matches) do
      buffer
    else
      Builder.create_phone_link(matches, buffer, opts)
    end
  end

Stephen M. Pallen's avatar
Stephen M. Pallen committed
426 427 428 429 430
  @doc false
  # Builds a link only when the first argument is exactly `true`; any other value
  # leaves the buffer unchanged.
  def link_url(url?, buffer, opts) do
    case url? do
      true -> Builder.create_link(buffer, opts)
      _ -> buffer
    end
  end
432 433 434 435 436 437 438 439 440 441 442 443 444

  @doc false
  # Builds an email link only when the first argument is exactly `true`; any other
  # value leaves the buffer unchanged.
  def link_email(email?, buffer, opts) do
    case email? do
      true -> Builder.create_email_link(buffer, opts)
      _ -> buffer
    end
  end

  # Builds an "extra" link only when the first argument is exactly `true`; any
  # other value leaves the buffer unchanged.
  def link_extra(extra?, buffer, opts) do
    case extra? do
      true -> Builder.create_extra_link(buffer, opts)
      _ -> buffer
    end
  end
445

446 447
  # Calls the handler with the buffer, the options and the accumulator and
  # normalises its result to `{buffer, user_acc}`: a two-element tuple is returned
  # as it is, anything else is taken as the new buffer and paired with the
  # accumulator that was passed in.
  defp run_handler(handler, buffer, opts, user_acc) do
    case handler.(buffer, opts, user_acc) do
      {_linked, _new_acc} = pair -> pair
      linked -> {linked, user_acc}
    end
  end
Stephen M. Pallen's avatar
Stephen M. Pallen committed
452
end