From dba775cbc59cacffc8a77c0d5c785d33f8118eb1 Mon Sep 17 00:00:00 2001
From: Robert Prehn <3952444+prehnRA@users.noreply.github.com>
Date: Wed, 31 Jan 2024 19:42:55 -0600
Subject: [PATCH] feat: Update dump-languages logic
---
 config/config.exs       |  2 +-
 lib/tree_sitter.ex      | 89 +++++++++++++++++++++++++++++++++++++----
 mix.exs                 |  5 ++-
 mix.lock                |  2 +
 tree-sitter/config.json | 58 +++++++++++++++++++++++++++
 5 files changed, 145 insertions(+), 11 deletions(-)
 create mode 100644 tree-sitter/config.json
diff --git a/config/config.exs b/config/config.exs
index b2b7386..0d7b7a1 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -1,3 +1,3 @@
 import Config
 
-config :tree_sitter, version: "0.20.8"
+config :tree_sitter, version: "0.20.8", config_directory: "tree-sitter" # exported to the CLI as TREE_SITTER_DIR
diff --git a/lib/tree_sitter.ex b/lib/tree_sitter.ex
index 267e9a7..222bada 100644
--- a/lib/tree_sitter.ex
+++ b/lib/tree_sitter.ex
@@ -131,13 +131,8 @@ defmodule TreeSitter do
   returns the status of the underlying call.
   """
   def run(extra_args) when is_list(extra_args) do
-    opts = [
-      into: IO.stream(:stdio, :line),
-      stderr_to_stdout: true
-    ]
-
-    bin_path()
-    |> System.cmd(extra_args, opts)
+    # cmd/2 streams output to stdio by default and sets TREE_SITTER_DIR when configured.
+    cmd(extra_args)
     |> elem(1)
   end
 
@@ -211,6 +206,85 @@ defmodule TreeSitter do
     File.cp!(result_path, bin_path)
   end
 
+  @language_pattern ~r{tree-sitter-([^/]+)}
+  @doc "Maps each language from `tree-sitter dump-languages` to its first file type."
+  def language_to_extension do
+    # `into: []` collects raw output chunks (not lines), so concatenate them as-is.
+    {chunks, _status} = cmd(["dump-languages"], [])
+
+    # Entries are separated by blank lines and parsed as YAML. Entries without a
+    # parser path, a file type or a `tree-sitter-<language>` directory are skipped.
+    for entry <- chunks |> IO.iodata_to_binary() |> String.split("\n\n", trim: true),
+        %{"parser" => parser, "file_types" => [file_type | _]} when is_binary(parser) <-
+          [YamlElixir.read_from_string!(entry)],
+        [_, language] <- [Regex.run(@language_pattern, parser)],
+        into: %{} do
+      {language, file_type}
+    end
+  end
+
+  @doc "Highlights `text` as HTML for `lang`; returns `{:ok, html}` or `{:error, output}`."
+  def highlight_html(text, lang) do
+    # Languages missing from `language_to_extension/0` get the plain fallback.
+    extension = Map.get(language_to_extension(), lang)
+
+    if is_nil(extension),
+      do: {:ok, fallback_highlight_html(text)},
+      else: do_highlight_html(text, extension)
+  end
+
+  defp fallback_highlight_html(text) do
+    lines = text |> String.split("\n") |> Enum.with_index()
+    # NOTE(review): tags restored after markup stripping (confirm); `text` is not escaped.
+    "<table>" <>
+      Enum.map_join(lines, "\n", fn {text, line} ->
+        ~s{<tr>
+        <td>#{line + 1}</td>
+        <td>#{text}</td>
+      </tr>}
+      end) <>
+      "</table>"
+  end
+
+  # Highlights `text` with `tree-sitter highlight --html` via a uniquely named temp
+  # file, which is removed afterwards. Returns `{:ok, html}` or `{:error, output}`.
+  defp do_highlight_html(text, ext) do
+    base = Path.join(System.tmp_dir!(), Base.encode32(:crypto.strong_rand_bytes(16)))
+
+    # An `ext` starting with "." is used as the complete file name inside a unique
+    # directory; any other `ext` becomes the extension of a unique file name.
+    dotfile? = String.starts_with?(ext, ".")
+    path = if dotfile?, do: Path.join(base, ext), else: "#{base}.#{ext}"
+
+    try do
+      if dotfile?, do: File.mkdir_p!(base)
+      File.write!(path, text)
+
+      case cmd(["highlight", "--html", path], []) do
+        {out, 0} -> {:ok, Enum.join(out)}
+        {out, _error_status} -> {:error, Enum.join(out)}
+      end
+    after
+      File.rm_rf(if(dotfile?, do: base, else: path))
+    end
+  end
+
+  # Runs the tree-sitter binary with `extra_args`, collecting its output (stderr
+  # merged into stdout) into `into`, and returns `System.cmd/3`'s result.
+  defp cmd(extra_args, into \\ IO.stream(:stdio, :line)) do
+    base_opts = [into: into, stderr_to_stdout: true]
+
+    # TREE_SITTER_DIR is only passed to the binary when a :config_directory
+    # is configured for the :tree_sitter application.
+    opts =
+      case Application.get_env(:tree_sitter, :config_directory) do
+        unset when unset in [nil, false] -> base_opts
+        config_directory -> [{:env, [{"TREE_SITTER_DIR", config_directory}]} | base_opts]
+      end
+
+    System.cmd(bin_path(), extra_args, opts)
+  end
+
   defp freshdir_p(path) do
     with {:ok, _} <- File.rm_rf(path),
          :ok <- File.mkdir_p(path) do
@@ -220,7 +294,6 @@ defmodule TreeSitter do
     end
   end
 
-  # Available targets: https://github.com/evanw/tree_sitter/tree/main/npm/@tree_sitter
   defp target do
     case :os.type() do
       # Assuming it's an x86 CPU
diff --git a/mix.exs b/mix.exs
index 3282ece..2d73233 100644
--- a/mix.exs
+++ b/mix.exs
@@ -1,7 +1,7 @@
 defmodule TreeSitter.MixProject do
   use Mix.Project
 
-  @version "0.0.1"
+  @version "0.0.2"
   @source_url "https://gitlab.com/mythic-insight/tree_sitter"
 
   def project do
@@ -38,7 +38,8 @@ defmodule TreeSitter.MixProject do
   defp deps do
     [
       {:castore, ">= 0.0.0"},
-      {:ex_doc, ">= 0.0.0", only: :docs}
+      {:ex_doc, ">= 0.0.0", only: :dev, runtime: false},
+      {:yaml_elixir, "~> 2.9"} # parses `tree-sitter dump-languages` output
     ]
   end
 end
diff --git a/mix.lock b/mix.lock
index 8592d73..e5410c0 100644
--- a/mix.lock
+++ b/mix.lock
@@ -6,4 +6,6 @@
   "makeup_elixir": {:hex, :makeup_elixir, "0.16.1", "cc9e3ca312f1cfeccc572b37a09980287e243648108384b97ff2b76e505c3555", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "e127a341ad1b209bd80f7bd1620a15693a9908ed780c3b763bccf7d200c767c6"},
   "makeup_erlang": {:hex, :makeup_erlang, "0.1.2", "ad87296a092a46e03b7e9b0be7631ddcf64c790fa68a9ef5323b6cbb36affc72", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "f3f5a1ca93ce6e092d92b6d9c049bcda58a3b617a8d888f8e7231c85630e8108"},
   "nimble_parsec": {:hex, :nimble_parsec, "1.3.1", "2c54013ecf170e249e9291ed0a62e5832f70a476c61da16f6aac6dca0189f2af", [:mix], [], "hexpm", "2682e3c0b2eb58d90c6375fc0cc30bc7be06f365bf72608804fb9cffa5e1b167"},
+  "yamerl": {:hex, :yamerl, "0.10.0", "4ff81fee2f1f6a46f1700c0d880b24d193ddb74bd14ef42cb0bcf46e81ef2f8e", [:rebar3], [], "hexpm", "346adb2963f1051dc837a2364e4acf6eb7d80097c0f53cbdc3046ec8ec4b4e6e"},
+  "yaml_elixir": {:hex, :yaml_elixir, "2.9.0", "9a256da867b37b8d2c1ffd5d9de373a4fda77a32a45b452f1708508ba7bbcb53", [:mix], [{:yamerl, "~> 0.10", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm", "0cb0e7d4c56f5e99a6253ed1a670ed0e39c13fc45a6da054033928607ac08dfc"},
 }
diff --git a/tree-sitter/config.json b/tree-sitter/config.json
new file mode 100644
index 0000000..fb8e54f
--- /dev/null
+++ b/tree-sitter/config.json
@@ -0,0 +1,58 @@
+{
+    "parser-directories": [
+        "/home/prehnra/github",
+        "/home/prehnra/src",
+        "/home/prehnra/source",
+        "/home/prehnra/projects",
+        "/home/prehnra/treesitter"
+    ],
+    "theme": {
+        "function": 26,
+        "variable.parameter": {
+            "underline": true
+        },
+        "constant.builtin": {
+            "bold": true,
+            "color": 94
+        },
+        "tag": 18,
+        "constructor": 136,
+        "keyword": 56,
+        "punctuation.bracket": 239,
+        "number": {
+            "color": 94,
+            "bold": true
+        },
+        "operator": {
+            "color": 239,
+            "bold": true
+        },
+        "string": 28,
+        "attribute": {
+            "italic": true,
+            "color": 124
+        },
+        "string.special": 30,
+        "variable.builtin": {
+            "bold": true
+        },
+        "constant": 94,
+        "embedded": null,
+        "type.builtin": {
+            "color": 23,
+            "bold": true
+        },
+        "property": 124,
+        "module": 136,
+        "function.builtin": {
+            "color": 26,
+            "bold": true
+        },
+        "comment": {
+            "color": 245,
+            "italic": true
+        },
+        "punctuation.delimiter": 239,
+        "type": 23
+    }
+}