From dba775cbc59cacffc8a77c0d5c785d33f8118eb1 Mon Sep 17 00:00:00 2001
From: Robert Prehn <3952444+prehnRA@users.noreply.github.com>
Date: Wed, 31 Jan 2024 19:42:55 -0600
Subject: [PATCH] feat: Update dump-languages logic
---
config/config.exs | 2 +-
lib/tree_sitter.ex | 89 +++++++++++++++++++++++++++++++++++++----
mix.exs | 5 ++-
mix.lock | 2 +
tree-sitter/config.json | 58 +++++++++++++++++++++++++++
5 files changed, 145 insertions(+), 11 deletions(-)
create mode 100644 tree-sitter/config.json
diff --git a/config/config.exs b/config/config.exs
index b2b7386..0d7b7a1 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -1,3 +1,3 @@
import Config
-config :tree_sitter, version: "0.20.8"
+config :tree_sitter, version: "0.20.8", config_directory: "tree-sitter"
diff --git a/lib/tree_sitter.ex b/lib/tree_sitter.ex
index 267e9a7..222bada 100644
--- a/lib/tree_sitter.ex
+++ b/lib/tree_sitter.ex
@@ -131,13 +131,8 @@ defmodule TreeSitter do
returns the status of the underlying call.
"""
def run(extra_args) when is_list(extra_args) do
- opts = [
- into: IO.stream(:stdio, :line),
- stderr_to_stdout: true
- ]
-
- bin_path()
- |> System.cmd(extra_args, opts)
+ extra_args
+ |> cmd()
|> elem(1)
end
@@ -211,6 +206,85 @@ defmodule TreeSitter do
File.cp!(result_path, bin_path)
end
+  # Extracts the language name from a grammar path such as
+  # ".../tree-sitter-ruby": everything after "tree-sitter-" up to the next "/".
+  @language_pattern ~r{tree-sitter-([^/]+)}
+
+  @doc """
+  Returns a map from language name to the first file type registered for it.
+
+  The data comes from running the `dump-languages` subcommand. Each
+  blank-line separated entry of its output is parsed as YAML, and the language
+  name is taken from the `tree-sitter-<name>` segment of the entry's `parser`
+  path. When several entries share a language name, the last one wins.
+
+  Entries without a `parser`, without any `file_types`, or whose parser path
+  does not contain a `tree-sitter-<name>` segment are skipped instead of
+  raising.
+  """
+  @spec language_to_extension() :: %{optional(String.t()) => String.t()}
+  def language_to_extension do
+    {out, _status} = cmd(["dump-languages"], [])
+
+    # `out` is the list of raw output chunks in arrival order. Chunk boundaries
+    # are arbitrary, so the chunks are concatenated as-is; joining them with
+    # "\n" would inject newlines (and possibly fake entry separators).
+    entries =
+      out
+      |> IO.iodata_to_binary()
+      |> String.split("\n\n", trim: true)
+
+    # A generator whose pattern does not match is skipped by `for`, which is
+    # what drops incomplete entries and parser paths the regex cannot read.
+    for entry <- entries,
+        %{"parser" => parser, "file_types" => [file_type | _]} <-
+          [YamlElixir.read_from_string!(entry)],
+        is_binary(parser) and is_binary(file_type),
+        [_, language] <- [Regex.run(@language_pattern, parser)],
+        into: %{} do
+      {language, file_type}
+    end
+  end
+
+  @doc """
+  Renders `text` as HTML for the language named `lang`.
+
+  The language is looked up in `language_to_extension/0`. When it maps to a
+  file type, the text is highlighted through the `highlight --html`
+  subcommand and the result is `{:ok, html}` or `{:error, output}`. When the
+  language is unknown, the result is `{:ok, html}` with the plain fallback
+  rendering.
+  """
+  def highlight_html(text, lang) do
+    case language_to_extension() do
+      %{^lang => extension} when not is_nil(extension) ->
+        do_highlight_html(text, extension)
+
+      _unknown_language ->
+        {:ok, fallback_highlight_html(text)}
+    end
+  end
+
+ # Builds the un-highlighted rendering that `highlight_html/2` returns when the
+ # requested language has no known file type: `text` is split on "\n" and each
+ # line is emitted together with its 1-based line number.
+ # NOTE(review): `text` is interpolated as-is; if the surrounding literals are
+ # HTML markup, it presumably needs escaping first - confirm against callers.
+ defp fallback_highlight_html(text) do
+ lines = text |> String.split("\n") |> Enum.with_index()
+
+ "
" <>
+ Enum.map_join(lines, "\n", fn {text, line} ->
+ ~s{
+ #{line + 1} |
+ #{text} |
+
}
+ end) <>
+ "
"
+ end
+
+  # Highlights `text` by writing it to a uniquely named temporary file whose
+  # name carries `ext`, then running the `highlight --html` subcommand on it.
+  #
+  # Returns `{:ok, html}` when the command exits with status 0 and
+  # `{:error, output}` otherwise. Raises if the temporary file or directory
+  # cannot be created. The temporary file (and its directory, if one was
+  # created) is removed before returning, whether or not highlighting worked.
+  defp do_highlight_html(text, ext) do
+    id = :crypto.strong_rand_bytes(16) |> Base.encode32()
+    tmp_dir = System.tmp_dir!()
+
+    # `ext` may be a whole dot-file name (leading "."). In that case the file
+    # keeps that exact name inside a unique directory; otherwise `ext` is
+    # appended as the extension of a unique file name.
+    # `cleanup_path` is whatever has to be deleted afterwards.
+    {path, cleanup_path} =
+      if String.starts_with?(ext, ".") do
+        dir = Path.join([tmp_dir, id])
+        File.mkdir_p!(dir)
+
+        {Path.join([dir, ext]), dir}
+      else
+        file = Path.join([tmp_dir, "#{id}.#{ext}"])
+
+        {file, file}
+      end
+
+    try do
+      File.write!(path, text)
+
+      case cmd(["highlight", "--html", path], []) do
+        {out, 0} ->
+          {:ok, Enum.join(out)}
+
+        {out, _error_status} ->
+          {:error, Enum.join(out)}
+      end
+    after
+      # Best-effort cleanup: a failure to delete must not mask the result.
+      File.rm_rf(cleanup_path)
+    end
+  end
+
+  # Runs the tree-sitter executable found at `bin_path/0` with `extra_args`.
+  #
+  # Output is collected into `into` (standard output, line by line, unless the
+  # caller passes another collectable) with stderr merged into stdout. When
+  # the `:config_directory` application setting is present, it is handed to
+  # the executable through the `TREE_SITTER_DIR` environment variable.
+  # Returns the `{collected_output, exit_status}` tuple from `System.cmd/3`.
+  defp cmd(extra_args, into \\ IO.stream(:stdio, :line)) do
+    base_opts = [into: into, stderr_to_stdout: true]
+
+    opts =
+      case Application.get_env(:tree_sitter, :config_directory) do
+        unset when unset in [nil, false] ->
+          base_opts
+
+        directory ->
+          [{:env, [{"TREE_SITTER_DIR", directory}]} | base_opts]
+      end
+
+    System.cmd(bin_path(), extra_args, opts)
+  end
+
defp freshdir_p(path) do
with {:ok, _} <- File.rm_rf(path),
:ok <- File.mkdir_p(path) do
@@ -220,7 +294,6 @@ defmodule TreeSitter do
end
end
- # Available targets: https://github.com/evanw/tree_sitter/tree/main/npm/@tree_sitter
defp target do
case :os.type() do
# Assuming it's an x86 CPU
diff --git a/mix.exs b/mix.exs
index 3282ece..2d73233 100644
--- a/mix.exs
+++ b/mix.exs
@@ -1,7 +1,7 @@
defmodule TreeSitter.MixProject do
use Mix.Project
- @version "0.0.1"
+ @version "0.0.2"
@source_url "https://gitlab.com/mythic-insight/tree_sitter"
def project do
@@ -38,7 +38,8 @@ defmodule TreeSitter.MixProject do
defp deps do
[
{:castore, ">= 0.0.0"},
- {:ex_doc, ">= 0.0.0", only: :docs}
+ {:ex_doc, ">= 0.0.0", only: :dev, runtime: false},
+ {:yaml_elixir, "~> 2.9"}
]
end
end
diff --git a/mix.lock b/mix.lock
index 8592d73..e5410c0 100644
--- a/mix.lock
+++ b/mix.lock
@@ -6,4 +6,6 @@
"makeup_elixir": {:hex, :makeup_elixir, "0.16.1", "cc9e3ca312f1cfeccc572b37a09980287e243648108384b97ff2b76e505c3555", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "e127a341ad1b209bd80f7bd1620a15693a9908ed780c3b763bccf7d200c767c6"},
"makeup_erlang": {:hex, :makeup_erlang, "0.1.2", "ad87296a092a46e03b7e9b0be7631ddcf64c790fa68a9ef5323b6cbb36affc72", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "f3f5a1ca93ce6e092d92b6d9c049bcda58a3b617a8d888f8e7231c85630e8108"},
"nimble_parsec": {:hex, :nimble_parsec, "1.3.1", "2c54013ecf170e249e9291ed0a62e5832f70a476c61da16f6aac6dca0189f2af", [:mix], [], "hexpm", "2682e3c0b2eb58d90c6375fc0cc30bc7be06f365bf72608804fb9cffa5e1b167"},
+ "yamerl": {:hex, :yamerl, "0.10.0", "4ff81fee2f1f6a46f1700c0d880b24d193ddb74bd14ef42cb0bcf46e81ef2f8e", [:rebar3], [], "hexpm", "346adb2963f1051dc837a2364e4acf6eb7d80097c0f53cbdc3046ec8ec4b4e6e"},
+ "yaml_elixir": {:hex, :yaml_elixir, "2.9.0", "9a256da867b37b8d2c1ffd5d9de373a4fda77a32a45b452f1708508ba7bbcb53", [:mix], [{:yamerl, "~> 0.10", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm", "0cb0e7d4c56f5e99a6253ed1a670ed0e39c13fc45a6da054033928607ac08dfc"},
}
diff --git a/tree-sitter/config.json b/tree-sitter/config.json
new file mode 100644
index 0000000..fb8e54f
--- /dev/null
+++ b/tree-sitter/config.json
@@ -0,0 +1,58 @@
+{
+ "parser-directories": [
+ "/home/prehnra/github",
+ "/home/prehnra/src",
+ "/home/prehnra/source",
+ "/home/prehnra/projects",
+ "/home/prehnra/treesitter"
+ ],
+ "theme": {
+ "function": 26,
+ "variable.parameter": {
+ "underline": true
+ },
+ "constant.builtin": {
+ "bold": true,
+ "color": 94
+ },
+ "tag": 18,
+ "constructor": 136,
+ "keyword": 56,
+ "punctuation.bracket": 239,
+ "number": {
+ "color": 94,
+ "bold": true
+ },
+ "operator": {
+ "color": 239,
+ "bold": true
+ },
+ "string": 28,
+ "attribute": {
+ "italic": true,
+ "color": 124
+ },
+ "string.special": 30,
+ "variable.builtin": {
+ "bold": true
+ },
+ "constant": 94,
+ "embedded": null,
+ "type.builtin": {
+ "color": 23,
+ "bold": true
+ },
+ "property": 124,
+ "module": 136,
+ "function.builtin": {
+ "color": 26,
+ "bold": true
+ },
+ "comment": {
+ "color": 245,
+ "italic": true
+ },
+ "punctuation.delimiter": 239,
+ "type": 23
+ }
+}