2023-09-22 09:42:35 +00:00
|
|
|
defmodule TreeSitter do
|
|
|
|
# https://registry.npmjs.org/tree-sitter/latest
|
|
|
|
@latest_version "0.20.8"
|
|
|
|
|
|
|
|
@moduledoc """
|
|
|
|
TreeSitter is an installer and runner for [tree_sitter](https://tree-sitter.github.io/tree-sitter/).
|
|
|
|
|
|
|
|
## TreeSitter configuration
|
|
|
|
|
|
|
|
There are three global configurations for the tree_sitter application:
|
|
|
|
|
|
|
|
* `:version` - the expected tree_sitter version
|
|
|
|
|
|
|
|
* `:cacerts_path` - the path to the CA certificate file used to verify
https connections
|
|
|
|
|
|
|
|
* `:path` - the path to find the tree_sitter executable at. By
|
|
|
|
default, it is automatically downloaded and placed inside
|
|
|
|
the `_build` directory of your current app
|
|
|
|
|
|
|
|
Overriding the `:path` is not recommended, as we will automatically
|
|
|
|
download and manage `tree-sitter` for you. But in case you can't download
|
|
|
|
it (for example, the npm registry is behind a proxy), you may want to
|
|
|
|
set the `:path` to a configurable system location.
|
|
|
|
|
|
|
|
For instance, you can install `tree-sitter` globally with `npm`:
|
|
|
|
|
|
|
|
$ npm install -g tree-sitter
|
|
|
|
|
|
|
|
On Unix, the executable will be at:
|
|
|
|
|
|
|
|
NPM_ROOT/tree-sitter/node_modules/@tree-sitter/TARGET/bin/tree_sitter
|
|
|
|
|
|
|
|
On Windows, it will be at:
|
|
|
|
|
|
|
|
NPM_ROOT/tree-sitter/node_modules/@tree-sitter/win32-x(32|64)/tree_sitter.exe
|
|
|
|
|
|
|
|
Where `NPM_ROOT` is the result of `npm root -g` and `TARGET` is your system
|
|
|
|
target architecture.
|
|
|
|
|
|
|
|
Once you find the location of the executable, you can store it in a
|
|
|
|
`MIX_TREE_SITTER_PATH` environment variable, which you can then read in
|
|
|
|
your configuration file:
|
|
|
|
|
|
|
|
config :tree_sitter, path: System.get_env("MIX_TREE_SITTER_PATH")
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
use Application
|
|
|
|
require Logger
|
|
|
|
|
|
|
|
@doc false
def start(_type, _args) do
  # Warn when no version is pinned; `configured_version/0` then falls back to
  # the latest version known to this module.
  if !Application.get_env(:tree_sitter, :version) do
    Logger.warning("""
    tree_sitter version is not configured. Please set it in your config files:

    config :tree_sitter, :version, "#{latest_version()}"
    """)
  end

  expected = configured_version()

  # Only complain when an executable is present AND its reported version
  # string does not contain the expected version. A missing executable is
  # not an error at boot.
  with {:ok, installed} <- bin_version(),
       false <- installed =~ expected do
    Logger.warning("""
    Outdated tree_sitter version. Expected #{expected}, got #{installed}. \
    Please run `mix tree_sitter.install` or update the version in your config files.\
    """)
  end

  # The supervisor starts empty; the installer task is added on demand.
  Supervisor.start_link([], strategy: :one_for_one, name: __MODULE__.Supervisor)
end
|
|
|
|
|
|
|
|
@doc false
# The newest tree_sitter release known when this library was published.
def latest_version do
  @latest_version
end
|
|
|
|
|
|
|
|
@doc """
Returns the configured tree_sitter version.

Reads `:version` from the `:tree_sitter` application environment and falls
back to `latest_version/0` when it is not set.
"""
def configured_version do
  fallback = latest_version()
  Application.get_env(:tree_sitter, :version, fallback)
end
|
|
|
|
|
|
|
|
@doc """
Returns the path to the executable.

The `:path` application setting wins when present. Otherwise the path points
next to the Mix build directory (or inside `_build` under the current working
directory when Mix is not loaded) and is suffixed with the system target.

The executable may not be available if it was not yet installed.
"""
def bin_path do
  # The default is computed lazily: resolving the target raises on unsupported
  # architectures, which must not happen when the user supplied `:path`.
  Application.get_env(:tree_sitter, :path) || default_bin_path()
end

# Location the installer manages when no explicit `:path` is configured.
defp default_bin_path do
  name = "tree_sitter-#{target()}"

  if Code.ensure_loaded?(Mix.Project) do
    Path.join(Path.dirname(Mix.Project.build_path()), name)
  else
    Path.expand("_build/#{name}")
  end
end
|
|
|
|
|
|
|
|
@doc """
Returns the version of the tree_sitter executable.

Returns `{:ok, version_string}` when the file at `bin_path/0` exists and
`--version` exits with status 0, and `:error` otherwise.
"""
def bin_version do
  executable = bin_path()

  if File.exists?(executable) do
    case System.cmd(executable, ["--version"]) do
      {output, 0} -> {:ok, String.trim(output)}
      _non_zero_exit -> :error
    end
  else
    :error
  end
end
|
|
|
|
|
|
|
|
@doc """
Runs the tree_sitter executable with the given `extra_args`.

The output (stderr merged into stdout) is streamed directly to stdio.
Returns the exit status of the underlying call.
"""
def run(extra_args) when is_list(extra_args) do
  {_streamed_output, exit_status} = cmd(extra_args)
  exit_status
end
|
|
|
|
|
|
|
|
# Starts the installer task under the application supervisor (or attaches to
# the one already running) and blocks until that task process goes down.
defp start_unique_install_worker() do
  # A fixed child id means concurrent callers share a single installer.
  installer_spec =
    Supervisor.child_spec({Task, &install/0}, restart: :transient, id: __MODULE__.Installer)

  # NOTE(review): any other `start_child/2` result (for example
  # `{:error, :already_present}`) is not handled and raises CaseClauseError.
  installer_pid =
    case Supervisor.start_child(__MODULE__.Supervisor, installer_spec) do
      {:ok, pid} -> pid
      {:error, {:already_started, pid}} -> pid
    end

  monitor_ref = Process.monitor(installer_pid)

  receive do
    {:DOWN, ^monitor_ref, _, _, _} -> :ok
  end
end
|
|
|
|
|
|
|
|
@doc """
Installs, if not available, and then runs `tree_sitter`.

The install step is skipped when a file already exists at `bin_path/0`.

Returns the same as `run/1`.
"""
def install_and_run(args) do
  if !File.exists?(bin_path()) do
    start_unique_install_worker()
  end

  run(args)
end
|
|
|
|
|
|
|
|
@doc """
Installs tree_sitter with `configured_version/0`.

Downloads the gzipped release for the current target from GitHub, unpacks it
into a freshly created cache (or temp) directory, marks it executable and
copies it to `bin_path/0`.

Raises when no scratch directory can be created or when the download fails.
"""
def install do
  version = configured_version()

  # With MIX_XDG set, resolve the cache directory using linux (XDG) rules.
  tmp_opts = if System.get_env("MIX_XDG"), do: %{os: :linux}, else: %{}

  tmp_dir =
    freshdir_p(:filename.basedir(:user_cache, "tree_sitter", tmp_opts)) ||
      freshdir_p(Path.join(System.tmp_dir!(), "tree_sitter")) ||
      raise "could not install tree-sitter. Set MIX_XDG=1 and then set XDG_CACHE_HOME to the path you want to use as cache"

  url =
    "https://github.com/tree-sitter/tree-sitter/releases/download/v#{version}/tree-sitter-#{target()}.gz"

  executable_name =
    case :os.type() do
      {:win32, _} -> "tree_sitter.exe"
      _ -> "tree_sitter"
    end

  result_path = Path.join(tmp_dir, executable_name)

  unpacked = url |> fetch_body!() |> :zlib.gunzip()
  File.write!(result_path, unpacked)

  # Owner-only read/write/execute.
  File.chmod!(result_path, 0o700)

  bin_path = bin_path()
  File.mkdir_p!(Path.dirname(bin_path))
  File.cp!(result_path, bin_path)
end
|
|
|
|
|
2024-02-01 01:42:55 +00:00
|
|
|
# Captures the language name from a parser path such as ".../tree-sitter-elixir".
@language_pattern ~r{tree-sitter-([^/]+)}

@doc """
Returns a map of language name to the first file type registered for it.

Runs `dump-languages` on the executable, splits its output on blank lines and
parses every entry as YAML. Entries without a `parser`, without any
`file_types`, or whose parser path does not contain `tree-sitter-<name>` are
skipped.
"""
def language_to_extension do
  {chunks, _status} = cmd(["dump-languages"], [])

  # Chunks arrive at arbitrary byte boundaries, so they are concatenated
  # as-is; inserting a separator here could split a line in two.
  entries = chunks |> Enum.join() |> String.split("\n\n", trim: true)

  for entry <- entries,
      %{"parser" => parser, "file_types" => [file_type | _]} <-
        [YamlElixir.read_from_string!(entry)],
      is_binary(parser),
      [_, language] <- [Regex.run(@language_pattern, parser)],
      into: %{} do
    {language, file_type}
  end
end
|
|
|
|
|
|
|
|
@doc """
Renders `text` as highlighted HTML for the language `lang`.

When `lang` is not a key of `language_to_extension/0`, a plain line-numbered
table is returned as `{:ok, html}`. Otherwise the result of running the
highlighter is returned: `{:ok, html}` on exit status 0, `{:error, output}`
on any other status.
"""
def highlight_html(text, lang) do
  ext = Map.get(language_to_extension(), lang)

  if is_nil(ext) do
    {:ok, fallback_highlight_html(text)}
  else
    do_highlight_html(text, ext)
  end
end
|
|
|
|
|
|
|
|
# Renders `text` as a plain HTML table with one row per line and a 1-based
# line-number column. Used when no highlighter is known for the language.
defp fallback_highlight_html(text) do
  rows =
    text
    |> String.split("\n")
    |> Enum.with_index(1)
    |> Enum.map_join("\n", fn {line, number} ->
      # The line is arbitrary source code, so it must be escaped before it is
      # embedded in markup.
      "<tr>\n" <>
        ~s{<td class="line-number">#{number}</td>\n} <>
        ~s{<td class="line">#{escape_html(line)}</td>\n} <>
        "</tr>"
    end)

  "<table>" <> rows <> "</table>"
end

# Escapes the characters that are significant in HTML text and attributes.
# `&` is replaced first so the entities produced below are not re-escaped.
defp escape_html(text) do
  text
  |> String.replace("&", "&amp;")
  |> String.replace("<", "&lt;")
  |> String.replace(">", "&gt;")
  |> String.replace("\"", "&quot;")
  |> String.replace("'", "&#39;")
end
|
|
|
|
|
|
|
|
# Writes `text` to a uniquely named temp file whose name carries `ext`, runs
# `highlight --html` on it and returns `{:ok, html}` on exit status 0 or
# `{:error, output}` otherwise. The temp file is removed afterwards.
defp do_highlight_html(text, ext) do
  id = :crypto.strong_rand_bytes(16) |> Base.encode32()
  tmp_root = System.tmp_dir!()

  # A file type starting with "." is used as the whole file name, so it gets
  # its own unique directory; anything else becomes "<id>.<ext>".
  {path, cleanup_target} =
    if String.starts_with?(ext, ".") do
      dir = Path.join(tmp_root, id)
      File.mkdir_p!(dir)
      {Path.join(dir, ext), dir}
    else
      file = Path.join(tmp_root, "#{id}.#{ext}")
      {file, file}
    end

  try do
    File.write!(path, text)

    case cmd(["highlight", "--html", path], []) do
      {out, 0} ->
        {:ok, Enum.join(out)}

      {out, _error_status} ->
        {:error, Enum.join(out)}
    end
  after
    # Best-effort cleanup so repeated calls do not pile up temp files.
    File.rm_rf(cleanup_target)
  end
end
|
|
|
|
|
|
|
|
# Invokes the executable at `bin_path/0` with `extra_args`, merging stderr
# into stdout and collecting output into `into` (stdio by default).
# Returns the `{collected, exit_status}` tuple from `System.cmd/3`.
defp cmd(extra_args, into \\ IO.stream(:stdio, :line)) do
  collect_opts = [into: into, stderr_to_stdout: true]

  # The optional config directory is handed over via TREE_SITTER_DIR.
  opts =
    if dir = Application.get_env(:tree_sitter, :config_directory) do
      [env: [{"TREE_SITTER_DIR", dir}]] ++ collect_opts
    else
      collect_opts
    end

  System.cmd(bin_path(), extra_args, opts)
end
|
|
|
|
|
2023-09-22 09:42:35 +00:00
|
|
|
# Removes `path` recursively and recreates it as an empty directory.
# Returns `path` on success and `nil` when either step fails.
defp freshdir_p(path) do
  case File.rm_rf(path) do
    {:ok, _removed} ->
      if File.mkdir_p(path) == :ok, do: path, else: nil

    _failure ->
      nil
  end
end
|
|
|
|
|
|
|
|
# Returns the "<os>-<arch>" suffix used both in the release download URL and
# in the default executable name. Raises on unknown unix architectures.
defp target do
  case :os.type() do
    # Assuming it's an x86 CPU
    {:win32, _} ->
      if :erlang.system_info(:wordsize) == 8 do
        "windows-x64"
      else
        "windows-x86"
      end

    {:unix, osname} ->
      arch_str = :erlang.system_info(:system_architecture)
      [arch | _] = arch_str |> List.to_string() |> String.split("-")

      # Every unix that is not darwin is treated as linux.
      osname = if osname == :darwin, do: "macos", else: "linux"

      case arch do
        # amd64 is another name for x86_64, not an ARM architecture.
        "amd64" -> "#{osname}-x64"
        "x86_64" -> "#{osname}-x64"
        "i686" -> "#{osname}-x86"
        "i386" -> "#{osname}-x86"
        "aarch64" -> "#{osname}-arm64"
        # Keep the same `macos-` prefix every other darwin branch produces.
        "arm" when osname == "macos" -> "macos-arm64"
        "arm" -> "#{osname}-arm"
        "armv7" <> _ -> "#{osname}-arm"
        _ -> raise "tree_sitter is not available for architecture: #{arch_str}"
      end
  end
end
|
|
|
|
|
|
|
|
# Downloads `url` with `:httpc` and returns the response body as a binary.
# Honours the HTTP(S)_PROXY environment variables and verifies the peer
# certificate against `cacertfile/0`. Raises on any non-200 result.
defp fetch_body!(url) do
  scheme = URI.parse(url).scheme
  url = String.to_charlist(url)
  Logger.debug("Downloading tree_sitter from #{url}")

  {:ok, _} = Application.ensure_all_started(:inets)
  {:ok, _} = Application.ensure_all_started(:ssl)

  proxy = proxy_for_scheme(scheme)

  if proxy do
    %{host: proxy_host, port: proxy_port} = URI.parse(proxy)
    Logger.debug("Using #{String.upcase(scheme)}_PROXY: #{proxy}")

    proxy_option = if scheme == "https", do: :https_proxy, else: :proxy
    :httpc.set_options([{proxy_option, {{String.to_charlist(proxy_host), proxy_port}, []}}])
  end

  # https://erlef.github.io/security-wg/secure_coding_and_deployment_hardening/inets
  ssl_options = [
    verify: :verify_peer,
    cacertfile: String.to_charlist(cacertfile()),
    depth: 2,
    customize_hostname_check: [
      match_fun: :public_key.pkix_verify_hostname_match_fun(:https)
    ]
  ]

  http_options = maybe_add_proxy_auth([ssl: ssl_options], scheme)
  request_options = [body_format: :binary]

  case :httpc.request(:get, {url, []}, http_options, request_options) do
    {:ok, {{_, 200, _}, _headers, body}} ->
      body

    other ->
      raise """
      couldn't fetch #{url}: #{inspect(other)}

      You may also install the "tree_sitter" executable manually, \
      see the docs: https://hexdocs.pm/tree_sitter
      """
  end
end
|
|
|
|
|
|
|
|
# Returns the proxy URL configured in the environment for the given scheme,
# preferring the upper-case variable name, or `nil` when neither is set.
defp proxy_for_scheme("http"),
  do: Enum.find_value(["HTTP_PROXY", "http_proxy"], &System.get_env/1)

defp proxy_for_scheme("https"),
  do: Enum.find_value(["HTTPS_PROXY", "https_proxy"], &System.get_env/1)
|
|
|
|
|
|
|
|
# Prepends a `:proxy_auth` entry to `http_options` when the proxy URL for
# `scheme` carries credentials; returns `http_options` untouched otherwise.
defp maybe_add_proxy_auth(http_options, scheme) do
  credentials = proxy_auth(scheme)

  if is_nil(credentials) do
    http_options
  else
    [{:proxy_auth, credentials} | http_options]
  end
end
|
|
|
|
|
|
|
|
# Extracts `{username, password}` (as charlists, as `:httpc` expects) from the
# userinfo part of the proxy URL configured for `scheme`.
# Returns `nil` when there is no proxy, no userinfo, or no password part.
defp proxy_auth(scheme) do
  with proxy when is_binary(proxy) <- proxy_for_scheme(scheme),
       %{userinfo: userinfo} when is_binary(userinfo) <- URI.parse(proxy),
       # Split on the first colon only, so passwords that themselves contain
       # ":" are kept whole instead of being silently discarded.
       [username, password] <- String.split(userinfo, ":", parts: 2) do
    {String.to_charlist(username), String.to_charlist(password)}
  else
    _ -> nil
  end
end
|
|
|
|
|
|
|
|
# Returns the CA certificate file to verify downloads with: the configured
# `:cacerts_path` when set, otherwise the bundle shipped by CAStore.
defp cacertfile() do
  case Application.get_env(:tree_sitter, :cacerts_path) do
    unset when unset in [nil, false] -> CAStore.file_path()
    configured -> configured
  end
end
|
|
|
|
end
|