Merge pull request #129 from tuncer/erlc-speedup-v5

Speed up the compilation process v5
This commit is contained in:
Andrew Thompson 2014-03-05 13:39:55 -05:00
commit 62b006227c
8 changed files with 364 additions and 121 deletions

2
.gitignore vendored
View file

@ -4,9 +4,9 @@ rebar
*.orig
.*.swp
rt.work
.hgignore
.test
dialyzer_warnings
rebar.cmd
.eunit
deps
.rebar/*

View file

@ -8,6 +8,7 @@ all:
clean:
@rm -rf rebar ebin/*.beam inttest/rt.work rt.work .eunit
@rm -f .rebarinfo
distclean: clean
@rm -f dialyzer_warnings

1
THANKS
View file

@ -120,3 +120,4 @@ Pedram Nimreezi
Sylvain Benner
Oliver Ferrigni
Dave Thomas
Evgeniy Khramtsov

View file

@ -1,3 +1,3 @@
rebar_eunit.erl:434: Call to missing or unexported function eunit_test:function_wrapper/2
rebar_utils.erl:163: Call to missing or unexported function escript:foldl/3
rebar_utils.erl:164: Call to missing or unexported function escript:foldl/3

View file

@ -304,7 +304,7 @@ get_deps_dir(Config) ->
get_deps_dir(Config, "").
get_deps_dir(Config, App) ->
BaseDir = rebar_config:get_xconf(Config, base_dir, []),
BaseDir = rebar_utils:base_dir(Config),
DepsDir = get_shared_deps_dir(Config, "deps"),
{true, filename:join([BaseDir, DepsDir, App])}.

View file

@ -36,6 +36,17 @@
-include("rebar.hrl").
-include_lib("stdlib/include/erl_compile.hrl").
-define(ERLCINFO_VSN, 1).
-define(ERLCINFO_FILE, "erlcinfo").
-type erlc_info_v() :: {digraph:vertex(), term()} | 'false'.
-type erlc_info_e() :: {digraph:vertex(), digraph:vertex()}.
-type erlc_info() :: {list(erlc_info_v()), list(erlc_info_e())}.
-record(erlcinfo,
{
vsn = ?ERLCINFO_VSN :: pos_integer(),
info = {[], []} :: erlc_info()
}).
%% ===================================================================
%% Public API
%% ===================================================================
@ -90,7 +101,7 @@ compile(Config, _AppFile) ->
doterl_compile(Config, "ebin").
-spec clean(rebar_config:config(), file:filename()) -> 'ok'.
clean(_Config, _AppFile) ->
clean(Config, _AppFile) ->
MibFiles = rebar_utils:find_files("mibs", "^.*\\.mib\$"),
MIBs = [filename:rootname(filename:basename(MIB)) || MIB <- MibFiles],
rebar_file_utils:delete_each(
@ -103,6 +114,9 @@ clean(_Config, _AppFile) ->
[ binary_to_list(iolist_to_binary(re:replace(F, "\\.[x|y]rl$", ".erl")))
|| F <- YrlFiles ]),
%% Delete the build graph, if any
rebar_file_utils:rm_rf(erlcinfo_file(Config)),
%% Erlang compilation is recursive, so it's possible that we have a nested
%% directory structure in ebin with .beam files within. As such, we want
%% to scan whatever is left in the ebin/ directory for sub-dirs which
@ -260,7 +274,7 @@ doterl_compile(Config, OutDir) ->
doterl_compile(Config, OutDir, []).
doterl_compile(Config, OutDir, MoreSources) ->
FirstErls = rebar_config:get_list(Config, erl_first_files, []),
ErlFirstFiles = rebar_config:get_list(Config, erl_first_files, []),
ErlOpts = rebar_utils:erl_opts(Config),
?DEBUG("erl_opts ~p~n", [ErlOpts]),
%% Support the src_dirs option allowing multiple directories to
@ -268,114 +282,270 @@ doterl_compile(Config, OutDir, MoreSources) ->
%% eunit tests be separated from the core application source.
SrcDirs = rebar_utils:src_dirs(proplists:append_values(src_dirs, ErlOpts)),
RestErls = [Source || Source <- gather_src(SrcDirs, []) ++ MoreSources,
not lists:member(Source, FirstErls)],
%% Split RestErls so that parse_transforms and behaviours are instead added
%% to erl_first_files, parse transforms first.
%% This should probably be somewhat combined with inspect_epp
[ParseTransforms, Behaviours, OtherErls] =
lists:foldl(fun(F, [A, B, C]) ->
case compile_priority(F) of
parse_transform ->
[[F | A], B, C];
behaviour ->
[A, [F | B], C];
callback ->
[A, [F | B], C];
_ ->
[A, B, [F | C]]
end
end, [[], [], []], RestErls),
NewFirstErls = FirstErls ++ ParseTransforms ++ Behaviours,
not lists:member(Source, ErlFirstFiles)],
%% Make sure that ebin/ exists and is on the path
ok = filelib:ensure_dir(filename:join("ebin", "dummy.beam")),
CurrPath = code:get_path(),
true = code:add_path(filename:absname("ebin")),
OutDir1 = proplists:get_value(outdir, ErlOpts, OutDir),
rebar_base_compiler:run(Config, NewFirstErls, OtherErls,
fun(S, C) ->
internal_erl_compile(C, S, OutDir1, ErlOpts)
end),
G = init_erlcinfo(Config, RestErls),
%% Split RestErls so that files which are depended on are treated
%% like erl_first_files.
{OtherFirstErls, OtherErls} =
lists:partition(
fun(F) ->
Children = get_children(G, F),
log_files(?FMT("Files dependent on ~s", [F]), Children),
case erls(Children) of
[] ->
%% There are no files dependent on this file.
false;
_ ->
%% There are some files dependent on the file.
%% Thus the file has higher priority
%% and should be compiled in the first place.
true
end
end, RestErls),
%% Dependencies of OtherFirstErls that must be compiled first.
OtherFirstErlsDeps = lists:flatmap(
fun(Erl) -> erls(get_parents(G, Erl)) end,
OtherFirstErls),
%% NOTE: In case the way we retrieve OtherFirstErlsDeps or merge
%% it with OtherFirstErls does not result in the correct compile
%% priorities, or the method in use proves to be too slow for
%% certain projects, consider using a more elaborate method (maybe
%% digraph_utils) or alternatively getting and compiling the .erl
%% parents of an individual Source in internal_erl_compile. By not
%% handling this in internal_erl_compile, we also avoid extra
%% needs_compile/2 calls.
FirstErls = ErlFirstFiles ++ uo_merge(OtherFirstErlsDeps, OtherFirstErls),
?DEBUG("Files to compile first: ~p~n", [FirstErls]),
rebar_base_compiler:run(
Config, FirstErls, OtherErls,
fun(S, C) ->
internal_erl_compile(C, S, OutDir1, ErlOpts, G)
end),
true = code:set_path(CurrPath),
ok.
%%
%% Keep only the entries of Files whose extension is exactly ".erl".
%% The relative order of the remaining entries is unchanged.
%%
erls(Files) ->
    IsErl = fun(File) -> filename:extension(File) =:= ".erl" end,
    lists:filter(IsErl, Files).
%%
%% Remove duplicates from a list. When an element occurs more than
%% once, its last occurrence is the one that is kept; the surviving
%% elements stay in their original relative order.
%%
ulist(L) ->
    ulist(L, []).

%% Acc holds already accepted elements in reverse order. They are
%% emitted in front of the de-duplicated remainder L.
ulist(L, Acc) ->
    KeepLast = fun(Elem, Kept) ->
                       case lists:member(Elem, Kept) of
                           true -> Kept;
                           false -> [Elem | Kept]
                       end
               end,
    %% Folding from the right means an element is dropped exactly when
    %% an equal element exists further to the right.
    lists:reverse(Acc, lists:foldr(KeepLast, [], L)).
%%
%% Merge two lists without duplicates while preserving order: start
%% from the de-duplicated L1 and append, in order, every element of L2
%% that is not present yet.
%%
uo_merge(L1, L2) ->
    lists:foldl(fun u_add_element/2, ulist(L1), L2).

%% Return Set unchanged if it already contains Elem, otherwise return
%% Set with Elem appended at the end.
u_add_element(Elem, Set) ->
    case lists:member(Elem, Set) of
        true -> Set;
        false -> Set ++ [Elem]
    end.
-spec include_path(file:filename(),
rebar_config:config()) -> [file:filename(), ...].
include_path(Source, Config) ->
ErlOpts = rebar_config:get(Config, erl_opts, []),
["include", filename:dirname(Source)]
++ proplists:get_all_values(i, ErlOpts).
-spec inspect(file:filename(),
[file:filename(), ...]) -> {string(), [string()]}.
inspect(Source, IncludePath) ->
ModuleDefault = filename:basename(Source, ".erl"),
case epp:open(Source, IncludePath) of
{ok, Epp} ->
inspect_epp(Epp, Source, ModuleDefault, []);
{error, Reason} ->
?DEBUG("Failed to inspect ~s: ~p\n", [Source, Reason]),
{ModuleDefault, []}
end.
-spec inspect_epp(pid(), file:filename(), file:filename(),
[string()]) -> {string(), [string()]}.
inspect_epp(Epp, Source, Module, Includes) ->
case epp:parse_erl_form(Epp) of
{ok, {attribute, _, module, ModInfo}} ->
ActualModuleStr =
case ModInfo of
%% Typical module name, single atom
ActualModule when is_atom(ActualModule) ->
atom_to_list(ActualModule);
%% Packag-ized module name, list of atoms
ActualModule when is_list(ActualModule) ->
string:join([atom_to_list(P) ||
P <- ActualModule], ".");
%% Parameterized module name, single atom
{ActualModule, _} when is_atom(ActualModule) ->
atom_to_list(ActualModule);
%% Parameterized and packagized module name, list of atoms
{ActualModule, _} when is_list(ActualModule) ->
string:join([atom_to_list(P) ||
P <- ActualModule], ".")
end,
inspect_epp(Epp, Source, ActualModuleStr, Includes);
{ok, {attribute, 1, file, {Module, 1}}} ->
inspect_epp(Epp, Source, Module, Includes);
{ok, {attribute, 1, file, {Source, 1}}} ->
inspect_epp(Epp, Source, Module, Includes);
{ok, {attribute, 1, file, {IncFile, 1}}} ->
inspect_epp(Epp, Source, Module, [IncFile | Includes]);
{eof, _} ->
epp:close(Epp),
{Module, Includes};
_ ->
inspect_epp(Epp, Source, Module, Includes)
end.
lists:usort(["include", filename:dirname(Source)]
++ proplists:get_all_values(i, ErlOpts)).
-spec needs_compile(file:filename(), file:filename(),
[string()]) -> boolean().
needs_compile(Source, Target, Hrls) ->
needs_compile(Source, Target, Parents) ->
TargetLastMod = filelib:last_modified(Target),
lists:any(fun(I) -> TargetLastMod < filelib:last_modified(I) end,
[Source] ++ Hrls).
[Source] ++ Parents).
%% Validate a term that was read back from the erlcinfo file.
%% Returns ok for an #erlcinfo{} record carrying the current
%% ?ERLCINFO_VSN. A record with any other version, or a term that is not
%% an #erlcinfo{} record at all, is reported through ?ABORT together
%% with the path of the offending file.
check_erlcinfo(Config, Erlcinfo) ->
    case Erlcinfo of
        #erlcinfo{vsn=?ERLCINFO_VSN} ->
            ok;
        #erlcinfo{vsn=Vsn} ->
            ?ABORT("~s file version is incompatible. expected: ~b got: ~b~n",
                   [erlcinfo_file(Config), ?ERLCINFO_VSN, Vsn]);
        _ ->
            ?ABORT("~s file is invalid. Please delete before next run.~n",
                   [erlcinfo_file(Config)])
    end.
%% Path of the persisted build graph: the ?ERLCINFO_FILE entry inside
%% the ".rebar" directory below rebar_utils:base_dir(Config).
erlcinfo_file(Config) ->
    RebarDir = filename:join(rebar_utils:base_dir(Config), ".rebar"),
    filename:join(RebarDir, ?ERLCINFO_FILE).
%% Restore the dependency graph from disk, refresh the entry of every
%% file in Erls, write the graph back if any entry changed, and return
%% the graph.
init_erlcinfo(Config, Erls) ->
    G = restore_erlcinfo(Config),
    %% Unique list of the directories the source files live in. It is
    %% appended to each file's include path so that files in sub dirs
    %% of the configured src_dirs (for example src/sub_dir/foo.erl) can
    %% be found.
    SrcDirSet = sets:from_list([filename:dirname(Erl) || Erl <- Erls]),
    Dirs = sets:to_list(SrcDirSet),
    Updates = [update_erlcinfo(G, Erl, include_path(Erl, Config) ++ Dirs)
               || Erl <- Erls],
    %% Only rewrite the file when at least one entry was modified.
    ok = store_erlcinfo(G, Config, lists:member(modified, Updates)),
    G.
%% Bring the graph entry of Source up to date.
%%
%% G      - digraph whose vertices are file names labelled with the
%%          time the file was last scanned.
%% Source - file whose entry is checked.
%% Dirs   - directories handed on to modify_erlcinfo/3 for resolving
%%          the files Source refers to.
%%
%% Returns 'modified' if the graph was changed (entry added, rescanned
%% or removed) and 'unmodified' if the stored entry is still current.
update_erlcinfo(G, Source, Dirs) ->
    case digraph:vertex(G, Source) of
        {_, LastUpdated} ->
            %% filelib:last_modified/1 yields 0 for a missing file.
            LastModified = filelib:last_modified(Source),
            if LastModified == 0 ->
                    %% The file doesn't exist anymore,
                    %% erase it from the graph.
                    %% All the edges will be erased automatically.
                    digraph:del_vertex(G, Source),
                    modified;
               LastUpdated < LastModified ->
                    %% The file changed after it was last scanned:
                    %% rescan it, then report the change.
                    modify_erlcinfo(G, Source, Dirs),
                    modified;
               true ->
                    unmodified
            end;
        false ->
            %% Not in the graph yet: scan and add it.
            modify_erlcinfo(G, Source, Dirs),
            modified
    end.
%% (Re)scan Source and record its dependencies in G.
%% The attributes of Source are parsed for referenced files, the
%% references are resolved against Dirs, the vertex of Source is
%% (re)labelled with the current local date and time, and for each
%% resolved file its own entry is refreshed and an edge
%% Source -> file is added.
modify_erlcinfo(G, Source, Dirs) ->
    {ok, Fd} = file:open(Source, [read]),
    RawDeps = parse_attrs(Fd, []),
    Deps = expand_file_names(RawDeps, Dirs),
    ok = file:close(Fd),
    Stamp = {date(), time()},
    digraph:add_vertex(G, Source, Stamp),
    LinkDep = fun(Dep) ->
                      update_erlcinfo(G, Dep, Dirs),
                      digraph:add_edge(G, Source, Dep)
              end,
    lists:foreach(LinkDep, Deps).
%% Build a digraph from the erlcinfo file of this project.
%% Always returns a digraph. It is left empty when the file cannot be
%% read or its content is not a valid external term; in the latter
%% case an error is logged. A decoded term is validated with
%% check_erlcinfo/2 before its vertices and edges are loaded.
restore_erlcinfo(Config) ->
    File = erlcinfo_file(Config),
    G = digraph:new(),
    AddVertex = fun({V, LastUpdated}) ->
                        digraph:add_vertex(G, V, LastUpdated)
                end,
    AddEdge = fun({V1, V2}) ->
                      digraph:add_edge(G, V1, V2)
              end,
    case file:read_file(File) of
        {ok, Data} ->
            %% Only the decoding itself is protected by the try.
            try binary_to_term(Data) of
                Erlcinfo ->
                    ok = check_erlcinfo(Config, Erlcinfo),
                    {Vs, Es} = Erlcinfo#erlcinfo.info,
                    lists:foreach(AddVertex, Vs),
                    lists:foreach(AddEdge, Es)
            catch
                error:badarg ->
                    ?ERROR(
                       "Failed (binary_to_term) to restore rebar info file."
                       " Discard file.~n", []),
                    ok
            end;
        _Err ->
            ok
    end,
    G.
%% Persist the graph G to the erlcinfo file.
%% Nothing is written when the third argument is 'false' (graph not
%% modified). Otherwise all {Vertex, Label} pairs and all
%% {From, To} edge pairs are wrapped in an #erlcinfo{} record,
%% serialised with maximum compression and written to the file, whose
%% directory is created first if needed. Returns the result of
%% file:write_file/2.
store_erlcinfo(_G, _Config, false) ->
    ok;
store_erlcinfo(G, Config, _Modified) ->
    Vs = [digraph:vertex(G, V) || V <- digraph:vertices(G)],
    EdgeEnds = fun(E) ->
                       {_, From, To, _} = digraph:edge(G, E),
                       {From, To}
               end,
    Es = [EdgeEnds(E) || {V, _} <- Vs, E <- digraph:out_edges(G, V)],
    File = erlcinfo_file(Config),
    ok = filelib:ensure_dir(File),
    Data = term_to_binary(#erlcinfo{info={Vs, Es}}, [{compressed, 9}]),
    file:write_file(File, Data).
%% Resolve each entry of Files to existing regular files.
%% An entry that exists as given is returned unchanged. Otherwise it is
%% joined with every directory in Dirs and all joined paths that exist
%% are returned (possibly several, possibly none).
%%
%% NOTE: If, for example, one of the entries in Files refers to
%% gen_server.erl, that entry will be dropped. It is dropped because
%% such an entry usually refers to the beam file, and we don't pass a
%% list of OTP src dirs for finding gen_server.erl's full path. Also,
%% if gen_server.erl was modified, it's not rebar's task to compile a
%% new version of the beam file. Therefore, it's reasonable to drop
%% such entries. Also see process_attr(behaviour, Form, Includes).
-spec expand_file_names([file:filename()],
                        [file:filename()]) -> [file:filename()].
expand_file_names(Files, Dirs) ->
    FoundInDirs = fun(Incl) ->
                          [Path || Dir <- Dirs,
                                   Path <- [filename:join(Dir, Incl)],
                                   filelib:is_regular(Path)]
                  end,
    lists:append([case filelib:is_regular(Incl) of
                      true -> [Incl];
                      false -> FoundInDirs(Incl)
                  end || Incl <- Files]).
%% Return all files which Source depends upon, directly or
%% transitively: every vertex that can be reached from Source in G.
-spec get_parents(digraph(), file:filename()) -> [file:filename()].
get_parents(G, Source) ->
    Start = [Source],
    digraph_utils:reachable_neighbours(Start, G).
%% Return all files dependent on Source, directly or transitively:
%% every vertex in G from which Source can be reached.
-spec get_children(digraph(), file:filename()) -> [file:filename()].
get_children(G, Source) ->
    Target = [Source],
    digraph_utils:reaching_neighbours(Target, G).
-spec internal_erl_compile(rebar_config:config(), file:filename(),
file:filename(), list()) -> 'ok' | 'skipped'.
internal_erl_compile(Config, Source, Outdir, ErlOpts) ->
file:filename(), list(),
digraph()) -> 'ok' | 'skipped'.
internal_erl_compile(Config, Source, OutDir, ErlOpts, G) ->
%% Determine the target name and includes list by inspecting the source file
{Module, Hrls} = inspect(Source, include_path(Source, Config)),
Module = filename:basename(Source, ".erl"),
Parents = get_parents(G, Source),
log_files(?FMT("~s depends on", [Source]), Parents),
%% Construct the target filename
Target = filename:join([Outdir | string:tokens(Module, ".")]) ++ ".beam",
Target = filename:join([OutDir | string:tokens(Module, ".")]) ++ ".beam",
ok = filelib:ensure_dir(Target),
%% If the file needs compilation, based on last mod date of includes or
%% the target
case needs_compile(Source, Target, Hrls) of
case needs_compile(Source, Target, Parents) of
true ->
Opts = [{outdir, filename:dirname(Target)}] ++
ErlOpts ++ [{i, "include"}, return],
@ -463,40 +633,97 @@ delete_dir(Dir, Subdirs) ->
lists:foreach(fun(D) -> delete_dir(D, dirs(D)) end, Subdirs),
file:del_dir(Dir).
-spec compile_priority(file:filename()) -> 'normal' | 'behaviour' |
'callback' |
'parse_transform'.
compile_priority(File) ->
case epp_dodger:parse_file(File) of
{error, _} ->
normal; % couldn't parse the file, default priority
{ok, Trees} ->
F2 = fun({tree,arity_qualifier,_,
{arity_qualifier,{tree,atom,_,behaviour_info},
{tree,integer,_,1}}}, _) ->
behaviour;
({tree,arity_qualifier,_,
{arity_qualifier,{tree,atom,_,parse_transform},
{tree,integer,_,2}}}, _) ->
parse_transform;
(_, Acc) ->
Acc
end,
F = fun({tree, attribute, _,
{attribute, {tree, atom, _, export},
[{tree, list, _, {list, List, none}}]}}, Acc) ->
lists:foldl(F2, Acc, List);
({tree, attribute, _,
{attribute, {tree, atom, _, callback},_}}, _Acc) ->
callback;
(_, Acc) ->
Acc
end,
lists:foldl(F, normal, Trees)
%% Read Erlang forms from the open file Fd until end of file and
%% collect the files referenced by its attributes.
%%
%% Fd       - I/O device positioned where scanning should start.
%% Includes - accumulator of file names found so far.
%%
%% Returns the accumulator extended by process_attr/2 for every
%% attribute form. Non-attribute forms and forms that fail to scan or
%% parse are skipped. Reading stops at end of file, and also when the
%% I/O server itself reports 'eof' or an error, because retrying such a
%% result would never make progress.
parse_attrs(Fd, Includes) ->
    case io:parse_erl_form(Fd, "") of
        {ok, Form, _Line} ->
            case erl_syntax:type(Form) of
                attribute ->
                    NewIncludes = process_attr(Form, Includes),
                    parse_attrs(Fd, NewIncludes);
                _ ->
                    parse_attrs(Fd, Includes)
            end;
        {eof, _} ->
            Includes;
        {error, _ErrorInfo, _ErrorLine} ->
            %% A single form could not be scanned or parsed. The input
            %% has advanced past it, so carry on with the next form.
            parse_attrs(Fd, Includes);
        _NoData ->
            %% Bare 'eof' or {error, Reason} from the I/O server:
            %% nothing more can be read, return what was collected.
            Includes
    end.
%% Extract the attribute name of Form and let process_attr/3 add the
%% files that attribute refers to. Any exception raised while doing so
%% is swallowed and Includes is returned unchanged.
process_attr(Form, Includes) ->
    try
        process_attr(erl_syntax:atom_value(erl_syntax:attribute_name(Form)),
                     Form, Includes)
    catch
        _Class:_Reason ->
            %% TODO: We should probably try to be more specific here
            %% and not suppress all errors.
            Includes
    end.
%% Add to Includes the file name(s) that the attribute Form, whose
%% name is the first argument, refers to:
%%   import                - "<Mod>.erl" of the imported module
%%   file                  - the file named by the attribute
%%   include               - the included file as written
%%   include_lib           - the file, expanded to a lib dir path when
%%                           it does not exist as written
%%   behaviour / behavior  - "<Behaviour>.erl"
%%   compile               - "<Mod>.erl" for each parse_transform or
%%                           core_transform option
%% Any other attribute, and compile options that name no transform,
%% leave Includes unchanged.
process_attr(import, Form, Includes) ->
    case erl_syntax_lib:analyze_import_attribute(Form) of
        {Mod, _Funs} ->
            [atom_to_list(Mod) ++ ".erl"|Includes];
        Mod ->
            [atom_to_list(Mod) ++ ".erl"|Includes]
    end;
process_attr(file, Form, Includes) ->
    {File, _} = erl_syntax_lib:analyze_file_attribute(Form),
    [File|Includes];
process_attr(include, Form, Includes) ->
    [FileNode] = erl_syntax:attribute_arguments(Form),
    File = erl_syntax:string_value(FileNode),
    [File|Includes];
process_attr(include_lib, Form, Includes) ->
    [FileNode] = erl_syntax:attribute_arguments(Form),
    RawFile = erl_syntax:string_value(FileNode),
    File = maybe_expand_include_lib_path(RawFile),
    [File|Includes];
process_attr(behaviour, Form, Includes) ->
    [FileNode] = erl_syntax:attribute_arguments(Form),
    File = erl_syntax:atom_name(FileNode) ++ ".erl",
    [File|Includes];
process_attr(behavior, Form, Includes) ->
    %% The American spelling is equally valid in a module and must
    %% produce the same dependency.
    process_attr(behaviour, Form, Includes);
process_attr(compile, Form, Includes) ->
    [Arg] = erl_syntax:attribute_arguments(Form),
    case erl_syntax:concrete(Arg) of
        {parse_transform, Mod} ->
            [atom_to_list(Mod) ++ ".erl"|Includes];
        {core_transform, Mod} ->
            [atom_to_list(Mod) ++ ".erl"|Includes];
        L when is_list(L) ->
            lists:foldl(
              fun({parse_transform, M}, Acc) ->
                      [atom_to_list(M) ++ ".erl"|Acc];
                 ({core_transform, M}, Acc) ->
                      [atom_to_list(M) ++ ".erl"|Acc];
                 (_, Acc) ->
                      Acc
              end, Includes, L);
        _ ->
            %% A single option that is not a transform, such as
            %% export_all, adds no dependency.
            Includes
    end;
process_attr(_Other, _Form, Includes) ->
    %% Attributes such as module or export refer to no other file.
    Includes.
%% Given the filename from an include_lib attribute: if it exists as a
%% regular file, return it unmodified; otherwise return the path
%% computed by expand_include_lib_path/1.
maybe_expand_include_lib_path(File) ->
    ExistsAsGiven = filelib:is_regular(File),
    case ExistsAsGiven of
        false ->
            expand_include_lib_path(File);
        true ->
            File
    end.
%% Given a path like "stdlib/include/erl_compile.hrl", return
%% "OTP_INSTALL_DIR/lib/erlang/lib/stdlib-x.y.z/include/erl_compile.hrl".
%% The first path component is taken as the library name, the last one
%% as the file name and everything in between as the sub directory, so
%% that include_lib paths with more than three components still work.
expand_include_lib_path(File) ->
    [Lib | SubDirParts] = filename:split(filename:dirname(File)),
    SubDir = filename:join(SubDirParts),
    LibDir = code:lib_dir(list_to_atom(Lib), list_to_atom(SubDir)),
    filename:join(LibDir, filename:basename(File)).
%%
%% Ensure all files in a list are present and abort if one is missing
%%
@ -509,3 +736,13 @@ check_file(File) ->
false -> ?ABORT("File ~p is missing, aborting\n", [File]);
true -> File
end.
%% Debug-log Prefix followed by the list Files. An empty list is
%% printed on the same line as the prefix, a non-empty list on a line
%% of its own.
log_files(Prefix, [] = Files) ->
    ?DEBUG("~s: ~p~n", [Prefix, Files]);
log_files(Prefix, Files) ->
    ?DEBUG("~s:~n~p~n", [Prefix, Files]).

View file

@ -52,6 +52,7 @@
erl_opts/1,
src_dirs/1,
ebin_dir/0,
base_dir/1,
processing_base_dir/1, processing_base_dir/2]).
-include("rebar.hrl").
@ -307,12 +308,15 @@ src_dirs(SrcDirs) ->
ebin_dir() ->
filename:join(get_cwd(), "ebin").
%% Return the value stored under the base_dir key of Config, as
%% fetched by rebar_config:get_xconf/2.
base_dir(Config) ->
rebar_config:get_xconf(Config, base_dir).
processing_base_dir(Config) ->
Cwd = rebar_utils:get_cwd(),
processing_base_dir(Config, Cwd).
processing_base_dir(Config, Dir) ->
Dir =:= rebar_config:get_xconf(Config, base_dir).
Dir =:= base_dir(Config).
%% ====================================================================
%% Internal functions

View file

@ -146,7 +146,7 @@ code_path(Config) ->
%% functions, even though those functions are present as part
%% of compilation. H/t to @dluna. Long term we should tie more
%% properly into the overall compile code path if possible.
BaseDir = rebar_config:get_xconf(Config, base_dir),
BaseDir = rebar_utils:base_dir(Config),
[P || P <- code:get_path() ++
rebar_config:get(Config, xref_extra_paths, []) ++
[filename:join(BaseDir, filename:join(SubDir, "ebin"))