Skip to content

Commit c6807a4

Browse files
pcloudsgitster
authored andcommitted
clone: open a shortcut for connectivity check
In order to make sure the cloned repository is good, we run "rev-list --objects --not --all $new_refs" on the repository. This is expensive on large repositories. This patch attempts to mitigate the impact in this special case. In the "good" clone case, we only have one pack. If all of the following are met, we can be sure that all objects reachable from the new refs exist, which is the intention of running "rev-list ...": - all refs point to an object in the pack - there are no dangling pointers in any object in the pack - no objects in the pack point to objects outside the pack The second and third checks can be done with the help of index-pack as a slight variation of --strict check (which introduces a new condition for the shortcut: pack transfer must be used and the number of objects large enough to call index-pack). The first is checked in check_everything_connected after we get an "ok" from index-pack. "index-pack + new checks" is still faster than the current "index-pack + rev-list", which is the whole point of this patch. If any of the conditions fail, we fall back to the good old but expensive "rev-list ..". In that case it's even more expensive because we have to pay for the new checks in index-pack. But that should only happen when the other side is either buggy or malicious. Cloning linux-2.6 over file:// before after real 3m25.693s 2m53.050s user 5m2.037s 4m42.396s sys 0m13.750s 0m16.574s A more realistic test with ssh:// over wireless before after real 11m26.629s 10m4.213s user 5m43.196s 5m19.444s sys 0m35.812s 0m37.630s This shortcut is not applied to shallow clones, partly because shallow clones should have no more objects than a usual fetch and the cost of rev-list is acceptable, partly to avoid dealing with corner cases when grafting is involved. This shortcut does not apply to unpack-objects code path either because the number of objects must be small in order to trigger that code path. Signed-off-by: Nguyễn Thái Ngọc Duy <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 920734b commit c6807a4

File tree

9 files changed

+94
-15
lines changed

9 files changed

+94
-15
lines changed

Documentation/git-index-pack.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ OPTIONS
7474
--strict::
7575
Die, if the pack contains broken objects or links.
7676

77+
--check-self-contained-and-connected::
78+
Die if the pack contains broken links. For internal use only.
79+
7780
--threads=<n>::
7881
Specifies the number of threads to spawn when resolving
7982
deltas. This requires that index-pack be compiled with

builtin/clone.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -542,13 +542,15 @@ static void update_remote_refs(const struct ref *refs,
542542
const struct ref *mapped_refs,
543543
const struct ref *remote_head_points_at,
544544
const char *branch_top,
545-
const char *msg)
545+
const char *msg,
546+
struct transport *transport)
546547
{
547548
const struct ref *rm = mapped_refs;
548549

549550
if (0 <= option_verbosity)
550551
printf(_("Checking connectivity... "));
551-
if (check_everything_connected(iterate_ref_map, 0, &rm))
552+
if (check_everything_connected_with_transport(iterate_ref_map,
553+
0, &rm, transport))
552554
die(_("remote did not send all necessary objects"));
553555
if (0 <= option_verbosity)
554556
printf(_("done\n"));
@@ -893,6 +895,9 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
893895
if (option_upload_pack)
894896
transport_set_option(transport, TRANS_OPT_UPLOADPACK,
895897
option_upload_pack);
898+
899+
if (transport->smart_options && !option_depth)
900+
transport->smart_options->check_self_contained_and_connected = 1;
896901
}
897902

898903
refs = transport_get_remote_refs(transport);
@@ -954,7 +959,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
954959
transport_fetch_refs(transport, mapped_refs);
955960

956961
update_remote_refs(refs, mapped_refs, remote_head_points_at,
957-
branch_top.buf, reflog_msg.buf);
962+
branch_top.buf, reflog_msg.buf, transport);
958963

959964
update_head(our_head_points_at, remote_head, reflog_msg.buf);
960965

builtin/index-pack.c

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,10 @@ static int nr_threads;
7777

7878
static int from_stdin;
7979
static int strict;
80+
static int do_fsck_object;
8081
static int verbose;
8182
static int show_stat;
83+
static int check_self_contained_and_connected;
8284

8385
static struct progress *progress;
8486

@@ -187,31 +189,34 @@ static int mark_link(struct object *obj, int type, void *data)
187189

188190
/* The content of each linked object must have been checked
189191
or it must be already present in the object database */
190-
static void check_object(struct object *obj)
192+
static unsigned check_object(struct object *obj)
191193
{
192194
if (!obj)
193-
return;
195+
return 0;
194196

195197
if (!(obj->flags & FLAG_LINK))
196-
return;
198+
return 0;
197199

198200
if (!(obj->flags & FLAG_CHECKED)) {
199201
unsigned long size;
200202
int type = sha1_object_info(obj->sha1, &size);
201203
if (type != obj->type || type <= 0)
202204
die(_("object of unexpected type"));
203205
obj->flags |= FLAG_CHECKED;
204-
return;
206+
return 1;
205207
}
208+
209+
return 0;
206210
}
207211

208-
static void check_objects(void)
212+
static unsigned check_objects(void)
209213
{
210-
unsigned i, max;
214+
unsigned i, max, foreign_nr = 0;
211215

212216
max = get_max_object_index();
213217
for (i = 0; i < max; i++)
214-
check_object(get_indexed_object(i));
218+
foreign_nr += check_object(get_indexed_object(i));
219+
return foreign_nr;
215220
}
216221

217222

@@ -756,7 +761,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
756761
obj = parse_object_buffer(sha1, type, size, buf, &eaten);
757762
if (!obj)
758763
die(_("invalid %s"), typename(type));
759-
if (fsck_object(obj, 1, fsck_error_function))
764+
if (do_fsck_object &&
765+
fsck_object(obj, 1, fsck_error_function))
760766
die(_("Error in object"));
761767
if (fsck_walk(obj, mark_link, NULL))
762768
die(_("Not all child objects of %s are reachable"), sha1_to_hex(obj->sha1));
@@ -1490,6 +1496,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
14901496
struct pack_idx_entry **idx_objects;
14911497
struct pack_idx_option opts;
14921498
unsigned char pack_sha1[20];
1499+
unsigned foreign_nr = 1; /* zero is a "good" value, assume bad */
14931500

14941501
if (argc == 2 && !strcmp(argv[1], "-h"))
14951502
usage(index_pack_usage);
@@ -1511,6 +1518,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
15111518
fix_thin_pack = 1;
15121519
} else if (!strcmp(arg, "--strict")) {
15131520
strict = 1;
1521+
do_fsck_object = 1;
1522+
} else if (!strcmp(arg, "--check-self-contained-and-connected")) {
1523+
strict = 1;
1524+
check_self_contained_and_connected = 1;
15141525
} else if (!strcmp(arg, "--verify")) {
15151526
verify = 1;
15161527
} else if (!strcmp(arg, "--verify-stat")) {
@@ -1624,7 +1635,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
16241635
conclude_pack(fix_thin_pack, curr_pack, pack_sha1);
16251636
free(deltas);
16261637
if (strict)
1627-
check_objects();
1638+
foreign_nr = check_objects();
16281639

16291640
if (show_stat)
16301641
show_pack_info(stat_only);
@@ -1650,5 +1661,11 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
16501661
if (index_name == NULL)
16511662
free((void *) curr_index);
16521663

1664+
/*
1665+
* Let the caller know this pack is not self contained
1666+
*/
1667+
if (check_self_contained_and_connected && foreign_nr)
1668+
return 1;
1669+
16531670
return 0;
16541671
}

connected.c

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@
22
#include "run-command.h"
33
#include "sigchain.h"
44
#include "connected.h"
5+
#include "transport.h"
56

7+
int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data)
8+
{
9+
return check_everything_connected_with_transport(fn, quiet, cb_data, NULL);
10+
}
611
/*
712
* If we feed all the commits we want to verify to this command
813
*
@@ -14,18 +19,34 @@
1419
*
1520
* Returns 0 if everything is connected, non-zero otherwise.
1621
*/
17-
int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data)
22+
int check_everything_connected_with_transport(sha1_iterate_fn fn,
23+
int quiet,
24+
void *cb_data,
25+
struct transport *transport)
1826
{
1927
struct child_process rev_list;
2028
const char *argv[] = {"rev-list", "--objects",
2129
"--stdin", "--not", "--all", NULL, NULL};
2230
char commit[41];
2331
unsigned char sha1[20];
2432
int err = 0;
33+
struct packed_git *new_pack = NULL;
2534

2635
if (fn(cb_data, sha1))
2736
return err;
2837

38+
if (transport && transport->smart_options &&
39+
transport->smart_options->self_contained_and_connected &&
40+
transport->pack_lockfile &&
41+
!suffixcmp(transport->pack_lockfile, ".keep")) {
42+
struct strbuf idx_file = STRBUF_INIT;
43+
strbuf_addstr(&idx_file, transport->pack_lockfile);
44+
strbuf_setlen(&idx_file, idx_file.len - 5); /* ".keep" */
45+
strbuf_addstr(&idx_file, ".idx");
46+
new_pack = add_packed_git(idx_file.buf, idx_file.len, 1);
47+
strbuf_release(&idx_file);
48+
}
49+
2950
if (quiet)
3051
argv[5] = "--quiet";
3152

@@ -42,6 +63,17 @@ int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data)
4263

4364
commit[40] = '\n';
4465
do {
66+
/*
67+
* If index-pack already checked that:
68+
* - there are no dangling pointers in the new pack
69+
* - the pack is self contained
70+
* Then if the updated ref is in the new pack, then we
71+
* are sure the ref is good and not sending it to
72+
* rev-list for verification.
73+
*/
74+
if (new_pack && find_pack_entry_one(sha1, new_pack))
75+
continue;
76+
4577
memcpy(commit, sha1_to_hex(sha1), 40);
4678
if (write_in_full(rev_list.in, commit, 41) < 0) {
4779
if (errno != EPIPE && errno != EINVAL)

connected.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#ifndef CONNECTED_H
22
#define CONNECTED_H
33

4+
struct transport;
5+
46
/*
57
* Take callback data, and return next object name in the buffer.
68
* When called after returning the name for the last object, return -1
@@ -16,5 +18,8 @@ typedef int (*sha1_iterate_fn)(void *, unsigned char [20]);
1618
* Return 0 if Ok, non zero otherwise (i.e. some missing objects)
1719
*/
1820
extern int check_everything_connected(sha1_iterate_fn, int quiet, void *cb_data);
21+
extern int check_everything_connected_with_transport(sha1_iterate_fn, int quiet,
22+
void *cb_data,
23+
struct transport *transport);
1924

2025
#endif /* CONNECTED_H */

fetch-pack.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,7 @@ static int get_pack(struct fetch_pack_args *args,
691691
const char **av;
692692
int do_keep = args->keep_pack;
693693
struct child_process cmd;
694+
int ret;
694695

695696
memset(&demux, 0, sizeof(demux));
696697
if (use_sideband) {
@@ -747,11 +748,14 @@ static int get_pack(struct fetch_pack_args *args,
747748
strcpy(keep_arg + s, "localhost");
748749
*av++ = keep_arg;
749750
}
751+
if (args->check_self_contained_and_connected)
752+
*av++ = "--check-self-contained-and-connected";
750753
}
751754
else {
752755
*av++ = "unpack-objects";
753756
if (args->quiet || args->no_progress)
754757
*av++ = "-q";
758+
args->check_self_contained_and_connected = 0;
755759
}
756760
if (*hdr_arg)
757761
*av++ = hdr_arg;
@@ -772,7 +776,12 @@ static int get_pack(struct fetch_pack_args *args,
772776
close(cmd.out);
773777
}
774778

775-
if (finish_command(&cmd))
779+
ret = finish_command(&cmd);
780+
if (!ret || (args->check_self_contained_and_connected && ret == 1))
781+
args->self_contained_and_connected =
782+
args->check_self_contained_and_connected &&
783+
ret == 0;
784+
else
776785
die("%s failed", argv[0]);
777786
if (use_sideband && finish_async(&demux))
778787
die("error in sideband demultiplexer");

fetch-pack.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ struct fetch_pack_args {
1616
verbose:1,
1717
no_progress:1,
1818
include_tag:1,
19-
stateless_rpc:1;
19+
stateless_rpc:1,
20+
check_self_contained_and_connected:1,
21+
self_contained_and_connected:1;
2022
};
2123

2224
/*

transport.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,8 @@ static int fetch_refs_via_pack(struct transport *transport,
534534
args.quiet = (transport->verbose < 0);
535535
args.no_progress = !transport->progress;
536536
args.depth = data->options.depth;
537+
args.check_self_contained_and_connected =
538+
data->options.check_self_contained_and_connected;
537539

538540
if (!data->got_remote_heads) {
539541
connect_setup(transport, 0, 0);
@@ -551,6 +553,8 @@ static int fetch_refs_via_pack(struct transport *transport,
551553
refs = NULL;
552554
data->conn = NULL;
553555
data->got_remote_heads = 0;
556+
data->options.self_contained_and_connected =
557+
args.self_contained_and_connected;
554558

555559
free_refs(refs_tmp);
556560

transport.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ struct git_transport_options {
88
unsigned thin : 1;
99
unsigned keep : 1;
1010
unsigned followtags : 1;
11+
unsigned check_self_contained_and_connected : 1;
12+
unsigned self_contained_and_connected : 1;
1113
int depth;
1214
const char *uploadpack;
1315
const char *receivepack;

0 commit comments

Comments
 (0)