Skip to content

Commit d54d99c

Browse files
committed
Add the internal-tools subcommand `git-to-sh`, which partially serializes a git repository into a shell script.
The shell script will reproduce the repository as long as its history is linear.
1 parent ba91274 commit d54d99c

File tree

6 files changed

+295
-100
lines changed

6 files changed

+295
-100
lines changed

tests/it/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ path = "src/main.rs"
1515
clap = { version = "4.5.16", features = ["derive"] }
1616
anyhow = "1.0.86"
1717

18-
gix = { version = "0.64.0", path = "../../gix", default-features = false, features = ["attributes"] }
18+
gix = { version = "0.64.0", path = "../../gix", default-features = false, features = ["attributes", "revision"] }
1919

tests/it/src/args.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,37 @@ pub enum Subcommands {
3131
#[clap(value_parser = AsPathSpec)]
3232
patterns: Vec<gix::pathspec::Pattern>,
3333
},
34+
/// Serialize a git repository as linear history while degenerating content into a shell script that reproduces it.
35+
#[clap(visible_alias = "gts")]
36+
GitToSh {
37+
/// The amount of commits to copy from `committish`.
38+
///
39+
/// If 0, all traversable commits will be copied.
40+
#[clap(long, short = 'c', default_value_t = 0)]
41+
count: usize,
42+
/// Do not use `copy-royal` to degenerate information of blobs, but take blobs verbatim.
43+
///
44+
/// Note that this should only be done if the source repository is purely for testing
45+
/// or was created by yourself.
46+
#[clap(long)]
47+
verbatim: bool,
48+
/// The directory into which the blobs and tree declarations will be written.
49+
#[clap(long, short = 'o', default_value = ".")]
50+
output_dir: PathBuf,
51+
/// The path to the git repository to serialize.
52+
repo_dir: PathBuf,
53+
/// The name of the directory within `output_dir` for storing blobs and trees.
54+
name: String,
55+
/// A revspec of the commit to start the iteration from, like `@`.
56+
///
57+
/// Note that the history will be serialized, and multiple parents aren't allowed.
58+
committish: String,
59+
/// The pathspecs to determine which paths to copy from each commit's tree.
60+
///
61+
/// None will copy everything.
62+
#[clap(value_parser = AsPathSpec)]
63+
patterns: Vec<gix::pathspec::Pattern>,
64+
},
3465
}
3566

3667
#[derive(Clone)]

tests/it/src/commands/copy_royal.rs

Lines changed: 100 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,111 +1,114 @@
1-
use anyhow::Context;
2-
use gix::fs::Stack;
3-
use gix::pathspec::Pattern;
4-
use std::path::{Path, PathBuf};
1+
pub(super) mod function {
2+
use anyhow::Context;
3+
use gix::fs::Stack;
4+
use gix::pathspec::Pattern;
5+
use std::path::{Path, PathBuf};
56

6-
pub fn doit(
7-
dry_run: bool,
8-
worktree_dir: &Path,
9-
destination_dir: PathBuf,
10-
patterns: Vec<Pattern>,
11-
) -> anyhow::Result<()> {
12-
let prefix = if dry_run { "WOULD" } else { "Will" };
13-
let repo = gix::open(worktree_dir)?;
14-
let index = repo.index()?;
15-
let mut specs = repo.pathspec(
16-
true,
17-
// TODO: ideally this could accept patterns already.
18-
patterns.into_iter().map(|p| p.to_bstring()),
19-
true,
20-
&index,
21-
gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping,
22-
)?;
23-
let mut create_dir = CreateDir { dry_run };
24-
let mut stack = gix::fs::Stack::new(destination_dir);
25-
for (rela_path, _entry) in specs
26-
.index_entries_with_paths(&index)
27-
.context("Didn't find a single entry to copy")?
28-
{
29-
let rela_path = gix::path::from_bstr(rela_path);
30-
let src = worktree_dir.join(&rela_path);
31-
stack.make_relative_path_current(&rela_path, &mut create_dir)?;
32-
let dst = stack.current();
7+
pub fn copy_royal(
8+
dry_run: bool,
9+
worktree_dir: &Path,
10+
destination_dir: PathBuf,
11+
patterns: Vec<Pattern>,
12+
) -> anyhow::Result<()> {
13+
let prefix = if dry_run { "WOULD" } else { "Will" };
14+
let repo = gix::open(worktree_dir)?;
15+
let index = repo.index()?;
16+
let mut specs = repo.pathspec(
17+
true,
18+
// TODO: ideally this could accept patterns already.
19+
patterns.into_iter().map(|p| p.to_bstring()),
20+
true,
21+
&index,
22+
gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping,
23+
)?;
24+
let mut create_dir = CreateDir { dry_run };
25+
let mut stack = gix::fs::Stack::new(destination_dir);
26+
for (rela_path, _entry) in specs
27+
.index_entries_with_paths(&index)
28+
.context("Didn't find a single entry to copy")?
29+
{
30+
let rela_path = gix::path::from_bstr(rela_path);
31+
let src = worktree_dir.join(&rela_path);
32+
stack.make_relative_path_current(&rela_path, &mut create_dir)?;
33+
let dst = stack.current();
3334

34-
eprintln!(
35-
"{prefix} copy '{src}' to '{dst}'",
36-
src = src.display(),
37-
dst = dst.display()
38-
);
39-
if !dry_run {
40-
let content = std::fs::read_to_string(&src).with_context(|| {
41-
format!(
42-
"Need UTF-8 decodable content in '{src}' - skip binaries with pathspec",
43-
src = src.display()
44-
)
45-
})?;
46-
std::fs::write(dst, remapped(content))?
35+
eprintln!(
36+
"{prefix} copy '{src}' to '{dst}'",
37+
src = src.display(),
38+
dst = dst.display()
39+
);
40+
if !dry_run {
41+
let content = std::fs::read_to_string(&src).with_context(|| {
42+
format!(
43+
"Need UTF-8 decodable content in '{src}' - skip binaries with pathspec",
44+
src = src.display()
45+
)
46+
})?;
47+
std::fs::write(dst, remapped(&content))?
48+
}
4749
}
50+
Ok(())
4851
}
49-
Ok(())
50-
}
5152

52-
fn remapped(i: String) -> String {
53-
i.chars()
54-
.filter_map(|c| {
55-
Some(if c.is_alphabetic() {
56-
if c.is_uppercase() {
57-
match (c as u32) % 10 {
58-
0 => 'A',
59-
1 => 'E',
60-
2 => 'I',
61-
3 => 'O',
62-
4 => 'U',
63-
5 => 'X',
64-
6 => 'R',
65-
7 => 'S',
66-
8 => 'T',
67-
9 => 'Y',
68-
_ => unreachable!(),
53+
pub fn remapped(i: &str) -> String {
54+
i.chars()
55+
.filter_map(|c| {
56+
Some(if c.is_alphabetic() {
57+
if c.is_uppercase() {
58+
match (c as u32) % 10 {
59+
0 => 'A',
60+
1 => 'E',
61+
2 => 'I',
62+
3 => 'O',
63+
4 => 'U',
64+
5 => 'X',
65+
6 => 'R',
66+
7 => 'S',
67+
8 => 'T',
68+
9 => 'Y',
69+
_ => unreachable!(),
70+
}
71+
} else {
72+
match (c as u32) % 10 {
73+
0 => 'a',
74+
1 => 'e',
75+
2 => 'i',
76+
3 => 'o',
77+
4 => 'u',
78+
5 => 'x',
79+
6 => 'r',
80+
7 => 's',
81+
8 => 't',
82+
9 => 'y',
83+
_ => unreachable!(),
84+
}
6985
}
86+
} else if c.is_whitespace() || c.is_ascii_punctuation() || c.is_ascii_digit() {
87+
c
7088
} else {
71-
match (c as u32) % 10 {
72-
0 => 'a',
73-
1 => 'e',
74-
2 => 'i',
75-
3 => 'o',
76-
4 => 'u',
77-
5 => 'x',
78-
6 => 'r',
79-
7 => 's',
80-
8 => 't',
81-
9 => 'y',
82-
_ => unreachable!(),
83-
}
84-
}
85-
} else if c.is_whitespace() || c.is_ascii_punctuation() || c.is_ascii_digit() {
86-
c
87-
} else {
88-
return None;
89+
return None;
90+
})
8991
})
90-
})
91-
.collect()
92-
}
92+
.collect()
93+
}
9394

94-
struct CreateDir {
95-
dry_run: bool,
96-
}
95+
struct CreateDir {
96+
dry_run: bool,
97+
}
9798

98-
impl gix::fs::stack::Delegate for CreateDir {
99-
fn push_directory(&mut self, stack: &Stack) -> std::io::Result<()> {
100-
if !self.dry_run && !stack.current().is_dir() {
101-
std::fs::create_dir(stack.current())?;
99+
impl gix::fs::stack::Delegate for CreateDir {
100+
fn push_directory(&mut self, stack: &Stack) -> std::io::Result<()> {
101+
if !self.dry_run && !stack.current().is_dir() {
102+
std::fs::create_dir(stack.current())?;
103+
}
104+
Ok(())
102105
}
103-
Ok(())
104-
}
105106

106-
fn push(&mut self, _is_last_component: bool, _stack: &Stack) -> std::io::Result<()> {
107-
Ok(())
108-
}
107+
fn push(&mut self, _is_last_component: bool, _stack: &Stack) -> std::io::Result<()> {
108+
Ok(())
109+
}
109110

110-
fn pop_directory(&mut self) {}
111+
fn pop_directory(&mut self) {}
112+
}
111113
}
114+
pub use function::remapped;

tests/it/src/commands/git_to_sh.rs

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
pub struct Options {
2+
pub patterns: Vec<gix::pathspec::Pattern>,
3+
pub verbatim: bool,
4+
pub max_count: usize,
5+
}
6+
7+
pub(super) mod function {
8+
use anyhow::{bail, Context};
9+
use gix::object::tree::EntryKind;
10+
use gix::objs::FindExt;
11+
use std::borrow::Cow;
12+
use std::path::Path;
13+
14+
use super::Options;
15+
16+
pub fn git_to_sh(
17+
output_dir: &Path,
18+
repo_dir: &Path,
19+
name: &str,
20+
committish: &str,
21+
mut out: impl std::io::Write,
22+
Options {
23+
patterns,
24+
verbatim,
25+
max_count,
26+
}: Options,
27+
) -> anyhow::Result<()> {
28+
let repo = gix::open(repo_dir)?;
29+
let commit = repo.rev_parse_single(committish)?.object()?.try_into_commit()?;
30+
31+
let assets = output_dir.join(name);
32+
std::fs::create_dir_all(&assets)?;
33+
34+
let mut commits = Vec::new();
35+
let mut tree_buf = Vec::new();
36+
let mut current = 0;
37+
for entry in commit.id().ancestors().first_parent_only().all()? {
38+
let entry = entry?;
39+
40+
let commit = entry.id().object()?.into_commit();
41+
commits.push((commit.id, commit.message_raw_sloppy().to_owned()));
42+
let index = repo.index_from_tree(&commit.tree_id()?)?;
43+
44+
tree_buf.clear();
45+
write_tree_as_update_index_format(&repo, &index, &mut tree_buf, &assets, verbatim, patterns.clone())?;
46+
47+
let tree_file = assets.join(format!("{}.tree", commit.id));
48+
std::fs::write(tree_file, &tree_buf)?;
49+
current += 1;
50+
51+
if current >= max_count {
52+
break;
53+
}
54+
}
55+
56+
writeln!(
57+
&mut out,
58+
"# The following is to be executed in the receiving git repository"
59+
)?;
60+
writeln!(&mut out, "ROOT=to-be-specified-by-user")?;
61+
writeln!(&mut out, "index=.git/index")?;
62+
writeln!(&mut out, "git hash-object -w -t blob -- $ROOT/{name}/*.blob")?;
63+
for (commit_id, commit_msg) in commits.iter().rev() {
64+
writeln!(&mut out, "rm \"$index\"")?;
65+
writeln!(
66+
&mut out,
67+
"git update-index --index-info < \"$ROOT/{name}/{commit_id}.tree\""
68+
)?;
69+
let commit_msg_file = assets.join(format!("{commit_id}.msg"));
70+
std::fs::write(commit_msg_file, commit_msg)?;
71+
writeln!(&mut out, "git commit --allow-empty -F \"$ROOT/{name}/{commit_id}.msg\"")?;
72+
}
73+
74+
Ok(())
75+
}
76+
77+
fn write_tree_as_update_index_format(
78+
repo: &gix::Repository,
79+
index: &gix::index::State,
80+
out: &mut dyn std::io::Write,
81+
output_dir: &Path,
82+
verbatim: bool,
83+
patterns: Vec<gix::pathspec::Pattern>,
84+
) -> anyhow::Result<()> {
85+
let mut blob_buf = Vec::new();
86+
let mut specs = repo.pathspec(
87+
true,
88+
// TODO: ideally this could accept patterns already.
89+
patterns.clone().into_iter().map(|p| p.to_bstring()),
90+
true,
91+
index,
92+
gix::worktree::stack::state::attributes::Source::IdMapping,
93+
)?;
94+
95+
for (rela_path, entry) in specs.index_entries_with_paths(index).into_iter().flatten() {
96+
if rela_path.contains(&b'\n') {
97+
bail!("Entry at '{rela_path}' contained a newline, which currently can't be encoded. Preferred newlines over NULL separation.")
98+
}
99+
100+
let (blob_id, blob_data) = match entry.mode.to_tree_entry_mode() {
101+
None => {
102+
bail!("Couldn't interpret mode of tree entry at '{rela_path}'")
103+
}
104+
Some(mode) => match mode.kind() {
105+
EntryKind::Tree => {
106+
unreachable!("Can't have trees in indices")
107+
}
108+
EntryKind::Blob | EntryKind::BlobExecutable => {
109+
let obj = repo.objects.find(&entry.id, &mut blob_buf)?;
110+
if verbatim {
111+
(entry.id, Cow::Borrowed(&blob_buf))
112+
} else {
113+
let data = std::str::from_utf8(obj.data).with_context(|| {
114+
format!("Entry at '{rela_path}' was not valid UTF8 and can't be remapped")
115+
})?;
116+
let mapped = crate::commands::copy_royal::remapped(data);
117+
(
118+
gix::objs::compute_hash(repo.object_hash(), gix::object::Kind::Blob, mapped.as_bytes()),
119+
Cow::Owned(mapped.into()),
120+
)
121+
}
122+
}
123+
EntryKind::Link => {
124+
repo.objects.find(&entry.id, &mut blob_buf)?;
125+
(entry.id, Cow::Borrowed(&blob_buf))
126+
}
127+
EntryKind::Commit => continue,
128+
},
129+
};
130+
let blob_path = output_dir.join(format!("{blob_id}.blob"));
131+
std::fs::write(blob_path, blob_data.as_ref())?;
132+
133+
writeln!(out, "{mode:06o} {blob_id}\t{rela_path}", mode = entry.mode)?;
134+
}
135+
Ok(())
136+
}
137+
}

0 commit comments

Comments
 (0)