Steve Haslam (araqnid) wrote,
Steve Haslam
araqnid

git fsck the hard way

"git fsck" the hard way:

# "git fsck" the hard way, for a repository whose objects are all loose.
# Run from the directory that contains objects/ (normally .git).
#
# Stage 1: inflate every loose object into ./raw-<sha1>, check its
#          "<type> <length>\0" header, and let sha1sum verify that the
#          SHA-1 of the inflated data matches the object name.
# Stage 2: walk the inflated objects and check that every object referenced
#          by a commit, tree or tag exists and has the expected type.
# The raw-* scratch files are removed only when both stages succeed.
#
# NOTE: the exit status of the stage 1 pipeline is that of sha1sum, so a
# failure inside the first perl is only visible as its message on stderr.
rm -f raw-* &&
perl -MCompress::Zlib=uncompress -e '
    use strict;
    use warnings;

    undef $/;    # slurp mode: each read returns the whole file

    for my $infile (@ARGV) {
        open(my $in, "<", $infile) or die "open: $infile: $!\n";
        binmode($in);
        my $deflated = <$in>;
        close($in);

        # The object name is the two-digit directory plus the 38-digit file name.
        $infile =~ m{objects/([0-9a-f]{2})/([0-9a-f]{38})$}
            or die "Bad filename: $infile\n";
        my $objid = "$1$2";

        my $inflated = uncompress($deflated);
        defined($inflated) or die "$infile: zlib inflate failed\n";

        # Strip the "<type> <length>\0" header from a copy and check the
        # declared length against what is left.
        (my $raw = $inflated) =~ s/^(\S+) (\d+)\0//
            or die "$infile: bad header\n";
        my $length = $2;
        length($raw) == $length or die "$infile: raw length mismatch\n";

        # Keep the inflated object (header included) for sha1sum and stage 2.
        open(my $out, ">", "raw-$objid") or die "open: raw-$objid: $!\n";
        binmode($out);
        print {$out} $inflated or die "write: raw-$objid: $!\n";
        close($out) or die "close: raw-$objid: $!\n";

        # One "<sha1>  <file>" line per object, the format sha1sum -c reads.
        print "$objid\x20\x20raw-$objid\n";
    }
' objects/??/* | sha1sum -c &&
perl -e '
    use strict;
    use warnings;

    undef $/;    # slurp mode: each read returns the whole file

    # Return the type word from the header of ./raw-<objid>; die if the
    # file cannot be opened.
    sub get_objtype {
        my ($objid) = @_;
        open(my $in, "<", "raw-$objid") or die "object $objid not found\n";
        binmode($in);
        my $inflated = <$in>;
        close($in);
        my ($type) = split(/ /, $inflated, 2);
        return $type;
    }

    # Die unless the object named $objid has type $wanted.
    sub assert_object_is {
        my ($objid, $wanted) = @_;
        my $type = get_objtype($objid);
        $type eq $wanted
            or die "object $objid is not a $wanted but rather a $type\n";
        return;
    }

    # Split the header part of a commit or tag body (everything before the
    # first blank line) into a list of [name, value] pairs.
    sub parse_headers {
        my ($raw) = @_;
        my ($intro) = split(/\n\n/, $raw, 2);
        return map { [ split(/ /, $_, 2) ] } split(/\n/, $intro // "");
    }

    for my $infile (@ARGV) {
        open(my $in, "<", $infile) or die "open: $infile: $!\n";
        binmode($in);
        my $inflated = <$in>;
        close($in);

        my ($header, $raw) = split(/\0/, $inflated, 2);
        defined($raw) or die "$infile: bad header\n";
        my ($type, $length) = split(/ /, $header);
        (my $objid = $infile) =~ s{raw-}{};
        print "$objid $type $length\n";

        if ($type eq "commit") {
            # A commit must name exactly the objects it claims: one tree
            # and any number of parent commits.
            my $found_tree = 0;
            for my $hdr (parse_headers($raw)) {
                my ($name, $value) = @$hdr;
                next unless defined $name;
                if ($name eq "tree") {
                    $found_tree = 1;
                    print " -> tree $value\n";
                    assert_object_is($value, "tree");
                }
                elsif ($name eq "parent") {
                    print " -> commit $value\n";
                    assert_object_is($value, "commit");
                }
            }
            die "$infile: no tree\n" unless $found_tree;
        }
        elsif ($type eq "tree") {
            # Each entry is "<mode> <name>\0" followed by a 20-byte binary
            # SHA-1. Test the length, not the truth, of what remains so a
            # leftover such as "0" is not mistaken for the end of the tree.
            my $residual = $raw;
            while (length $residual) {
                my ($info, $rest) = split(/\0/, $residual, 2);
                defined($rest) && length($rest) >= 20
                    or die "$infile: truncated tree entry\n";
                my ($mode, $filename) = split(/ /, $info, 2);
                defined($filename) or die "$infile: malformed tree entry\n";

                my $refd_objid = unpack("H40", $rest);
                $residual = substr($rest, 20);
                print " -> $filename $mode $refd_objid\n";

                # Mode 160000 is a gitlink (submodule): it names a commit
                # that lives in another repository, so there is nothing
                # to look up here.
                next if $mode eq "160000";

                my $refd_objtype = get_objtype($refd_objid);
                $refd_objtype eq "tree" || $refd_objtype eq "blob"
                    or die "$objid: referenced object $refd_objid neither a tree nor a blob: $refd_objtype\n";
            }
        }
        elsif ($type eq "tag") {
            # A tag names its target in the "object" header and states the
            # target type in the "type" header; check that they agree.
            my @hdrs = grep { defined $_->[0] } parse_headers($raw);
            my ($refd_objid)   = map { $_->[1] } grep { $_->[0] eq "object" } @hdrs;
            my ($refd_objtype) = map { $_->[1] } grep { $_->[0] eq "type" } @hdrs;
            defined($refd_objid) && defined($refd_objtype)
                or die "$infile: tag lacks object or type header\n";
            print " -> $refd_objtype $refd_objid\n";
            assert_object_is($refd_objid, $refd_objtype);
        }
    }
' raw-* && rm raw-*


NB: all objects need to be unpacked first; that is left as an exercise for the reader. Making the script automatically get objects from packs is left as an exercise for the particularly masochistic user.
Tags: geek, git
Subscribe
  • Post a new comment

    Error

    default userpic

    Your reply will be screened

    Your IP address will be recorded 

    When you submit the form an invisible reCAPTCHA check will be performed.
    You must follow the Privacy Policy and Google Terms of use.
  • 0 comments