#!/usr/bin/env perl
#
# Rewrite a QDF-form PDF so that its bookkeeping is consistent again.
#
# The script copies its input (a file named on the command line, or
# standard input) to standard output line by line while tracking byte
# offsets, and regenerates everything that depends on those offsets:
# stream lengths stored in separate length objects, object stream
# dictionaries and offset tables, the cross-reference table or
# cross-reference stream, the /Size entry, and the startxref value.
#
# Usage: fix-qdf [ file ] > output
#        fix-qdf --version

require 5.008_001;
use warnings;
use strict;
use File::Basename;

my $whoami = basename($0);
my $dirname = dirname($0);

if ((@ARGV == 1) && ($ARGV[0] eq '--version')) {
    # Delegate to the qpdf binary that lives next to this script.  exec
    # only returns on failure, in which case we exit non-zero.
    exec "$dirname/qpdf", '--version';
    exit 2;
}

# $offset is the output offset just past the most recently read line;
# $last_offset is the output offset at which that line starts.  Both are
# adjusted whenever emitted text differs in length from the input text.
my $offset = 0;
my $last_offset = 0;

# Input handle; read exclusively through get_line().
my $in;
my $file = shift(@ARGV);
if ((defined $file) && ($file ne '-')) {
    # Three-argument open so the file name can never be taken as a mode.
    open($in, '<', $file) or die "$whoami: can't open $file: $!\n";
}
else {
    # No argument, or "-", means standard input.
    $file = 'stdin';
    open($in, '<&', \*STDIN) or die "$whoami: can't dup stdin: $!\n";
}
binmode $in;
binmode STDOUT;

# The first three lines are copied verbatim: the PDF header, one more
# line that is not inspected, and the QDF marker.
my $line = get_line();
if (! ((defined $line) && ($line =~ m/^%PDF-1\.\d+\b/))) {
    die "$whoami: $file: not a pdf file\n";
}
print $line;
$line = get_line();
die "$whoami: $file: premature EOF\n" unless defined $line;
print $line;
$line = get_line();
if (! ((defined $line) && ($line =~ m/^%QDF-1\.\d+\b/))) {
    die "$whoami: $file: not a qdf file\n";
}
print $line;

# Number of the most recently seen object; objects must be consecutive.
my $last_obj = 0;
# One entry per object, in object-number order:
#   [1, offset]                     object stored directly in the file
#   [2, stream object id, index]    object stored inside an object stream
my @xref = ();

my $stream_start = 0;
my $stream_length = 0;
my $xref_offset = 0;
my $xref_f1_nbytes = 0;
my $xref_f2_nbytes = 0;
my $xref_size = 0;

# Parser states.
my $cur_state = 0;
my $st_top = ++$cur_state;
my $st_in_obj = ++$cur_state;
my $st_in_stream = ++$cur_state;
my $st_after_stream = ++$cur_state;
my $st_in_ostream_dict = ++$cur_state;
my $st_in_ostream_offsets = ++$cur_state;
my $st_in_ostream_outer = ++$cur_state;
my $st_in_ostream_obj = ++$cur_state;
my $st_in_xref_stream_dict = ++$cur_state;
my $st_in_length = ++$cur_state;
my $st_at_xref = ++$cur_state;
my $st_before_trailer = ++$cur_state;
my $st_in_trailer = ++$cur_state;
my $st_done = ++$cur_state;

# Per-object-stream accumulators; reset by write_ostream().
my @ostream = ();            # stream content lines, emitted verbatim
my @ostream_offsets = ();    # offset of each contained object
my @ostream_discarded = ();  # input lines dropped and regenerated
my $ostream_idx = 0;         # index of the next object in the stream
my $ostream_id = 0;          # object number of the object stream itself
my $ostream_extends = "";    # "N 0 R" taken from /Extends, if present

my $state = $st_top;
while (defined($line = get_line())) {
    if ($state == $st_top) {
        if ($line =~ m/^(\d+) 0 obj$/) {
            check_obj_id($1);
            $state = $st_in_obj;
        }
        elsif ($line =~ m/^xref$/) {
            $xref_offset = $last_offset;
            $state = $st_at_xref;
        }
        print $line;
    }
    elsif ($state == $st_in_obj) {
        print $line;
        if ($line =~ m/^stream$/) {
            $state = $st_in_stream;
            $stream_start = $offset;
        }
        elsif ($line =~ m/^endobj$/) {
            $state = $st_top;
        }
        elsif ($line =~ m,/Type /ObjStm,) {
            $state = $st_in_ostream_dict;
            $ostream_id = $last_obj;
        }
        elsif ($line =~ m,/Type /XRef,) {
            # The xref stream is the object just registered, so its own
            # offset is the last entry recorded.
            $xref_offset = $xref[-1][1];

            # Bytes needed to hold the largest offset.
            $xref_f1_nbytes = 0;
            my $t = $xref_offset;
            while ($t) {
                $t >>= 8;
                ++$xref_f1_nbytes;
            }

            # Figure out how many bytes we need for ostream index.
            # Make sure we get at least 1 byte even if there are no
            # object streams.
            $xref_f2_nbytes = 0;
            my $max_objects = 1;
            foreach my $e (@xref) {
                my ($type, $f1, $f2) = @$e;
                if ((defined $f2) && ($f2 > $max_objects)) {
                    $max_objects = $f2;
                }
            }
            while ($max_objects) {
                $max_objects >>= 8;
                ++$xref_f2_nbytes;
            }

            # One extra entry accounts for object 0.
            my $esize = 1 + $xref_f1_nbytes + $xref_f2_nbytes;
            $xref_size = 1 + @xref;
            my $length = $xref_size * $esize;
            print " /Length $length\n";
            print " /W [ 1 $xref_f1_nbytes $xref_f2_nbytes ]\n";
            $state = $st_in_xref_stream_dict;
        }
    }
    elsif ($state == $st_in_ostream_dict) {
        # The remainder of the object stream dictionary is dropped here
        # and regenerated by write_ostream(); only /Extends is kept.
        if ($line =~ m/^stream/) {
            $state = $st_in_ostream_offsets;
        }
        else {
            push(@ostream_discarded, $line);
            if ($line =~ m,/Extends (\d+ 0 R),) {
                $ostream_extends = $1;
            }
        }
        # discard line
    }
    elsif ($state == $st_in_ostream_offsets) {
        # Skip the old offset table up to the first object marker.
        if ($line =~ m/^\%\% Object stream: object (\d+)/) {
            check_obj_id($1);
            $stream_start = $last_offset;
            $state = $st_in_ostream_outer;
            push(@ostream, $line);
        }
        else {
            push(@ostream_discarded, $line);
        }
        # discard line
    }
    elsif ($state == $st_in_ostream_outer) {
        # First line of a contained object: record where it starts.
        adjust_ostream_xref();
        push(@ostream_offsets, $last_offset - $stream_start);
        $state = $st_in_ostream_obj;
        push(@ostream, $line);
    }
    elsif ($state == $st_in_ostream_obj) {
        push(@ostream, $line);
        if ($line =~ m/^\%\% Object stream: object (\d+)/) {
            check_obj_id($1);
            $state = $st_in_ostream_outer;
        }
        elsif ($line =~ m/^endstream/) {
            $stream_length = $last_offset - $stream_start;
            write_ostream();
            $state = $st_in_obj;
        }
    }
    elsif ($state == $st_in_xref_stream_dict) {
        if ($line =~ m,/(Length|W) ,) {
            # already printed
        }
        elsif ($line =~ m,/Size ,) {
            print " /Size $xref_size\n";
        }
        else {
            print $line;
        }
        if ($line =~ m/^stream\n/) {
            # Emit the binary xref stream: entry 0 is all zeros, followed
            # by one big-endian entry per object.
            my $pack = "(C C$xref_f1_nbytes C$xref_f2_nbytes)";
            print pack($pack, 0, 0, 0);
            foreach my $x (@xref) {
                my ($type, $f1, $f2) = @$x;
                $f2 = 0 unless defined $f2;
                my @f1 = ();
                my @f2 = ();
                foreach my $d ([\@f1, $f1, $xref_f1_nbytes],
                               [\@f2, $f2, $xref_f2_nbytes]) {
                    my ($fa, $f, $nbytes) = @$d;
                    for (my $i = 0; $i < $nbytes; ++$i) {
                        unshift(@$fa, $f & 0xff);
                        $f >>= 8;
                    }
                }
                print pack($pack, $type, @f1, @f2);
            }
            print "\nendstream\nendobj\n\n";
            print "startxref\n$xref_offset\n\%\%EOF\n";
            $state = $st_done;
        }
    }
    elsif ($state == $st_in_stream) {
        if ($line =~ m/^endstream$/) {
            $stream_length = $last_offset - $stream_start;
            $state = $st_after_stream;
        }
        print $line;
    }
    elsif ($state == $st_after_stream) {
        if ($line =~ m/^\%QDF: ignore_newline$/) {
            # The newline before endstream is not part of the data.
            --$stream_length;
        }
        elsif ($line =~ m/^(\d+) 0 obj$/) {
            # The object after a stream holds that stream's length.
            check_obj_id($1);
            $state = $st_in_length;
        }
        print $line;
    }
    elsif ($state == $st_in_length) {
        if ($line !~ m/^\d+$/) {
            die "$file:$.: expected integer\n";
        }
        # Replace the stored length and compensate the running offset
        # for any change in the number of digits.
        my $new = "$stream_length\n";
        $offset -= length($line);
        $offset += length($new);
        print $new;
        $state = $st_top;
    }
    elsif ($state == $st_at_xref) {
        # The current line (the old subsection header) is replaced by a
        # freshly generated table; old entries are skipped below.
        my $n = scalar(@xref);
        print "0 ", $n+1, "\n0000000000 65535 f \n";
        for my $entry (@xref) {
            my ($type, $f1, $f2) = @$entry;
            printf("%010d 00000 n \n", $f1);
        }
        $state = $st_before_trailer;
    }
    elsif ($state == $st_before_trailer) {
        if ($line =~ m/^trailer <</) {
            print $line;
            $state = $st_in_trailer;
        }
        # no output for old xref entries
    }
    elsif ($state == $st_in_trailer) {
        # Copy the trailer dictionary, correcting /Size on the way.
        if ($line =~ m,/Size ,) {
            print " /Size ", scalar(@xref) + 1, "\n";
        }
        else {
            print $line;
        }
        if ($line =~ m/^>>$/) {
            print "startxref\n$xref_offset\n\%\%EOF\n";
            $state = $st_done;
        }
    }
    elsif ($state == $st_done) {
        # ignore
    }
}

die "$whoami: $file: premature EOF\n" unless $state == $st_done;

# Read the next input line and advance the offset bookkeeping.
# Returns the line including its terminator, or undef at end of input.
sub get_line {
    my $line = scalar(<$in>);
    if (defined $line) {
        $last_offset = $offset;
        $offset += length($line);
    }
    return $line;
}

# Verify that $cur_obj directly follows the previously seen object and
# record it in @xref as a directly stored object that starts at the
# current line.  Dies if objects are out of sequence.
sub check_obj_id {
    my $cur_obj = shift;
    if ($cur_obj != $last_obj + 1) {
        die "$file:$.: expected object ", $last_obj + 1, "\n";
    }
    $last_obj = $cur_obj;
    push(@xref, [1, $last_offset]);
}

# Replace the most recent @xref entry with one that marks the object as
# living in the current object stream at the next free index.
sub adjust_ostream_xref {
    pop(@xref);
    push(@xref, [2, $ostream_id, $ostream_idx++]);
}

# Emit the regenerated dictionary tail, offset table and buffered content
# of the object stream just read, fix up $offset for the difference
# between discarded and generated text, and reset the accumulators.
sub write_ostream {
    my $first = $ostream_offsets[0];
    my $onum = $ostream_id;
    my $offsets = "";
    my $n = scalar(@ostream_offsets);
    for my $abs (@ostream_offsets) {
        # Contained objects are numbered consecutively after the stream
        # object; their offsets are relative to the first object.
        my $rel = $abs - $first;
        ++$onum;
        $offsets .= "$onum $rel\n";
    }
    my $offset_adjust = length($offsets);
    $first += length($offsets);
    $stream_length += length($offsets);

    my $dict_data = "";
    $dict_data .= " /Length $stream_length\n";
    $dict_data .= " /N $n\n";
    $dict_data .= " /First $first\n";
    if ($ostream_extends) {
        $dict_data .= " /Extends $ostream_extends\n";
    }
    $dict_data .= ">>\n";
    $offset_adjust += length($dict_data);

    print $dict_data;
    print "stream\n";
    print $offsets;
    foreach my $content (@ostream) {
        print $content;
    }

    # Discarded input lines never reached the output; generated text did.
    for my $dropped (@ostream_discarded) {
        $offset -= length($dropped);
    }
    $offset += $offset_adjust;

    $ostream_idx = 0;
    $ostream_id = 0;
    @ostream = ();
    @ostream_offsets = ();
    @ostream_discarded = ();
    $ostream_extends = "";
}