2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-11-10 23:20:58 +00:00
qpdf/qpdf/fix-qdf
Jay Berkenbilt b356b9dfa2 fix-qdf: handle object streams with > 255 objects
fix-qdf was previously hard-coding the number of bytes for the f2
field of the xref stream entry. This addresses issue #37. Thanks
aluebcke for reporting.
2015-05-24 16:52:42 -04:00

398 lines
8.2 KiB
Perl
Executable File

#!/usr/bin/env perl
require 5.008_001;
use warnings;
use strict;
use File::Basename;
# Program name (for diagnostics) and the directory this script lives in
# (used to locate the companion qpdf executable).
my $whoami = basename($0);
my $dirname = dirname($0);

# When invoked with the single argument --version, hand off to the qpdf
# executable in the same directory. exec only returns if it fails, in
# which case we exit with status 2.
if ((@ARGV == 1) && ($ARGV[0] eq '--version'))
{
    exec "$dirname/qpdf", '--version';
    exit 2;
}

# Byte positions maintained by get_line(): $offset is the position just
# past the most recently read line and $last_offset is the position at
# which that line started. Code that emits text of a different length
# than it read adjusts $offset so that it follows the output.
my $offset = 0;
my $last_offset = 0;

# Input comes from the file named by the first argument, or from
# standard input when no argument is given.
my $file = shift(@ARGV);
if ((defined $file) && ($file ne '-'))
{
    # Three-argument open takes the name literally: leading or trailing
    # whitespace and characters such as '>' or '|' in the name are not
    # interpreted as part of the open mode.
    open(F, '<', $file) or die "$whoami: can't open $file: $!\n";
}
else
{
    # "-" keeps meaning standard input, as it did with the earlier
    # two-argument open. $file is only used in messages from here on.
    $file = 'stdin' unless defined $file;
    open(F, "<&STDIN") or die "$whoami: can't dup stdin: $!\n";
}
binmode F;
binmode STDOUT;

# The first three lines are copied through unchanged after checking
# that the first is a PDF 1.x header and the third is the QDF marker.
my $line = get_line();
if (! ((defined $line) && ($line =~ m/^%PDF-1\.\d+\b/)))
{
    die "$whoami: $file: not a pdf file\n";
}
print $line;
$line = get_line();
die "$whoami: $file: premature EOF\n" unless defined $line;
print $line;
$line = get_line();
# The dot is escaped so that only a literal "%QDF-1.<digits>" matches.
if (! ((defined $line) && ($line =~ m/^%QDF-1\.\d+\b/)))
{
    die "$whoami: $file: not a qdf file\n";
}
print $line;
# Bookkeeping shared by the main loop and the helper subs.

# Number of the most recently seen object, and one entry per object seen
# so far: [1, offset] or [2, object stream id, index within stream].
my $last_obj = 0;
my @xref;

# Start position and length of the stream currently being measured.
my ($stream_start, $stream_length) = (0, 0);

# Position of the cross-reference data, the byte widths of the two
# variable-size fields of a cross-reference stream entry, and the number
# of entries in the cross-reference stream.
my ($xref_offset, $xref_f1_nbytes, $xref_f2_nbytes, $xref_size) = (0, 0, 0, 0);

# Parser states, numbered consecutively from 1.
my ($st_top,
    $st_in_obj,
    $st_in_stream,
    $st_after_stream,
    $st_in_ostream_dict,
    $st_in_ostream_offsets,
    $st_in_ostream_outer,
    $st_in_ostream_obj,
    $st_in_xref_stream_dict,
    $st_in_length,
    $st_at_xref,
    $st_before_trailer,
    $st_in_trailer,
    $st_done) = (1 .. 14);
# Highest state number handed out.
my $cur_state = $st_done;

# Object stream currently being collected: the lines to emit, the
# position of each contained object relative to the stream start, and
# the input lines that are dropped and regenerated.
my (@ostream, @ostream_offsets, @ostream_discarded);
my ($ostream_idx, $ostream_id) = (0, 0);
my $ostream_extends = "";

# The parser starts out between objects.
my $state = $st_top;
# Main pass: read the input one line at a time and copy it to standard
# output, regenerating stream lengths, object stream headers, and the
# cross-reference data on the way. $state selects how each line is
# handled; the run must finish in $st_done.
while (defined($line = get_line()))
{
    if ($state == $st_top)
    {
        # Between objects: watch for an object header or for the start
        # of a cross-reference table. The line is copied either way.
        if ($line =~ m/^(\d+) 0 obj$/)
        {
            check_obj_id($1);
            $state = $st_in_obj;
        }
        elsif ($line =~ m/^xref$/)
        {
            $xref_offset = $last_offset;
            $state = $st_at_xref;
        }
        print $line;
    }
    elsif ($state == $st_in_obj)
    {
        print $line;
        if ($line =~ m/^stream$/)
        {
            $state = $st_in_stream;
            # $offset is already past the "stream" line, so this is the
            # position of the first byte of stream data.
            $stream_start = $offset;
        }
        elsif ($line =~ m/^endobj$/)
        {
            $state = $st_top;
        }
        elsif ($line =~ m,/Type /ObjStm,)
        {
            $state = $st_in_ostream_dict;
            $ostream_id = $last_obj;
        }
        elsif ($line =~ m,/Type /XRef,)
        {
            # The most recent xref entry belongs to the object we are
            # in, i.e. the cross-reference stream itself.
            $xref_offset = $xref[-1][1];

            # Bytes needed to represent that offset.
            $xref_f1_nbytes = 0;
            my $t = $xref_offset;
            while ($t)
            {
                $t >>= 8;
                ++$xref_f1_nbytes;
            }

            # Figure out how many bytes we need for ostream index.
            # Make sure we get at least 1 byte even if there are no
            # object streams.
            my $max_objects = 1;
            foreach my $e (@xref)
            {
                my $f2 = $e->[2];
                if ((defined $f2) && ($f2 > $max_objects))
                {
                    $max_objects = $f2;
                }
            }
            # Start from zero, as is done for the f1 width above, so
            # the result does not depend on any earlier value.
            $xref_f2_nbytes = 0;
            while ($max_objects)
            {
                $max_objects >>= 8;
                ++$xref_f2_nbytes;
            }

            # Each entry is one type byte plus the two fields; there is
            # one entry per object plus one for object 0.
            my $esize = 1 + $xref_f1_nbytes + $xref_f2_nbytes;
            $xref_size = 1 + @xref;
            my $length = $xref_size * $esize;
            print " /Length $length\n";
            print " /W [ 1 $xref_f1_nbytes $xref_f2_nbytes ]\n";
            $state = $st_in_xref_stream_dict;
        }
    }
    elsif ($state == $st_in_ostream_dict)
    {
        # Rest of an object stream dictionary: nothing is printed here.
        # The dropped lines are remembered so that write_ostream() can
        # account for their length, and any /Extends reference is kept.
        if ($line =~ m/^stream/)
        {
            $state = $st_in_ostream_offsets;
        }
        else
        {
            push(@ostream_discarded, $line);
            if ($line =~ m,/Extends (\d+ 0 R),)
            {
                $ostream_extends = $1;
            }
        }
        # discard line
    }
    elsif ($state == $st_in_ostream_offsets)
    {
        # Lines before the first "%% Object stream" marker are dropped;
        # write_ostream() generates a fresh offset table.
        if ($line =~ m/^\%\% Object stream: object (\d+)/)
        {
            check_obj_id($1);
            $stream_start = $last_offset;
            $state = $st_in_ostream_outer;
            push(@ostream, $line);
        }
        else
        {
            push(@ostream_discarded, $line);
        }
        # discard line
    }
    elsif ($state == $st_in_ostream_outer)
    {
        # First line after a marker: turn the xref entry that
        # check_obj_id() just added into an object stream entry and
        # record where this object starts relative to the stream start.
        adjust_ostream_xref();
        push(@ostream_offsets, $last_offset - $stream_start);
        $state = $st_in_ostream_obj;
        push(@ostream, $line);
    }
    elsif ($state == $st_in_ostream_obj)
    {
        push(@ostream, $line);
        if ($line =~ m/^\%\% Object stream: object (\d+)/)
        {
            check_obj_id($1);
            $state = $st_in_ostream_outer;
        }
        elsif ($line =~ m/^endstream/)
        {
            # Everything collected so far, including this endstream
            # line, is emitted by write_ostream().
            $stream_length = $last_offset - $stream_start;
            write_ostream();
            $state = $st_in_obj;
        }
    }
    elsif ($state == $st_in_xref_stream_dict)
    {
        if ($line =~ m,/(Length|W) ,)
        {
            # already printed
        }
        elsif ($line =~ m,/Size ,)
        {
            print " /Size $xref_size\n";
        }
        else
        {
            print $line;
        }
        if ($line =~ m/^stream\n/)
        {
            # Emit the binary cross-reference data right after the
            # "stream" line printed above. The first entry, for object
            # 0, is all zero bytes.
            my $pack = "(C C$xref_f1_nbytes C$xref_f2_nbytes)";
            print pack($pack, 0, 0, 0);
            foreach my $x (@xref)
            {
                my ($type, $f1, $f2) = @$x;
                $f2 = 0 unless defined $f2;
                my @f1 = ();
                my @f2 = ();
                # Split each field into its bytes, most significant
                # byte first.
                foreach my $d ([\@f1, $f1, $xref_f1_nbytes],
                               [\@f2, $f2, $xref_f2_nbytes])
                {
                    my ($fa, $f, $nbytes) = @$d;
                    for (my $i = 0; $i < $nbytes; ++$i)
                    {
                        unshift(@$fa, $f & 0xff);
                        $f >>= 8;
                    }
                }
                print pack($pack, $type, @f1, @f2);
            }
            print "\nendstream\nendobj\n\n";
            print "startxref\n$xref_offset\n\%\%EOF\n";
            $state = $st_done;
        }
    }
    elsif ($state == $st_in_stream)
    {
        if ($line =~ m/^endstream$/)
        {
            $stream_length = $last_offset - $stream_start;
            $state = $st_after_stream;
        }
        print $line;
    }
    elsif ($state == $st_after_stream)
    {
        if ($line =~ m/^\%QDF: ignore_newline$/)
        {
            # Marker asking for one byte to be left out of the length
            # (presumably the newline in front of endstream).
            --$stream_length;
        }
        elsif ($line =~ m/^(\d+) 0 obj$/)
        {
            # The next object is expected to hold the stream length.
            check_obj_id($1);
            $state = $st_in_length;
        }
        print $line;
    }
    elsif ($state == $st_in_length)
    {
        if ($line !~ m/^\d+$/)
        {
            die "$file:$.: expected integer\n";
        }
        # Replace the stored length with the measured one and correct
        # $offset for the difference in size between the two lines.
        my $new = "$stream_length\n";
        $offset -= length($line);
        $offset += length($new);
        print $new;
        $state = $st_top;
    }
    elsif ($state == $st_at_xref)
    {
        # The line just read (the first one after "xref") is dropped
        # and a complete table is written in its place.
        my $n = scalar(@xref);
        print "0 ", $n+1, "\n0000000000 65535 f \n";
        for (@xref)
        {
            my ($type, $f1, $f2) = @$_;
            printf("%010d 00000 n \n", $f1);
        }
        $state = $st_before_trailer;
    }
    elsif ($state == $st_before_trailer)
    {
        # Skip the remainder of the old table.
        if ($line =~ m/^trailer <</)
        {
            print $line;
            $state = $st_in_trailer;
        }
        # no output
    }
    elsif ($state == $st_in_trailer)
    {
        if ($line =~ m/^ \/Size \d+$/)
        {
            print " /Size ", scalar(@xref) + 1, "\n";
        }
        else
        {
            print $line;
        }
        if ($line =~ m/^>>$/)
        {
            print "startxref\n$xref_offset\n\%\%EOF\n";
            $state = $st_done;
        }
    }
    elsif ($state == $st_done)
    {
        # ignore
    }
}

# Running out of input before the cross-reference data has been written
# means the file was incomplete.
die "$whoami: $file: premature EOF\n" unless $state == $st_done;
# Read the next line from F and advance the position counters:
# $last_offset becomes the position at which the line started and
# $offset the position just past it. Returns the line, or undef at end
# of input (in which case the counters are left alone).
sub get_line
{
    my $text = <F>;
    return $text unless defined $text;

    $last_offset = $offset;
    $offset += length($text);
    return $text;
}
# Verify that the given object number is exactly one more than the
# previous one, then record it: $last_obj is advanced and a type 1 xref
# entry pointing at the start of the current line is appended. Dies
# with the input position if the number is out of sequence.
sub check_obj_id
{
    my ($obj_id) = @_;
    my $wanted = $last_obj + 1;
    unless ($obj_id == $wanted)
    {
        die "$file:$.: expected object ", $wanted, "\n";
    }
    $last_obj = $obj_id;
    push(@xref, [1, $last_offset]);
}
# Replace the newest xref entry with a type 2 entry naming the current
# object stream and the next free index within it, then advance that
# index.
sub adjust_ostream_xref
{
    pop(@xref);
    my @entry = (2, $ostream_id, $ostream_idx);
    $ostream_idx++;
    push(@xref, \@entry);
}
# Emit the object stream collected so far: the regenerated dictionary
# entries (/Length, /N, /First and, if one was seen, /Extends), the
# "stream" keyword, a fresh offset table, and the buffered lines.
# $offset is corrected for the difference between the dropped input
# lines and the generated text, and all per-stream state is cleared.
sub write_ostream
{
    my $base = $ostream_offsets[0];
    my $count = scalar(@ostream_offsets);

    # One "<object number> <offset>" line per contained object. Object
    # numbers follow the stream's own number; offsets are relative to
    # the first contained object.
    my $obj_num = $ostream_id;
    my $table = '';
    foreach my $pos (@ostream_offsets)
    {
        ++$obj_num;
        $table .= $obj_num . ' ' . ($pos - $base) . "\n";
    }
    my $table_len = length($table);

    # The table is part of the stream data, so it counts toward the
    # length and pushes the first object further in.
    $stream_length += $table_len;
    my @dict = (" /Length $stream_length\n",
                " /N $count\n",
                " /First " . ($base + $table_len) . "\n");
    push(@dict, " /Extends $ostream_extends\n") if $ostream_extends;
    push(@dict, ">>\n");
    my $dict_text = join('', @dict);

    print $dict_text;
    print "stream\n";
    print $table;
    print $_ foreach @ostream;

    # Net change in size: generated table and dictionary text minus the
    # input lines that were dropped.
    my $dropped = 0;
    $dropped += length($_) foreach @ostream_discarded;
    $offset += $table_len + length($dict_text) - $dropped;

    # Get ready for the next object stream.
    @ostream = ();
    @ostream_offsets = ();
    @ostream_discarded = ();
    ($ostream_idx, $ostream_id) = (0, 0);
    $ostream_extends = "";
}