mirror of https://github.com/qpdf/qpdf.git synced 2025-02-01 11:28:24 +00:00
Jay Berkenbilt b356b9dfa2 fix-qdf: handle object streams with > 255 objects
fix-qdf was previously hard-coding the number of bytes for the f2
field of the xref stream entry. This addresses issue #37. Thanks
aluebcke for reporting.
2015-05-24 16:52:42 -04:00

398 lines
8.2 KiB
Executable File

#!/usr/bin/env perl
require 5.008_001;
use warnings;
use strict;
use File::Basename;
# Startup: work out who we are, honour --version, open the input, and
# validate/echo the three header lines of a QDF file.
#
# NOTE(review): the block delimiters (and any bare "else" lines) that belong
# to the conditionals in this section are absent from this copy of the
# script.  The comments below describe each statement on its own; confirm
# the intended nesting against a pristine copy before relying on it.

# Script name (used as the prefix of diagnostics) and the directory the
# script was invoked from.
my $whoami = basename($0);
my $dirname = dirname($0);
# When the only argument is --version, replace this process with the qpdf
# executable found next to this script so it reports the version.
if ((@ARGV == 1) && ($ARGV[0] eq '--version'))
exec "$dirname/qpdf", '--version';
# exec does not return on success, so this exit is only reached on failure.
exit 2;
# $offset is the byte position just past the most recently read line and
# $last_offset is the position at which that line started; get_line()
# maintains both.  $offset is additionally adjusted elsewhere in the script
# when the text written out differs in length from the text read in.
my $offset = 0;
my $last_offset = 0;
# The first remaining argument, if any, names the input file.
my $file = shift(@ARGV);
if (defined $file)
# NOTE(review): two-argument open lets characters in $file influence the
# open mode; the three-argument form would avoid that.
open(F, "<$file") or die "$whoami: can't open $file: $!\n";
# NOTE(review): presumably the no-argument fallback -- label the input
# 'stdin' for diagnostics and read from a duplicate of STDIN.
$file = 'stdin';
open(F, "<&STDIN") or die "$whoami: can't dup stdin: $!\n";
# Treat input and output as raw bytes so that length() counts bytes and the
# binary xref stream data is written untranslated.
binmode F;
binmode STDOUT;
# Line 1 must be a %PDF-1.x header; it is copied to the output unchanged.
my $line = get_line();
if (! ((defined $line) && ($line =~ m/^%PDF-1\.\d+\b/)))
die "$whoami: $file: not a pdf file\n";
print $line;
# Line 2 is copied through without being inspected; it only has to exist.
$line = get_line();
die "$whoami: $file: premature EOF\n" unless defined $line;
print $line;
# Line 3 must be the %QDF-1.x marker; it is copied to the output unchanged.
# NOTE(review): the '.' after "1" is not escaped in this pattern (unlike the
# %PDF check above), so it matches any single character.
$line = get_line();
if (! ((defined $line) && ($line =~ m/^%QDF-1.\d+\b/)))
die "$whoami: $file: not a qdf file\n";
print $line;
# File-scope state shared by the main loop and the helper subs.

# Number of the most recently registered object (updated by check_obj_id).
my $last_obj = 0;
# Cross-reference rows in object order.  Each row is an array ref:
#   [1, offset]                    pushed by check_obj_id
#   [2, ostream_id, ostream_idx]   pushed by adjust_ostream_xref
my @xref = ();
# Offset at which the current stream's data begins, and the length computed
# for it once its end has been seen.
my $stream_start = 0;
my $stream_length = 0;
# Value written after "startxref".
my $xref_offset = 0;
# Widths printed as the second and third numbers of /W in an xref stream
# dictionary, and used as repeat counts in the pack template for its rows.
my $xref_f1_nbytes = 0;
my $xref_f2_nbytes = 0;
# Number of rows in the xref stream (1 + number of @xref entries).
my $xref_size = 0;
# Parser states.  $cur_state is only a counter used to hand out distinct
# integers (1, 2, 3, ...) to the $st_* names below.
my $cur_state = 0;
my $st_top = ++$cur_state;
my $st_in_obj = ++$cur_state;
my $st_in_stream = ++$cur_state;
my $st_after_stream = ++$cur_state;
my $st_in_ostream_dict = ++$cur_state;
my $st_in_ostream_offsets = ++$cur_state;
my $st_in_ostream_outer = ++$cur_state;
my $st_in_ostream_obj = ++$cur_state;
my $st_in_xref_stream_dict = ++$cur_state;
my $st_in_length = ++$cur_state;
my $st_at_xref = ++$cur_state;
my $st_before_trailer = ++$cur_state;
my $st_in_trailer = ++$cur_state;
my $st_done = ++$cur_state;
# Buffers for the object stream currently being rebuilt:
#   @ostream            lines to be printed as the stream's content
#   @ostream_offsets    per-object offsets relative to $stream_start
#   @ostream_discarded  input lines dropped from the output; write_ostream
#                       subtracts their lengths from $offset
my @ostream = ();
my @ostream_offsets = ();
my @ostream_discarded = ();
# Index to give the next object inside the current object stream.
my $ostream_idx = 0;
# Object number of the current object stream (0 when there is none).
my $ostream_id = 0;
# Text of the /Extends reference ("N 0 R") captured from the dictionary, or
# the empty string when none was seen.
my $ostream_extends = "";
# The parser starts out between objects.
my $state = $st_top;
# Main pass: read the rest of the input one line at a time and drive a state
# machine keyed on $state.  Most lines are echoed; stream lengths, object
# stream headers and the cross-reference data are regenerated.
#
# NOTE(review): this copy of the script has lost its block delimiters, its
# bare "else" lines and apparently a few statements (see the individual
# notes below).  The comments describe what each visible statement does;
# confirm nesting and the flagged gaps against a pristine copy.
while (defined($line = get_line()))
# --- st_top: between objects --------------------------------------------
if ($state == $st_top)
if ($line =~ m/^(\d+) 0 obj$/)
# NOTE(review): the object number captured above is not consumed by any
# visible statement; check_obj_id() looks like the intended consumer.
$state = $st_in_obj;
elsif ($line =~ m/^xref$/)
# A classic xref table starts on this line; remember where.
$xref_offset = $last_offset;
$state = $st_at_xref;
print $line;
# --- st_in_obj: inside an object, before any stream data ----------------
elsif ($state == $st_in_obj)
print $line;
if ($line =~ m/^stream$/)
# Stream data begins right after this line.
$state = $st_in_stream;
$stream_start = $offset;
elsif ($line =~ m/^endobj$/)
$state = $st_top;
elsif ($line =~ m,/Type /ObjStm,)
# Object stream: its dictionary and offset table are regenerated, so the
# following input lines are collected rather than echoed.
$state = $st_in_ostream_dict;
$ostream_id = $last_obj;
elsif ($line =~ m,/Type /XRef,)
# Cross-reference stream: take the f1 field of the newest @xref row as the
# startxref value (presumably the row for this very object -- confirm).
$xref_offset = $xref[-1][1];
$xref_f1_nbytes = 0;
my $t = $xref_offset;
# NOTE(review): this loop shifts $t down to zero one byte at a time but no
# visible statement increments $xref_f1_nbytes, so as shown the width stays
# 0.  A per-iteration increment appears to be missing -- confirm.
while ($t)
$t >>= 8;
# Figure out how many bytes we need for ostream index.
# Make sure we get at least 1 byte even if there are no
# object streams.
my $max_objects = 1;
foreach my $e (@xref)
my ($type, $f1, $f2) = @$e;
if ((defined $f2) && ($f2 > $max_objects))
$max_objects = $f2;
# NOTE(review): same pattern as above -- nothing visible resets or
# increments $xref_f2_nbytes while $max_objects is shifted down.
while ($max_objects)
$max_objects >>=8;
# One row = 1 type byte + f1 bytes + f2 bytes; one row per @xref entry plus
# the leading row for object 0.
my $esize = 1 + $xref_f1_nbytes + $xref_f2_nbytes;
$xref_size = 1 + @xref;
my $length = $xref_size * $esize;
print " /Length $length\n";
print " /W [ 1 $xref_f1_nbytes $xref_f2_nbytes ]\n";
$state = $st_in_xref_stream_dict;
# --- st_in_ostream_dict: remainder of an object stream dictionary -------
elsif ($state == $st_in_ostream_dict)
if ($line =~ m/^stream/)
$state = $st_in_ostream_offsets;
# Lines recorded here are not echoed; write_ostream() later subtracts their
# lengths from $offset.
push(@ostream_discarded, $line);
if ($line =~ m,/Extends (\d+ 0 R),)
$ostream_extends = $1;
# discard line
# --- st_in_ostream_offsets: old offset table of the object stream -------
elsif ($state == $st_in_ostream_offsets)
if ($line =~ m/^\%\% Object stream: object (\d+)/)
# First per-object marker: offsets inside the stream are measured from the
# start of this line.
# NOTE(review): the captured object number is unused in the visible code.
$stream_start = $last_offset;
$state = $st_in_ostream_outer;
push(@ostream, $line);
push(@ostream_discarded, $line);
# discard line
# --- st_in_ostream_outer: first line of an object inside the stream -----
elsif ($state == $st_in_ostream_outer)
# Record where this object starts relative to the stream start.
push(@ostream_offsets, $last_offset - $stream_start);
$state = $st_in_ostream_obj;
push(@ostream, $line);
# --- st_in_ostream_obj: subsequent lines of an object in the stream -----
elsif ($state == $st_in_ostream_obj)
push(@ostream, $line);
if ($line =~ m/^\%\% Object stream: object (\d+)/)
# NOTE(review): the captured object number is unused in the visible code.
$state = $st_in_ostream_outer;
elsif ($line =~ m/^endstream/)
$stream_length = $last_offset - $stream_start;
# NOTE(review): no visible statement calls write_ostream() to flush the
# buffers collected above -- confirm against a pristine copy.
$state = $st_in_obj;
# --- st_in_xref_stream_dict: rest of the xref stream object -------------
elsif ($state == $st_in_xref_stream_dict)
if ($line =~ m,/(Length|W) ,)
# already printed
elsif ($line =~ m,/Size ,)
# NOTE(review): $size is assigned but never read; $xref_size (the same
# value, set when /Type /XRef was seen) is what gets printed.
my $size = 1 + @xref;
print " /Size $xref_size\n";
print $line;
if ($line =~ m/^stream\n/)
# Emit the binary rows: one type byte followed by f1 and f2 as big-endian
# byte sequences of the widths announced in /W.
my $pack = "(C C$xref_f1_nbytes C$xref_f2_nbytes)";
# Row for object 0.
print pack($pack, 0, 0, 0);
foreach my $x (@xref)
my ($type, $f1, $f2) = @$x;
$f2 = 0 unless defined $f2;
my @f1 = ();
my @f2 = ();
# Split each field into bytes, most significant first, by repeatedly
# taking the low byte and prepending it.
foreach my $d ([\@f1, $f1, $xref_f1_nbytes],
[\@f2, $f2, $xref_f2_nbytes])
my ($fa, $f, $nbytes) = @$d;
for (my $i = 0; $i < $nbytes; ++$i)
unshift(@$fa, $f & 0xff);
$f >>= 8;
print pack($pack, $type, @f1, @f2);
# Close the stream and object, then write the file trailer section.
print "\nendstream\nendobj\n\n";
print "startxref\n$xref_offset\n\%\%EOF\n";
$state = $st_done;
# --- st_in_stream: data of an ordinary stream ---------------------------
elsif ($state == $st_in_stream)
if ($line =~ m/^endstream$/)
$stream_length = $last_offset - $stream_start;
$state = $st_after_stream;
print $line;
# --- st_after_stream: between endstream and the length object -----------
elsif ($state == $st_after_stream)
# NOTE(review): no statement is visible for the ignore_newline marker; a
# stream-length adjustment may be missing here -- confirm.
if ($line =~ m/^\%QDF: ignore_newline$/)
elsif ($line =~ m/^(\d+) 0 obj$/)
# NOTE(review): the captured object number is unused in the visible code.
$state = $st_in_length;
print $line;
# --- st_in_length: the integer object holding the stream length ---------
elsif ($state == $st_in_length)
if ($line !~ m/^\d+$/)
die "$file:$.: expected integer\n";
# Replace the old length with the measured one and correct $offset for the
# difference in size between what was read and what is written.
my $new = "$stream_length\n";
$offset -= length($line);
$offset += length($new);
print $new;
$state = $st_top;
# --- st_at_xref: regenerate a classic xref table ------------------------
elsif ($state == $st_at_xref)
my $n = scalar(@xref);
# Subsection header plus the free entry for object 0.
print "0 ", $n+1, "\n0000000000 65535 f \n";
for (@xref)
my ($type, $f1, $f2) = @$_;
# Ten-digit zero-padded value of f1, generation 0, in use.
printf("%010d 00000 n \n", $f1);
$state = $st_before_trailer;
# --- st_before_trailer: skip old xref rows until the trailer ------------
elsif ($state == $st_before_trailer)
if ($line =~ m/^trailer <</)
print $line;
$state = $st_in_trailer;
# no output
# --- st_in_trailer: copy the trailer, fixing /Size ----------------------
elsif ($state == $st_in_trailer)
if ($line =~ m/^ \/Size \d+$/)
print " /Size ", scalar(@xref) + 1, "\n";
print $line;
if ($line =~ m/^>>$/)
print "startxref\n$xref_offset\n\%\%EOF\n";
$state = $st_done;
# --- st_done: everything after the rewritten trailer --------------------
elsif ($state == $st_done)
# ignore
# Running out of input in any other state means the file was truncated.
die "$whoami: $file: premature EOF\n" unless $state == $st_done;
# Read the next line from the input handle F and keep the offset counters
# in step with it.
#
# On a successful read, $last_offset is set to the position at which the
# line started (the previous $offset) and $offset is advanced by the line's
# length.  At end of input neither counter is touched.
#
# Returns the line including its terminator, or undef at end of input.
# Every caller assigns the result and tests it with defined(), so the value
# is returned explicitly; the block delimiters are restored as well.
sub get_line
{
    my $line = scalar(<F>);
    if (defined $line)
    {
        $last_offset = $offset;
        $offset += length($line);
    }
    return $line;
}
# Register the next object in the file.
#
# Takes the object number just read.  Objects must be numbered
# consecutively: anything other than $last_obj + 1 is fatal, reported with
# the input name and current input line number.  On success $last_obj is
# advanced and a type-1 row [1, $last_offset] (the position at which the
# current line started) is appended to @xref.
#
# The block delimiters are restored so that the die is tied to its guard
# and the sub is a well-formed definition.
sub check_obj_id
{
    my $cur_obj = shift;
    if ($cur_obj != $last_obj + 1)
    {
        die "$file:$.: expected object ", $last_obj + 1, "\n";
    }
    $last_obj = $cur_obj;
    push(@xref, [1, $last_offset]);
}
# Append a type-2 row [2, $ostream_id, $ostream_idx] to @xref, i.e. "this
# object lives in object stream $ostream_id at index $ostream_idx", and
# advance $ostream_idx for the next object in the same stream.
#
# The block delimiters are restored so the sub is a well-formed definition;
# no statement has been added or removed.
#
# NOTE(review): check_obj_id() also appends a row per object.  If both subs
# run for the same object the table gains two rows for it -- confirm against
# the call sites, which are not visible in this copy of the script.
sub adjust_ostream_xref
{
    push(@xref, [2, $ostream_id, $ostream_idx++]);
}
# Write out the object stream collected in @ostream, with a regenerated
# dictionary tail and offset table, then reset the object stream buffers.
#
# Reads:  @ostream, @ostream_offsets, @ostream_discarded, $ostream_id,
#         $ostream_extends, $stream_length
# Writes: STDOUT (via the currently selected handle), $offset,
#         $stream_length, and resets all $ostream_* / @ostream* state.
#
# Changes from the copy under review:
#   * block delimiters restored so loops and the /Extends guard have
#     bodies;
#   * $onum is advanced for every entry of the offset table.  Previously it
#     stayed equal to $ostream_id, so every pair named the object stream
#     itself.  The objects inside the stream follow it in numbering
#     (check_obj_id enforces consecutive numbers), so the first entry is
#     $ostream_id + 1.
sub write_ostream
{
    my $first = $ostream_offsets[0];
    my $onum = $ostream_id;
    my $offsets = "";
    my $n = scalar(@ostream_offsets);
    # Build the "objnum offset" table with offsets relative to the first
    # object in the stream.
    for (@ostream_offsets)
    {
        $_ -= $first;
        ++$onum;
        $offsets .= "$onum $_\n";
    }
    # The table is part of the stream data, so it pushes the first object
    # further in and lengthens the stream.
    my $offset_adjust = length($offsets);
    $first += length($offsets);
    $stream_length += length($offsets);
    my $dict_data = "";
    $dict_data .= " /Length $stream_length\n";
    $dict_data .= " /N $n\n";
    $dict_data .= " /First $first\n";
    if ($ostream_extends)
    {
        $dict_data .= " /Extends $ostream_extends\n";
    }
    $dict_data .= ">>\n";
    $offset_adjust += length($dict_data);
    print $dict_data;
    print "stream\n";
    print $offsets;
    foreach (@ostream)
    {
        print $_;
    }
    # Keep $offset describing the output: remove the input lines that were
    # dropped and add the dictionary tail and offset table written instead.
    for (@ostream_discarded)
    {
        $offset -= length($_);
    }
    $offset += $offset_adjust;
    # Ready for the next object stream.
    $ostream_idx = 0;
    $ostream_id = 0;
    @ostream = ();
    @ostream_offsets = ();
    @ostream_discarded = ();
    $ostream_extends = "";
}