mirror of
https://github.com/qpdf/qpdf.git
synced 2024-11-10 23:20:58 +00:00
b356b9dfa2
fix-qdf was previously hard-coding the number of bytes for the f2 field of the xref stream entry. This addresses issue #37. Thanks aluebcke for reporting.
398 lines
8.2 KiB
Perl
Executable File
398 lines
8.2 KiB
Perl
Executable File
#!/usr/bin/env perl

require 5.008_001;
use warnings;
use strict;
use File::Basename;

# Base name of this script (used as the prefix of diagnostics) and the
# directory it was started from (used to find the qpdf executable).
my $whoami = basename($0);
my $dirname = dirname($0);

# When invoked with the single argument --version, hand over to the qpdf
# binary found in the same directory.  exec only returns on failure, in
# which case the script exits with status 2.
if (scalar(@ARGV) == 1 and $ARGV[0] eq '--version')
{
    exec "$dirname/qpdf", '--version';
    exit 2;
}

# Running byte positions maintained by get_line: $offset is the position
# just past the line most recently read and $last_offset is the position
# at which that line started.  Code that prints something of a different
# length than what it read adjusts $offset so that it keeps describing
# the rewritten output.
my $offset = 0;
my $last_offset = 0;

# Read from the file named on the command line, or from standard input
# when no name is given.  The three-argument form of open is used so the
# name is always taken literally: with the two-argument form, names
# containing mode characters or surrounding whitespace (for example "&3"
# or " x") would be interpreted by open instead of being opened as files.
# The one special name that two-argument open gave meaning to on purpose,
# "-" for standard input, is kept explicitly.
my $file = shift(@ARGV);
if ((defined $file) && ($file ne '-'))
{
    open(F, '<', $file) or die "$whoami: can't open $file: $!\n";
}
else
{
    $file = 'stdin' unless defined $file;
    open(F, "<&STDIN") or die "$whoami: can't dup stdin: $!\n";
}
# Both streams carry raw bytes; no newline or encoding translation.
binmode F;
binmode STDOUT;

# The first three lines are copied through unchanged after validation:
# the %PDF-1.x header, one line that is passed along without inspection,
# and the %QDF-1.x marker that identifies the file as QDF.
my $line = get_line();
if (! ((defined $line) && ($line =~ m/^%PDF-1\.\d+\b/)))
{
    die "$whoami: $file: not a pdf file\n";
}
print $line;

$line = get_line();
die "$whoami: $file: premature EOF\n" unless defined $line;
print $line;

$line = get_line();
# The dot is escaped so that only a literal "1.<digits>" version is
# accepted, matching how the %PDF header is checked above.
if (! ((defined $line) && ($line =~ m/^%QDF-1\.\d+\b/)))
{
    die "$whoami: $file: not a qdf file\n";
}
print $line;

# Number of the object seen most recently; check_obj_id requires object
# numbers to increase by exactly one.
my $last_obj = 0;
# One entry per object seen so far, in object-number order.  An entry is
# [1, offset] for an ordinary object, or [2, object stream id, index]
# once adjust_ostream_xref has rewritten it for an object that lives
# inside an object stream.
my @xref = ();

# Extent of the stream currently being measured.
my ($stream_start, $stream_length) = (0, 0);
# Location of the cross-reference data, the byte widths of the second and
# third fields of a cross-reference stream entry, and the number of
# entries in that stream.
my $xref_offset = 0;
my ($xref_f1_nbytes, $xref_f2_nbytes) = (0, 0);
my $xref_size = 0;

# Parser states, numbered consecutively from 1.
my $cur_state = 0;
my ($st_top,
    $st_in_obj,
    $st_in_stream,
    $st_after_stream,
    $st_in_ostream_dict,
    $st_in_ostream_offsets,
    $st_in_ostream_outer,
    $st_in_ostream_obj,
    $st_in_xref_stream_dict,
    $st_in_length,
    $st_at_xref,
    $st_before_trailer,
    $st_in_trailer,
    $st_done) = map { ++$cur_state } (1 .. 14);

# Data gathered while inside an object stream: the lines to emit as the
# stream body, the position of each contained object, the input lines
# that are dropped (needed to correct $offset), the index of the next
# contained object, the object number of the stream itself, and the
# target of its /Extends key if there is one.
my @ostream = ();
my @ostream_offsets = ();
my @ostream_discarded = ();
my ($ostream_idx, $ostream_id) = (0, 0);
my $ostream_extends = "";

my $state = $st_top;

# Main pass: read the input one line at a time and drive a state machine
# that copies the file to standard output while regenerating everything
# that depends on byte positions: stream lengths, object stream offset
# tables, and the cross-reference table or cross-reference stream.
#
# NOTE(review): the leading whitespace inside the " /Length", " /W" and
# " /Size" literals and patterns below is reproduced exactly as found in
# this copy of the file; confirm it matches the indentation qpdf uses in
# QDF output.
while (defined($line = get_line()))
{
    if ($state == $st_top)
    {
        # Between objects: expect the next object header or the start of
        # a classic cross-reference table.  Everything is copied through.
        if ($line =~ m/^(\d+) 0 obj$/)
        {
            check_obj_id($1);
            $state = $st_in_obj;
        }
        elsif ($line =~ m/^xref$/)
        {
            $xref_offset = $last_offset;
            $state = $st_at_xref;
        }
        print $line;
    }
    elsif ($state == $st_in_obj)
    {
        print $line;
        if ($line =~ m/^stream$/)
        {
            # Stream data starts right after this line.
            $state = $st_in_stream;
            $stream_start = $offset;
        }
        elsif ($line =~ m/^endobj$/)
        {
            $state = $st_top;
        }
        elsif ($line =~ m,/Type /ObjStm,)
        {
            # The rest of this dictionary is regenerated by write_ostream.
            $state = $st_in_ostream_dict;
            $ostream_id = $last_obj;
        }
        elsif ($line =~ m,/Type /XRef,)
        {
            # The cross-reference stream is the object just recorded, so
            # its offset is the last xref entry; the width of field 1 is
            # the number of bytes needed to hold that offset.
            $xref_offset = $xref[-1][1];
            $xref_f1_nbytes = 0;
            my $t = $xref_offset;
            while ($t)
            {
                $t >>= 8;
                ++$xref_f1_nbytes;
            }
            # Figure out how many bytes we need for ostream index.
            # Make sure we get at least 1 byte even if there are no
            # object streams.  The width is reset first, as is done for
            # field 1, so it never accumulates across computations.
            $xref_f2_nbytes = 0;
            my $max_objects = 1;
            foreach my $e (@xref)
            {
                my ($type, $f1, $f2) = @$e;
                if ((defined $f2) && ($f2 > $max_objects))
                {
                    $max_objects = $f2;
                }
            }
            while ($max_objects)
            {
                $max_objects >>= 8;
                ++$xref_f2_nbytes;
            }
            # Each entry is one type byte plus the two variable-width
            # fields; there is one entry per object plus one for the
            # leading all-zero entry written before them.
            my $esize = 1 + $xref_f1_nbytes + $xref_f2_nbytes;
            $xref_size = 1 + @xref;
            my $length = $xref_size * $esize;
            print " /Length $length\n";
            print " /W [ 1 $xref_f1_nbytes $xref_f2_nbytes ]\n";
            $state = $st_in_xref_stream_dict;
        }
    }
    elsif ($state == $st_in_ostream_dict)
    {
        # Nothing in the remainder of an object stream dictionary is
        # printed; the lines are remembered so write_ostream can correct
        # $offset for them.
        if ($line =~ m/^stream/)
        {
            $state = $st_in_ostream_offsets;
        }
        else
        {
            push(@ostream_discarded, $line);
            if ($line =~ m,/Extends (\d+ 0 R),)
            {
                $ostream_extends = $1;
            }
        }
        # discard line
    }
    elsif ($state == $st_in_ostream_offsets)
    {
        # Skip the existing offset table up to the marker comment that
        # introduces the first contained object.
        if ($line =~ m/^\%\% Object stream: object (\d+)/)
        {
            check_obj_id($1);
            $stream_start = $last_offset;
            $state = $st_in_ostream_outer;
            push(@ostream, $line);
        }
        else
        {
            push(@ostream_discarded, $line);
        }
        # discard line
    }
    elsif ($state == $st_in_ostream_outer)
    {
        # First line of a contained object: turn its xref entry into an
        # object stream entry and note where it starts in the stream.
        adjust_ostream_xref();
        push(@ostream_offsets, $last_offset - $stream_start);
        $state = $st_in_ostream_obj;
        push(@ostream, $line);
    }
    elsif ($state == $st_in_ostream_obj)
    {
        push(@ostream, $line);
        if ($line =~ m/^\%\% Object stream: object (\d+)/)
        {
            check_obj_id($1);
            $state = $st_in_ostream_outer;
        }
        elsif ($line =~ m/^endstream/)
        {
            $stream_length = $last_offset - $stream_start;
            write_ostream();
            $state = $st_in_obj;
        }
    }
    elsif ($state == $st_in_xref_stream_dict)
    {
        if ($line =~ m,/(Length|W) ,)
        {
            # already printed
        }
        elsif ($line =~ m,/Size ,)
        {
            print " /Size $xref_size\n";
        }
        else
        {
            print $line;
        }
        if ($line =~ m/^stream\n/)
        {
            # Emit the binary table: an all-zero first entry followed by
            # one entry per object, each field written big-endian in the
            # widths announced in /W.  The rest of the input is ignored.
            my $pack = "(C C$xref_f1_nbytes C$xref_f2_nbytes)";
            print pack($pack, 0, 0, 0);
            foreach my $x (@xref)
            {
                my ($type, $f1, $f2) = @$x;
                $f2 = 0 unless defined $f2;
                my @f1 = ();
                my @f2 = ();
                foreach my $d ([\@f1, $f1, $xref_f1_nbytes],
                               [\@f2, $f2, $xref_f2_nbytes])
                {
                    my ($fa, $f, $nbytes) = @$d;
                    for (my $i = 0; $i < $nbytes; ++$i)
                    {
                        unshift(@$fa, $f & 0xff);
                        $f >>= 8;
                    }
                }
                print pack($pack, $type, @f1, @f2);
            }
            print "\nendstream\nendobj\n\n";
            print "startxref\n$xref_offset\n\%\%EOF\n";
            $state = $st_done;
        }
    }
    elsif ($state == $st_in_stream)
    {
        # Ordinary stream data is copied through; its length is measured
        # up to the start of the endstream line.
        if ($line =~ m/^endstream$/)
        {
            $stream_length = $last_offset - $stream_start;
            $state = $st_after_stream;
        }
        print $line;
    }
    elsif ($state == $st_after_stream)
    {
        # After a stream, wait for the next object, which holds the
        # stream's length.  An ignore_newline marker means the newline
        # before endstream is not counted as stream data.
        if ($line =~ m/^\%QDF: ignore_newline$/)
        {
            --$stream_length;
        }
        elsif ($line =~ m/^(\d+) 0 obj$/)
        {
            check_obj_id($1);
            $state = $st_in_length;
        }
        print $line;
    }
    elsif ($state == $st_in_length)
    {
        # Replace the stored length with the measured one and correct
        # $offset for any difference in the number of digits.
        if ($line !~ m/^\d+$/)
        {
            die "$file:$.: expected integer\n";
        }
        my $new = "$stream_length\n";
        $offset -= length($line);
        $offset += length($new);
        print $new;
        $state = $st_top;
    }
    elsif ($state == $st_at_xref)
    {
        # The line after "xref" is replaced by a freshly generated table:
        # the free entry for object 0 and one in-use entry per object.
        my $n = scalar(@xref);
        print "0 ", $n+1, "\n0000000000 65535 f \n";
        for (@xref)
        {
            my ($type, $f1, $f2) = @$_;
            printf("%010d 00000 n \n", $f1);
        }
        $state = $st_before_trailer;
    }
    elsif ($state == $st_before_trailer)
    {
        # Drop the old table entries; output resumes at the trailer.
        if ($line =~ m/^trailer <</)
        {
            print $line;
            $state = $st_in_trailer;
        }
        # no output
    }
    elsif ($state == $st_in_trailer)
    {
        if ($line =~ m/^ \/Size \d+$/)
        {
            print " /Size ", scalar(@xref) + 1, "\n";
        }
        else
        {
            print $line;
        }
        if ($line =~ m/^>>$/)
        {
            print "startxref\n$xref_offset\n\%\%EOF\n";
            $state = $st_done;
        }
    }
    elsif ($state == $st_done)
    {
        # ignore
    }
}

# Input that ends before the cross-reference data has been rewritten is
# incomplete.
die "$whoami: $file: premature EOF\n" unless $state == $st_done;

# Read the next line from the input handle F.  On success, $last_offset
# is set to the position where the line started and $offset is advanced
# past it.  Returns the line, or undef at end of input (in which case the
# counters are left alone).
sub get_line
{
    my $text = <F>;
    return $text unless defined $text;

    $last_offset = $offset;
    $offset += length($text);
    return $text;
}

# Verify that the object number passed in directly follows the previous
# one, dying with the input file name and line number if it does not.
# On success the number becomes $last_obj and an ordinary xref entry
# [1, offset of the line just read] is appended to @xref.
sub check_obj_id
{
    my ($id) = @_;
    my $expected = $last_obj + 1;
    unless ($id == $expected)
    {
        die "$file:$.: expected object ", $expected, "\n";
    }
    $last_obj = $id;
    push(@xref, [1, $last_offset]);
}

# Replace the most recent xref entry with an object stream entry
# [2, id of the enclosing object stream, index within it] and advance
# the index for the next contained object.
sub adjust_ostream_xref
{
    my $entry = [2, $ostream_id, $ostream_idx++];
    pop(@xref);
    push(@xref, $entry);
}

# Emit the regenerated remainder of an object stream: the dictionary keys
# /Length, /N, /First (and /Extends when one was seen), the "stream"
# keyword, a fresh offset table, and the collected stream lines.  $offset
# is then corrected for the difference between what was discarded from
# the input and what was written in its place, and all per-stream state
# is reset.
sub write_ostream
{
    my $count = scalar(@ostream_offsets);
    my $first = $ostream_offsets[0];

    # Offset table: "<object number> <offset relative to the first
    # object>" per contained object; numbers follow the stream's own.
    my $base = $first;
    my $objnum = $ostream_id;
    my @table = ();
    foreach my $abs (@ostream_offsets)
    {
        ++$objnum;
        push(@table, $objnum . ' ' . ($abs - $base) . "\n");
    }
    my $offsets = join('', @table);
    my $table_len = length($offsets);

    # The table is part of the stream data, so it moves the first object
    # and lengthens the stream.
    $first += $table_len;
    $stream_length += $table_len;

    my @dict = (" /Length $stream_length\n",
                " /N $count\n",
                " /First $first\n");
    push(@dict, " /Extends $ostream_extends\n") if $ostream_extends;
    push(@dict, ">>\n");
    my $dict_data = join('', @dict);

    print $dict_data, "stream\n", $offsets, @ostream;

    # Dropped input no longer counts toward the position; the new
    # dictionary lines and offset table do.
    foreach my $gone (@ostream_discarded)
    {
        $offset -= length($gone);
    }
    $offset += $table_len + length($dict_data);

    $ostream_idx = 0;
    $ostream_id = 0;
    @ostream = ();
    @ostream_offsets = ();
    @ostream_discarded = ();
    $ostream_extends = "";
}