#!/usr/bin/env perl

## jpg-recover -- Copyright (c) 2001-2002, Adam Glass <clarity.net@adam>
##
## jpg-recover scans through a file (probably a disk image) looking for
##  JPEG headers, copying them to sequential filenames.  I wrote this
##  when I lost a bunch of pictures to a corrupted file allocation
##  table, and it worked well for me.  Not 100% successful, but usually
##  better than nothing.
##
## NOTE!  USE THIS PROGRAM AT YOUR OWN RISK; THIS PROGRAM COMES WITH
##        NO WARRANTY WHATSOEVER.
##
## Current version:
##  $Id: jpg-recover.pl,v 1.2 2002/02/24 23:07:38 adam Exp $
##
## Permission to use, copy, modify, and distribute this software for any
## purpose with or without fee is hereby granted, provided that the above
## copyright notice and this permission notice appear in all copies.
##
## Many thanks to all the people who have sent in updates and suggestions!
##
## Generally, the following variables are fine -- you may want to tune
## them if you know what you're doing, but you shouldn't have to.
##
## $minsize may be worth fiddling with, since the end marker ('FFD9')
## is not unique -- it can occur multiple times within JPEG data.
## If $minsize is set too low, you'll end up with truncated images (false
## matches on the end marker), but if $minsize is too high, jpg-recover
## will miss the end of the file -- and combine two images or simply have
## corruption at the end of the image.  So you may need to try a number
## of different values.  (The units are bytes; the default is to not
## believe the data in the file if it's specifying an image shorter
## than 409600 bytes -- 400k.)
##
## Also, you may have JPEG start markers other than what's already
## included in the @starts array.  In which case jpg-recover probably
## won't find any of your images and you'll need to poke around and
## find JPEG start markers by hand.  Sorry!  :(
##
## ------------------------------------------------------------------------

# Tunable settings.  These are deliberately package globals: the main scan
# loop and the helper subs further down read (and in some cases update) them.

# Input file: the first command-line argument when one is given and is
# non-empty, otherwise the default image name.  The defined() test avoids
# comparing an undefined value when the script is run without arguments.
if (defined($ARGV[0]) && $ARGV[0] ne "") {
  $infile=$ARGV[0];     # use user-defined file if one is supplied
} else {
  $infile="smcard.img"; # default input file to read (the disk image file)
}
$outtemp="lostpic{}.jpg"; # output file template: '{}' is replaced with file #
$filenum=1;             # output file numbering starts here
$position=0;            # position in input file, for reassuring output
$minsize=409600;        # ignore apparent JPEG file end markers before here
$announce=102400;       # announce position every 100k (0=disable announcements)
$state=0;               # current read state: 0=waiting for start marker,
                        #                     1=inside file, waiting for end
$outlen=0;              # output file length (for pretty output)
$buffer="";             # scanning buffer (leave this alone)
$maxbufsize=60;         # maximum buffer size (must be greater than markers)
$last="";               # where characters get read in (leave this alone)
@starts = (             # array of valid JPEG start markers...
  "\xff\xd8\xff\xe1",
  "\xff\xd8\xff\xe0");  # Markus Hammori: changed start headers to the necessary minimum
$end="\xff\xd9";        # JPEG end marker
# start should really match:
# "\xff\xd8\xff\xe0<any two bytes>JFIF"
# or maybe even:
# "\xff\xd8\xff\xe0<any two bytes>EXIF"
# but no one seems to comply with this ... why?

# Main scan.
#
# The input is read one byte at a time.  $buffer is a sliding window over the
# most recent bytes (at most $maxbufsize of them) and $gone is the single byte
# that slid out of the window on the current iteration ("" while the window
# is still filling).  While an image is being written ($state == 1) a byte is
# only written to the output once it has left the window, so $buffer always
# holds the not-yet-written tail of the current image.
#
# matched_start() has a side effect this loop relies on: when it returns
# true, the global $start holds the marker that matched.

my $first_filenum = $filenum;   # for the final tally, whatever numbering starts at
my ($out_fh, $outfile);

# Three-argument open so that odd characters in $infile cannot be taken for a
# mode; include $! so the user learns why the open failed.
open(my $in_fh, '<', $infile) or die "couldn't open $infile: $!";
binmode($in_fh);
print "Scanning '$infile' looking for pictures...\n";
while (1) {
  my $got = read($in_fh, $last, 1);
  if (!defined $got) {
    # Stop cleanly on a read error instead of retrying the same offset forever.
    warn "read error on $infile at byte $position: $!\n";
    last;
  }
  last if $got == 0;     # end of input
  $position += $got;

  # Slide the window forward by one byte.
  my $gone = "";
  if (length($buffer) >= $maxbufsize) {
    $gone   = substr($buffer,0,1);
    $buffer = substr($buffer,1).$last;
  } else {
    $buffer .= $last;
  }
  if ($announce && ($position % $announce == 0)) {
    print "Passing byte $position of $infile ...\n";
  }
  if ($state == 0) {   # waiting for start
    if (matched_start($buffer)) {
      $outfile = seq_filename($outtemp,$filenum);
      print "Image #$filenum: start found, dumping to $outfile ...\n";
      $buffer="";
      open($out_fh, '>', $outfile) or die "couldn't create $outfile: $!";
      binmode($out_fh);
      $outlen = length($start);
      print {$out_fh} $start;
      $state=1;
    }
  } else {               # waiting for end
    if ((index($buffer,$end) >= 0) && ($outlen > $minsize)) {
      # End marker seen and the image is long enough to believe it: flush the
      # byte that just left the window plus the whole window.
      print {$out_fh} $gone.$buffer;
      close($out_fh) or warn "error closing $outfile: $!\n";
      print "Done -- wrote ~$outlen bytes to $outfile (JPEG end \@ $position)\n";
      $filenum += 1;
      $state=0;
      $buffer="";
    }
    elsif (matched_start($buffer)) {
      # A new image begins before the current one ended: finish the current
      # file with everything up to the new start marker plus a synthetic end
      # marker, then begin the next file straight away.
      my $newstart = index($buffer,$start);
      print {$out_fh} $gone.substr($buffer,0,$newstart).$end;
      close($out_fh) or warn "error closing $outfile: $!\n";
      print "Done -- wrote ~$outlen bytes to $outfile (JPEG start \@ $position)\n";
      $filenum += 1;
      $outfile = seq_filename($outtemp,$filenum);
      print "Image #$filenum: start found, dumping to $outfile ...\n";
      open($out_fh, '>', $outfile) or die "couldn't create $outfile: $!";
      binmode($out_fh);
      $outlen = length($start);
      $buffer="";
      print {$out_fh} $start;
      # leave state at 1
    } else {
      print {$out_fh} $gone;
      $outlen += 1;
    }
  }
}
close($in_fh);
if ($state) {
  # Input ended in the middle of an image.  Only the window is still
  # unwritten: the byte that slid out on the last iteration has either been
  # written already or never belonged to this file, so it must not be
  # written again here.
  print {$out_fh} $buffer.$end;
  print "Done -- wrote ~$outlen bytes to $outfile (EOF \@ $position.)\n";
  close($out_fh) or warn "error closing $outfile: $!\n";
  $filenum += 1;
}
print "Finished scanning $infile.\n";
print "\n";
print "Created ".($filenum-$first_filenum)." file(s) -- check 'em out!\n";

exit 0;

# matched_start($buf)
#   Returns 1 if $buf contains any of the JPEG start markers listed in the
#   global @starts, and 0 otherwise.
#
#   Side effect: on a match, the global $start is set to the marker that
#   matched; callers use it to know which marker to write out and to locate
#   it in the buffer.  Markers are tried in @starts order, so the marker
#   reported is the first listed one that occurs anywhere in $buf.  $start is
#   left untouched when nothing matches.
sub matched_start {
  my ($buf) = @_;
  foreach my $marker (@starts) {
    if (index($buf,$marker) >= 0) {
      $start = $marker;
      return 1;
    }
  }
  return 0;
}

# seq_filename($template, $number)
#   Returns a copy of $template with the first literal '{}' replaced by
#   $number, zero-padded to at least three digits.  A template without '{}'
#   is returned unchanged.  The caller's template is not modified.
sub seq_filename {
  my ($template,$number) = @_;

  my $numstr = sprintf("%.3d",$number);
  # Braces are escaped so they are always matched literally and never read as
  # a regex quantifier.
  $template =~ s/\{\}/$numstr/;
  return $template;
}
