#!/usr/bin/perl -sw -Ietc
use strict;

use Entity qw(
  %ENTITY
  set_num
  set_num_diff
  check
  check_eq
);
use Misc();
use Segment();
use SegmentSet();
use Section();
use SectionSet();
use Disasm();

#
# getHexEnv
#
sub getHexEnv($)
{
  return hex(Misc::get_env(shift()));
}

#
# global variables
#

my $ELF_ADDR_SIZE = Misc::get_env('TEVWH_ELF_ADDR_SIZE');
my $ELF_BASE = getHexEnv('TEVWH_ELF_BASE');
my $ELF_ALIGN = getHexEnv('TEVWH_ELF_ALIGN');
my $UNAME = Misc::get_env('TEVWH_UNAME');
my $ARCH = Misc::get_env('TEVWH_ARCH');
my $OUT = Misc::get_env('TEVWH_OUT');
my $TMP = Misc::get_env('TEVWH_TMP');

#
# first_line
#
sub first_line($;$)
{
  my $line = shift;
  my $base = shift;
  $ENTITY{$base} = $$line[0];
  return undef;
}

#
# credits
#
sub credits($;$)
{
  my $credits = shift;
  my $line = join(', ', sort(@$credits));
  $line =~ s/\s*\n//g;
  $ENTITY{'credits'} = $line;

  return undef;
}

#
# sh_variables
#
sub sh_variables($;$)
{
  my $line = shift;
  my $base = shift() . '.' ;

  for $_(@$line)
  {
    # catch hexadecimal values through variable postfix
    if (m/^([\w]+)_x=([A-Fa-f0-9]+)/)
    {
      my $value = hex($2);
      my $var = $1; $var =~ s/_/./g;
      set_num $base . $var, $value;
    }
    elsif (m/^([\w]+)=([A-Fa-f0-9]+)/)
    {
      my $value = int($2);
      my $var = $1; $var =~ s/_/./g;
      set_num $base . $var, $value;
    }
  }
  return undef;
}

#
# od_entry_point
#
sub od_entry_point($;$)
{
  my $line = shift;
  my $base = shift;

  return "\$#\$line == $#$line" if ($#$line != 0);
  my @number = split(/ /, $$line[0]);
  return "\$#number == $#number" if ($#number != 1);

  my $number = $number[0];
  $number =~ s/ *0x//;
  my $start = hex($number);

  set_num $base . '.start', $start;
  my $ofs = $start - $ELF_BASE;
  set_num $base . '.ofs', $ofs;

  return undef;
}

#
# nm_write
#
sub nm_write($;$)
{
  my $line = shift;
  my $base = shift() . '.nm.';

  for $_(@$line)
  {
    next if (!(m/\s_+write\b/));
    my @number = split;
    return "\$#number == $#number" if ($#number != 2);

    set_num $base . 'value', hex($number[0]);
    $ENTITY{$base . 'name'} = $number[2];
    return undef;
  }
  return 'No symbol matching "libc_write" found.';
}

#
# ls
#
sub ls($$)
{
  my $ref_line = shift;
  my $base = shift;

  my $nr = 0;
  while(my $line0 = $$ref_line[0])
  {
    if (!($line0 =~ m/^-r[w-]/))
    {
      return ($nr == 0) ? "Expected -rw or -r-\ngot [$line0]" : undef;
    }
    my @number = split(/ +/, shift(@$ref_line));
    return "\$#number == $#number" if ($#number < 4);
    set_num $base . '.' . $nr++ . '.size', $number[4];
  }
  return undef;
}

#
# disasm
#
sub disasm($$)
{
  my $ref_line = shift;
  my $base = shift;

  my $d = Disasm->new($ARCH, $base);
  return $d->disasm($ref_line);
}

#
# scanner_summary
#
sub scanner_summary($$)
{
  my $line = shift;
  my $base = shift() . '.';

  my @assignment = split /[; ]+/, $$line[$#$line];
  for my $assignment(@assignment)
  {
    if ($assignment =~ m/^(\w+)=(0x)([A-Fa-f0-9]+)$/)
      { set_num $base . $1, hex($3); }
    elsif ($assignment =~ m/^(\w+)=([0-9]+)$/)
      { set_num $base . $1, $2; }
    else
      { return "Illegal assignment $assignment"; }
  }
  return undef;
}

#
# gdb_stack
#
sub gdb_stack($$)
{
  my $line = shift;
  my $base = shift() . '.gdb.stack';

  my $frame;
  my $value = 0;
  for $_(@$line)
  {
    if (s/^\(gdb\)\s+#// || s/^#//)
    {
      my @word = split;
      $frame = $base . '.frame' . $word[0];
      my $prefix = $frame . '.ret';
      set_num $prefix, hex($word[1]);
      $ENTITY{$prefix} = $word[3];
    }
    elsif (s/^\s*at\s+//)
    {
      my @word = split /:/;

      my $prefix = $frame . '.source';
      set_num $prefix, $word[1];
      $ENTITY{$prefix} = $word[0];
    }
    elsif (s/^\(gdb\) 0x([0-9A-Fa-f]+):\s+/$1 /)
    {
      my @number = split;
      my $addr = hex($number[0]);

      for my $number(@number[1..$#number])
      {
        my $prefix = $base . $value++;
        set_num $prefix, hex($number);
	set_num $prefix . '.addr', $addr;
	$addr += 4;
      }
    }
  }
  return undef;
}

#
# segments
#
sub segments
{
  my $ref_line = shift;
  my $base = shift;
  my $readelf = shift;
  my $filename = shift;

  my $msg = ls($ref_line, $base);
  return 'ls: ' . $msg if defined($msg);

  my $set = new SegmentSet($readelf);
  $msg = $set->read($ref_line);
  return $msg if defined($msg);

  set_num $base . '.e_phnum', $set->{e_phnum};
  set_num $base . '.e_phoff', $set->{e_phoff};
  my $entry_point = $set->{ENTRY_POINT};
  set_num $base . '.entry.point', $entry_point;
  set_num $base . '.start.ofs', $entry_point - $ELF_BASE;
  
  my %count_type;
  my $ref_segment = $set->{SEGMENT};
  for my $segment(@$ref_segment)
  {
    my $type_name = $segment->{TypeName};
    my $count = ++$count_type{$type_name};
    my $name = $base . '.' . $type_name . $count;

    my $offset = hex($segment->{Offset});
    my $virtaddr = hex($segment->{VirtAddr});
    my $filesiz = hex($segment->{FileSiz});
    my $memsiz = hex($segment->{MemSiz});

    set_num $name . '.offset', $offset;
    set_num $name . '.virtaddr', $virtaddr;
    set_num $name . '.filesiz', $filesiz;
    set_num $name . '.memsiz', $memsiz;
    set_num $name . '.virt.end', $virtaddr + $memsiz;
    set_num $name . '.file.end', $offset + $filesiz;
    set_num $name . '.align', hex($segment->{Align});
  }
  
  my $ref_load = $set->{LOAD};
  my $load1 = $$ref_load[0];
  my $align;
  if (defined($load1))
  {
    $align = hex($load1->{Align});
    return "\$align = $align != $ELF_ALIGN" if ($align != $ELF_ALIGN);

    my $virtaddr = hex($load1->{VirtAddr});
    return sprintf("\$virtaddr = %#x != %#x", $virtaddr, $ELF_BASE)
    if ($virtaddr != $ELF_BASE);

    # 'evil.magic.ofs.D' is set by parse_od()
    my $ofs = $ENTITY{$base . '.ofs.D'};
    if (defined($ofs))
    {
      my $codesize = hex($load1->{FileSiz}) - $ofs;
      set_num $base . '.codesize', $codesize;
      my $bloat = $codesize / check($base . '.0.size.D');

      my $name = $base . '.bloat';
      die "defined($name) [$ENTITY{$name}] vs. [$bloat]"
      if (defined($ENTITY{$name}));

      $ENTITY{$name} = sprintf("%.3f", $bloat);
      $ENTITY{$name . '.percent'} = sprintf("%.0f", $bloat * 100);
    }
  }

  my $load2 = $$ref_load[1];
  if (defined($load2))
  {
    my $name = $base . '.LOAD1.virt.end.D';
    my $end = $ENTITY{$name} || return "undefined $name";
    set_num $base . '.entry.point.dist.to.end', $end - $entry_point;

    my ( $file_diff, $virt_diff ) = $load1->calc_gap($load2);
    $base .= '.LOAD2.';
    set_num $base . 'file.diff', $file_diff;
    set_num $base . 'virt.diff', $virt_diff;
    my $align_diff = $virt_diff - $align;
    set_num $base . 'diff.align', $align_diff if ($align_diff >= 0);
  }
  return undef;
} 

#
# sections
# 
sub sections($$)
{ 
  my $ref_line = shift;
  my $base = shift;
  my $from_readelf = shift;
  my $filename = shift;

  my $set = new SectionSet($from_readelf);
  my $msg = $set->read($ref_line);
  return $msg if defined($msg);

  my $section = $set->{'.text'};
  return 'No section ".text".' if (!defined($section));

  my $e_shnum = $set->{e_shnum};
  if (defined($e_shnum))
  {
    $e_shnum = hex($e_shnum);
    set_num($base . '.e_shnum', $e_shnum);
    my $e_shoff = $set->{e_shoff};
    if (defined($e_shoff))
    {
      $e_shoff = hex($e_shoff);
      set_num $base . '.e_shoff', $e_shoff;
      my $sizeof_Shdr = Entity::check("elf.sizeof.Elf$ELF_ADDR_SIZE.Shdr.D");
      set_num $base . '.section.table.end',
	$e_shoff + $e_shnum * $sizeof_Shdr;
    }
  }
  $base .= '.text.';
  set_num $base . 'addr', hex($section->{ADDR});
  set_num $base . 'off', hex($section->{OFFSET});
  set_num $base . 'size', hex($section->{SIZE});
  return undef;
} 

#
# main
#

$ENTITY{'base.addr'} = sprintf('0x%x', $ELF_BASE);

Misc::parse_file_set 0,
  [ 'credits', 'CREDITS' ],
  [ 'sh_variables', "$OUT/magic_elf/addr_of_main", 'magic.elf' ],
  [ 'sh_variables', "$OUT/evil_magic/ofs_entry", 'elf' ],
  [ 'disasm', "$OUT/magic_elf/gdb", 'magic_elf.gdb' ],
  [ 'disasm', "$OUT/evil_magic/static_main.gdb", 'magic_elf_static.gdb' ],
  [ 'scanner_summary', "$OUT/scanner/segment_padding/big.dynamic",
	'segment.padding.big.d' ],
  [ 'scanner_summary', "$OUT/scanner/segment_padding/big.static",
	'segment.padding.big.s' ],
  [ 'scanner_summary', "$OUT/scanner/additional_cs/big.dynamic",
	'additional.cs.big.d' ],
  [ 'scanner_summary', "$OUT/scanner/additional_cs/big.static",
	'additional.cs.big.s' ],
  [ 'segments', "$OUT/additional_cs/objdump", 'additional.cs', 0 ],

  # set evil.magic.ofs.D, required by segments below
  [ 'od_entry_point', "$OUT/evil_magic/e_entry", 'evil.magic' ],

  [ 'nm_write', "$OUT/evil_magic/nm", 'evil.magic' ],
  [ 'segments', "$OUT/segment_padding/objdump", 'infe1', 0 ],
  [ 'disasm', "$OUT/entry_point/sh.gdb", 'sh.gdb' ],
  [ 'ls', "ls -l $TMP/one_step_closer/i1/infection |", 'infection' ],
  [ 'first_line', "$OUT/scanner/segment_padding/infect", 'target.sh' ]
  ;
Misc::parse_file_set 1,
  [ 'sections', "$OUT/sections/readelf", 'evil.magic', 1 ],
  [ 'sections', "$OUT/sections/objdump", 'evil.magic', 0 ]
  ;
Misc::parse_file_set 1,
  [ 'sections', "$OUT/sections/sh/readelf", 'sh', 1 ],
  [ 'sections', "$OUT/sections/sh/objdump", 'sh', 0 ]
  ;
Misc::parse_file_set 1,
  [ 'segments', "$OUT/segments/readelf", 'evil.magic', 1 ],
  [ 'segments', "$OUT/segments/objdump", 'evil.magic', 0 ]
  ;
Misc::parse_file_set 1,
  [ 'segments', "$OUT/segments/sh/readelf", 'sh', 1 ],
  [ 'segments', "$OUT/segments/sh/objdump", 'sh', 0 ]
  ;

if ($OUT =~ m#out/i386-re.*-linux#)
{
  Misc::parse_file_set 0,
    [ 'disasm', "$OUT/entry_point/sh.disasm", 'sh.disasm' ],
    [ 'disasm', "$OUT/entry_point/e2.disasm", 'infe2.disasm' ],
    [ 'disasm', "$OUT/entry_point/e3.disasm", 'infe3.disasm' ],
    [ 'gdb_stack', "$OUT/stub_revisited/stack", 'evil.magic' ],
    ;
  
  # check 'evil.magic.LOAD1.filesiz.D', 0x97;
}

set_num_diff 'additional.cs.0.size.delta',
  'additional.cs.0.size.D', 'additional.cs.2.size.D';

check 'sh.entry.point.D', check('sh.text.addr.D');
check 'evil.magic.entry.point.D', check('evil.magic.start.D');

# check_eq 'sh.gdb.call0.name', '__libc_start_main';
# check 'evil.magic.gdb.stack4.D',
# check('evil.magic.gdb.stack.frame1.ret.D');

set_num_diff 'infe1.0.size.delta',
  'infe1.0.size.D', 'infe1.2.size.D';
set_num_diff 'infe1.LOAD1.filesiz.delta',
  'infe1.LOAD1.filesiz.D', 'sh.LOAD1.filesiz.D';
set_num_diff 'infe1.LOAD2.offset.delta',
  'infe1.LOAD2.offset.D', 'sh.LOAD2.offset.D';
set_num_diff 'infe1.DYNAMIC1.offset.delta',
  'infe1.DYNAMIC1.offset.D', 'sh.DYNAMIC1.offset.D';

open(XML, "> $::xml") || die "$!";
for my $name(sort(keys(%ENTITY)))
{
  printf XML "<!ENTITY calc.%s \"%s\">\n", $name, $ENTITY{$name};
}
close XML;
