The Boston Diaries

The ongoing saga of a programmer who doesn't live in Boston, nor does he even like Boston, but yet named his weblog/journal “The Boston Diaries.”

Go figure.

Wednesday, February 01, 2012

99 ways to program a hex, Part 24: more lookup tables

So we went from a character encoding specific version to a character encoding agnostic version to today's version—another character encoding specific version (ASCII to be exact). But today's version also eliminates a branch point in the code, using a 256-element string to pick which character to display as part of the hexidecimal dump.

/*************************************************************************
*
* Copyright 2012 by Sean Conner.  All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*
* Comments, questions and criticisms can be sent to: sean@conman.org
*
*************************************************************************/

/* Style: C89, const correctness, assertive, system calls, full buffering */
/*	  lookup tables */

#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

#define LINESIZE	16

/********************************************************************/

extern const char *sys_errlist[];
extern int         sys_nerr;

static void	do_dump		(const int,const int);
static size_t	dump_line	(char **const,unsigned char *,size_t,const unsigned long);
static void	hexout		(char *const,unsigned long,size_t,const int);
static void	myperror	(const char *const);
static size_t	myread		(const int,char *,size_t);
static void	mywrite		(const int,const char *const,const size_t);

/********************************************************************/

int main(const int argc,const char *const argv[])
{
  if (argc == 1)
    do_dump(STDIN_FILENO,STDOUT_FILENO);
  else
  {
    int i;
    
    for (i = 1 ; i < argc ; i++)
    {
      int fhin;
      
      fhin = open(argv[i],O_RDONLY);
      if (fhin == -1)
      {
        myperror(argv[i]);
        continue;
      }
      
      mywrite(STDOUT_FILENO,"-----",5);
      mywrite(STDOUT_FILENO,argv[i],strlen(argv[i]));
      mywrite(STDOUT_FILENO,"-----\n",6);
      
      do_dump(fhin,STDOUT_FILENO);
      if (close(fhin) < 0)
        myperror(argv[i]);
    }
  }
  
  return EXIT_SUCCESS;
}
      
/************************************************************************/     

static void do_dump(const int fhin,const int fhout)
{
  unsigned char  buffer[4096];
  char           outbuffer[75 * 109];
  char          *pout;
  unsigned long  off;
  size_t         bytes;
  size_t         count;
  
  assert(fhin  >= 0);
  assert(fhout >= 0);

  memset(outbuffer,' ',sizeof(outbuffer));
  off      = 0;
  count    = 0;
  pout     = outbuffer;
  
  while((bytes = myread(fhin,(char *)buffer,sizeof(buffer))) > 0)
  {
    unsigned char *p = buffer;
    
    for (p = buffer ; bytes > 0 ; )
    {
      size_t amount;
      
      amount    = dump_line(&pout,p,bytes,off);
      p        += amount;
      bytes    -= amount;
      off      += amount;
      count++;
      
      if (count == 109)
      {
        mywrite(fhout,outbuffer,(size_t)(pout - outbuffer));
        memset(outbuffer,' ',sizeof(outbuffer));
        count    = 0;
        pout     = outbuffer;
      }      
    }
  }
  
  if ((size_t)(pout - outbuffer) > 0)
    mywrite(fhout,outbuffer,(size_t)(pout - outbuffer));
}

/********************************************************************/

static size_t dump_line(
	char                **const pline,
	unsigned char              *p,
	size_t                      bytes,
	const unsigned long         off
)
{
  char   *line;
  char   *dh;
  char   *da;
  size_t  count;
  
  assert(pline  != NULL);
  assert(*pline != NULL);
  assert(p      != NULL);
  assert(bytes  >  0);
  
  line = *pline;
  
  hexout(line,off,8,':');
  if (bytes > LINESIZE)
    bytes = LINESIZE;
  
  p  += bytes;
  dh  = &line[10 + bytes * 3];
  da  = &line[58 + bytes];
  
  for (count = 0 ; count < bytes ; count++)
  {
    p  --;
    da --;
    dh -= 3;
    
    *da = "................................ !\"#$%&'()*+,-./0123456789:;<=>?"
	"@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~."
	"................................................................"
	"........................................................"
	"........"[*p];
    
    hexout(dh,(unsigned long)*p,2,' ');
  }
  
  line[58 + count] = '\n';
  *pline = &line[59 + count];
  return count;
}

/**********************************************************************/  

static void hexout(char *const dest,unsigned long value,size_t size,const int padding)
{
  assert(dest != NULL);
  assert(size >  0);
  assert((padding >= ' ') && (padding <= '~'));
  
  dest[size] = padding;
  while(size--)
  {
    dest[size] = "0123456789ABCDEF"[value & 0x0f];
    value >>= 4;
  }
}

/************************************************************************/

static void myperror(const char *const s)
{
  int err = errno;
  
  assert(s != NULL);
  
  mywrite(STDERR_FILENO,s,strlen(s));
  mywrite(STDERR_FILENO,": ",2);
  
  if (err > sys_nerr)
    mywrite(STDERR_FILENO,"(unknown)",9);
  else
    mywrite(STDERR_FILENO,sys_errlist[err],strlen(sys_errlist[err]));
  mywrite(STDERR_FILENO,"\n",1);
}

/************************************************************************/

static size_t myread(const int fh,char *buf,size_t size)
{
  size_t amount = 0;
  
  assert(fh   >= 0);
  assert(buf  != NULL);
  assert(size >  0);
  
  while(size > 0)
  {
    ssize_t bytes;
    
    bytes = read(fh,buf,size);
    if (bytes < 0)
    {
      myperror("read()");
      exit(EXIT_FAILURE);
    }
    if (bytes == 0)
      break;
    
    amount += bytes;
    size   -= bytes;
    buf    += bytes;
  }
  
  return amount;
}

/*********************************************************************/  
  
static void mywrite(const int fh,const char *const msg,const size_t size)
{
  assert(fh   >= 0);
  assert(msg  != NULL);
  assert(size >  0);
  
  if (write(fh,msg,size) < (ssize_t)size)
  {
    if (fh != STDERR_FILENO)
      myperror("output");
      
    exit(EXIT_FAILURE);
  }
}

/***********************************************************************/

And it is faster:

[spc]lucy:~/projects/99/src>time ./23 ~/bin/firefox/libxul.so >/dev/null

real    0m0.258s
user    0m0.247s
sys     0m0.011s
[spc]lucy:~/projects/99/src>time ./24 ~/bin/firefox/libxul.so >/dev/null

real    0m0.186s
user    0m0.178s
sys     0m0.008s

About 1.3 times faster, but it is faster.

The conversion string is fixed, but that doesn't preclude a port to, say, an EBCIDIC system from using a different one, or the string being constructed at run time. The runtime generation would be more portable, but to me, that's wasted time spent generating a string that will always be the same (and frankly, if we're using this hack for speed, that's just wasted time).

Perhaps better might be several such strings, ASCII, EBCIDIC, Baudot, PETSCII and select via a command line option which one to use (defaulting to whatever character set is native for the platform the program is running on). It could be a useful thing.

But such a modification I'm leaving as an exercise for the reader.

Now, is this the fastest version possible? I'm not going to say yes this time. There might be something else that could be done to wring that last bit of performance out of this code, but at this point, I am definitely done with wringing out the speed.

I think.

Obligatory Picture

An abstract representation of where you're coming from]

Obligatory Contact Info

Obligatory Feeds

Obligatory Links

Obligatory Miscellaneous

You have my permission to link freely to any entry here. Go ahead, I won't bite. I promise.

The dates are the permanent links to that day's entries (or entry, if there is only one entry). The titles are the permanent links to that entry only. The format for the links are simple: Start with the base link for this site: https://boston.conman.org/, then add the date you are interested in, say 2000/08/01, so that would make the final URL:

https://boston.conman.org/2000/08/01

You can also specify the entire month by leaving off the day portion. You can even select an arbitrary portion of time.

You may also note subtle shading of the links and that's intentional: the “closer” the link is (relative to the page) the “brighter” it appears. It's an experiment in using color shading to denote the distance a link is from here. If you don't notice it, don't worry; it's not all that important.

It is assumed that every brand name, slogan, corporate name, symbol, design element, et cetera mentioned in these pages is a protected and/or trademarked entity, the sole property of its owner(s), and acknowledgement of this status is implied.

Copyright © 1999-2024 by Sean Conner. All Rights Reserved.