The Boston Diaries

The ongoing saga of a programmer who doesn't live in Boston, nor does he even like Boston, but yet named his weblog/journal “The Boston Diaries.”

Go figure.

Saturday, January 28, 2012

99 ways to program a hex, Part 20: C89, const correctness, assertive, system calls

When last we left the C versions, we pretty much hit the limit of what we could do using the standard C library to remain portable (well, we did use a GCC extension). Not much else we can do, unless we want to leave the Land of Portability™ and start hitting some system specific calls.

So, that's what this version does—it eschews the use of the standard C library (except for exit(), errno, strlen() and memset()—while I could replace these with my own versions, C compilers can and will produce better optimized versions than I can write) and goes straight for the system calls.

This means I will have to write my own code to convert binary to hexadecimal, but I've written such code plenty of times before.

/*************************************************************************
*
* Copyright 2012 by Sean Conner.  All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*
* Comments, questions and criticisms can be sent to: sean@conman.org
*
*************************************************************************/

/* Style: C89, const correctness, assertive, system calls */

#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

/* Maximum number of input bytes rendered on one line of output; it also
 * sizes the per-line hex and ASCII scratch buffers in dump_line(). */
#define LINESIZE	16

/********************************************************************/

/*
 * Error-message table and its entry count, declared by hand here.  They
 * are consulted only by myperror() to turn an errno value into text.
 */
extern const char *sys_errlist[];
extern int         sys_nerr;

/*
 * Forward declarations of the file-local helpers, in alphabetical order:
 *   do_dump   - dump everything readable from one descriptor to another
 *   dump_line - format and emit a single line of at most LINESIZE bytes
 *   hexout    - render a value as fixed-width hex followed by a pad char
 *   myperror  - report the current errno on standard error
 *   myread    - read() wrapper that exits the program on error
 *   mywrite   - write() wrapper that exits the program on error
 */
static void	do_dump		(const int,const int);
static size_t	dump_line	(const int,unsigned char *,size_t,const unsigned long);
static void	hexout		(char *,unsigned long,size_t,const int);
static void	myperror	(const char *const);
static size_t	myread		(const int,char *,size_t);
static void	mywrite		(const int,const char *const,const size_t);

/********************************************************************/

/*
 * Entry point.  With no arguments, dump standard input.  Otherwise dump
 * each named file in turn, preceded by a "-----name-----" banner; a file
 * that cannot be opened is reported on standard error and skipped.
 * Always returns 0.
 */
int main(const int argc,const char *const argv[])
{
  int idx;

  if (argc == 1)
  {
    do_dump(STDIN_FILENO,STDOUT_FILENO);
    return 0;
  }

  for (idx = 1 ; idx < argc ; idx++)
  {
    const char *const name = argv[idx];
    const int         fd   = open(name,O_RDONLY);

    if (fd == -1)
    {
      myperror(name);
      continue;
    }

    /* banner identifying which file the following dump belongs to */
    mywrite(STDOUT_FILENO,"-----",5);
    mywrite(STDOUT_FILENO,name,strlen(name));
    mywrite(STDOUT_FILENO,"-----\n",6);

    do_dump(fd,STDOUT_FILENO);

    if (close(fd) < 0)
      myperror(name);
  }

  return 0;
}
      
/************************************************************************/     

/*
 * Read everything available from fhin in blocks of up to 4096 bytes and
 * emit it to fhout one formatted line at a time.  The running offset
 * passed to dump_line() starts at zero and advances by the number of
 * bytes each line consumed.
 */
static void do_dump(const int fhin,const int fhout)
{
  unsigned char block[4096];
  unsigned long offset = 0;
  size_t        filled;

  assert(fhin  >= 0);
  assert(fhout >= 0);

  for (;;)
  {
    size_t done = 0;

    filled = myread(fhin,(char *)block,sizeof(block));
    if (filled == 0)
      break;

    /* hand the block to dump_line() until every byte has been consumed */
    while (done < filled)
    {
      size_t used = dump_line(fhout,&block[done],filled - done,offset);

      done   += used;
      offset += used;
    }
  }
}

/********************************************************************/

/*
 * Format and write one line of the dump to fhout.
 *
 *   fhout - descriptor to write to
 *   p     - first byte to display
 *   bytes - number of bytes available at p (must be > 0); at most
 *           LINESIZE of them are used
 *   off   - offset to print at the start of the line (only the low
 *           eight hex digits are shown)
 *
 * The line consists of the 8-digit offset and ':', a space, a hex column
 * that is always LINESIZE * 3 characters wide (space padded for short
 * lines), the printable-ASCII rendering of the bytes ('.' for anything
 * outside ' '..'~'), and a newline.
 *
 * Returns the number of bytes consumed from p.
 */
static size_t dump_line(
	const int            fhout,
	unsigned char       *p,
	size_t               bytes,
	const unsigned long  off
)
{
  size_t count;
  char   addr [9];
  char   hex  [LINESIZE * 3];
  char   ascii[LINESIZE];
  char  *dh;
  char  *da;
  
  assert(fhout >= 0);
  assert(p     != NULL);
  assert(bytes >  0);
  
  /* Pre-fill both columns with blanks so short lines stay aligned.  Each
   * buffer is cleared using its OWN size: clearing ascii[] with
   * sizeof(hex) would write past the end of the smaller array. */
  memset(hex  ,' ',sizeof(hex));
  memset(ascii,' ',sizeof(ascii));

  hexout(addr,off,8,':');
  if (bytes > LINESIZE)
    bytes = LINESIZE;
    
  /* The line is built back to front: start one past the last byte and
   * its output slots, then walk all three cursors towards the start. */
  p  += bytes;
  dh  = &hex[bytes * 3];
  da  = &ascii[bytes];
  
  assert(addr[8] == ':');
  assert(bytes <= LINESIZE);
  assert(dh == &hex  [bytes * 3]);
  assert(da == &ascii[bytes]);
  
  for (count = 0 ; (count < bytes) && (count < LINESIZE) ; count++)
  {
    p  --;
    da --;
    dh -= 3;
    
    if ((*p >= ' ') && (*p <= '~'))
      *da = *p;
    else
      *da = '.';
    
    /* two hex digits plus the trailing separator fill exactly 3 chars */
    hexout(dh,(unsigned long)*p,2,' ');
  }
  
  assert(dh == hex);
  assert(da == ascii);
  
  mywrite(fhout,addr,sizeof(addr));
  mywrite(fhout," ",1);
  mywrite(fhout,hex,sizeof(hex));
  mywrite(fhout,ascii,count);
  mywrite(fhout,"\n",1);

  return count;
}

/**********************************************************************/  

/*
 * Render the low `size` hex digits of `value` (upper case, zero filled)
 * into dest[0..size-1] and store `padding` in dest[size].  No NUL
 * terminator is written; dest must have room for size + 1 characters.
 *
 * Digits are produced from the least significant end so that each one
 * is simply the bottom four bits of what remains of `value`.
 */
static void hexout(char *dest,unsigned long value,size_t size,const int padding)
{
  static const char digits[] = "0123456789ABCDEF";
  char             *cursor;

  assert(dest != NULL);
  assert(size >  0);
  assert((padding >= ' ') && (padding <= '~'));

  cursor  = dest + size;
  *cursor = (char)padding;

  while (cursor != dest)
  {
    cursor--;
    *cursor  = digits[value & 0x0F];
    value  >>= 4;
  }
}

/************************************************************************/

/*
 * Minimal perror() replacement: write "<s>: <error text>\n" to standard
 * error, where the text is looked up from sys_errlist[] using the value
 * errno had on entry.  "(unknown)" is printed when errno is outside the
 * table (valid indices are 0 .. sys_nerr - 1) or the entry has no text.
 *
 *   s - prefix to print before the colon; must not be NULL, may be empty
 */
static void myperror(const char *const s)
{
  int         err = errno;	/* capture first; the writes below may change it */
  const char *msg = NULL;
  size_t      len;
  
  assert(s != NULL);
  
  /* mywrite() insists on a non-zero length, so skip an empty prefix
   * (e.g. an empty file name given on the command line) */
  len = strlen(s);
  if (len > 0)
    mywrite(STDERR_FILENO,s,len);
  mywrite(STDERR_FILENO,": ",2);
  
  if ((err >= 0) && (err < sys_nerr))
    msg = sys_errlist[err];
  
  if ((msg == NULL) || (*msg == '\0'))
    mywrite(STDERR_FILENO,"(unknown)",9);
  else
    mywrite(STDERR_FILENO,msg,strlen(msg));
  mywrite(STDERR_FILENO,"\n",1);
}

/************************************************************************/

/*
 * Fill buf with up to `size` bytes from fh, calling read() repeatedly
 * until the buffer is full or end of file is reached.  A read() error
 * is reported via myperror("read()") and terminates the program.
 *
 * Returns the number of bytes stored, which is less than `size` only
 * when end of file was hit (0 if nothing was left to read).
 */
static size_t myread(const int fh,char *buf,size_t size)
{
  size_t  total = 0;
  ssize_t got;

  assert(fh   >= 0);
  assert(buf  != NULL);
  assert(size >  0);

  while (total < size)
  {
    got = read(fh,buf + total,size - total);

    if (got < 0)
    {
      myperror("read()");
      exit(EXIT_FAILURE);
    }

    if (got == 0)	/* end of file */
      break;

    total += (size_t)got;
  }

  return total;
}

/*********************************************************************/  
  
/*
 * Write exactly `size` bytes from msg to fh, or terminate the program.
 *
 * write() may legitimately accept fewer bytes than requested, so the
 * call is repeated for the remainder rather than treating a short count
 * as an error (which would also report a stale errno).  A call that was
 * interrupted by a signal before writing anything is retried.
 *
 * On a real failure the error is reported via myperror("output"),
 * unless the failing descriptor is standard error itself (reporting
 * there would just fail again), and the program exits with
 * EXIT_FAILURE.
 */
static void mywrite(const int fh,const char *const msg,const size_t size)
{
  const char *p    = msg;
  size_t      left = size;

  assert(fh   >= 0);
  assert(msg  != NULL);
  assert(size >  0);
  
  while(left > 0)
  {
    ssize_t bytes = write(fh,p,left);
    
    if (bytes > 0)
    {
      p    += bytes;
      left -= (size_t)bytes;
      continue;
    }
    
    if ((bytes < 0) && (errno == EINTR))
      continue;
    
    /* no progress and no error code: report it as an I/O error instead
     * of looping forever */
    if (bytes == 0)
      errno = EIO;
    
    if (fh != STDERR_FILENO)
      myperror("output");
      
    exit(EXIT_FAILURE);
  }
}

/***********************************************************************/

The major trick here is that I generate the output for each line backwards! I do that because it's easier to generate the hexadecimal output that way. Generating the hexadecimal output “forwards” would mean I need to rotate the first four bits down into position (so with a 32-bit value, I would need to shift the bits down 28 positions), then generate the hex digit, then rotate the next four bits down 24 positions, but by then, I'm doing repeated rotates and discarding all the work I did previously for each digit. And if I only want to work with 8 bits, I have to have another special function to handle that, or complicate one function to handle different numbers of bits.

But by going backwards, I start with the last four bits, which are already in the “proper position” to generate a digit, then shift everything down four bits, and keep repeating this until the specified number of hexadecimal digits are produced.

So, while the amount of code goes up, it is faster than the more portable version:

[spc]lucy:~/projects/99/src>time ./12 ~/bin/firefox/libxul.so >/dev/null

real    0m4.985s
user    0m4.969s
sys     0m0.015s
[spc]lucy:~/projects/99/src>time ./20 ~/bin/firefox/libxul.so >/dev/null

real    0m2.936s
user    0m1.511s
sys     0m1.425s

It's almost twice as fast, yet it spends a disturbingly large amount of time (compared to the portable version) in the kernel. It's because of all the calls to write() I do. That's a problem I'll attack in the next version.

Obligatory Picture

[The future's so bright, I gotta wear shades]

Obligatory Contact Info

Obligatory Feeds

Obligatory Links

Obligatory Miscellaneous

You have my permission to link freely to any entry here. Go ahead, I won't bite. I promise.

The dates are the permanent links to that day's entries (or entry, if there is only one entry). The titles are the permanent links to that entry only. The format for the links is simple: Start with the base link for this site: https://boston.conman.org/, then add the date you are interested in, say 2000/08/01, so that would make the final URL:

https://boston.conman.org/2000/08/01

You can also specify the entire month by leaving off the day portion. You can even select an arbitrary portion of time.

You may also note subtle shading of the links and that's intentional: the “closer” the link is (relative to the page) the “brighter” it appears. It's an experiment in using color shading to denote the distance a link is from here. If you don't notice it, don't worry; it's not all that important.

It is assumed that every brand name, slogan, corporate name, symbol, design element, et cetera mentioned in these pages is a protected and/or trademarked entity, the sole property of its owner(s), and acknowledgement of this status is implied.

Copyright © 1999-2024 by Sean Conner. All Rights Reserved.