Wednesday, February 01, 2012
99 ways to program a hex, Part 24: more lookup tables
So we went from a character encoding specific version to a character encoding agnostic version to today's version—another character encoding specific version (ASCII to be exact). But today's version also eliminates a branch point in the code, using a 256-element string to pick which character to display as part of the hexidecimal dump.
/************************************************************************* * * Copyright 2012 by Sean Conner. All Rights Reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Comments, questions and criticisms can be sent to: sean@conman.org * *************************************************************************/ /* Style: C89, const correctness, assertive, system calls, full buffering */ /* lookup tables */ #include <stdlib.h> #include <string.h> #include <errno.h> #include <assert.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <unistd.h> #define LINESIZE 16 /********************************************************************/ extern const char *sys_errlist[]; extern int sys_nerr; static void do_dump (const int,const int); static size_t dump_line (char **const,unsigned char *,size_t,const unsigned long); static void hexout (char *const,unsigned long,size_t,const int); static void myperror (const char *const); static size_t myread (const int,char *,size_t); static void mywrite (const int,const char *const,const size_t); /********************************************************************/ int main(const int argc,const char *const argv[]) { if (argc == 1) do_dump(STDIN_FILENO,STDOUT_FILENO); else { int i; for (i = 1 ; i < argc ; i++) { int fhin; fhin = open(argv[i],O_RDONLY); if (fhin == -1) { myperror(argv[i]); continue; } mywrite(STDOUT_FILENO,"-----",5); mywrite(STDOUT_FILENO,argv[i],strlen(argv[i])); mywrite(STDOUT_FILENO,"-----\n",6); do_dump(fhin,STDOUT_FILENO); if (close(fhin) < 0) myperror(argv[i]); } } return EXIT_SUCCESS; } /************************************************************************/ static void do_dump(const int fhin,const int fhout) { unsigned char buffer[4096]; char outbuffer[75 * 109]; char *pout; unsigned long off; size_t bytes; size_t count; assert(fhin >= 0); assert(fhout >= 0); memset(outbuffer,' ',sizeof(outbuffer)); off = 0; count = 0; pout = outbuffer; while((bytes = myread(fhin,(char *)buffer,sizeof(buffer))) > 0) { unsigned char *p = buffer; for (p = buffer ; bytes > 0 ; ) { size_t amount; amount = dump_line(&pout,p,bytes,off); p += amount; bytes -= amount; off += amount; count++; if (count == 109) { mywrite(fhout,outbuffer,(size_t)(pout - outbuffer)); memset(outbuffer,' ',sizeof(outbuffer)); count = 0; pout = outbuffer; } } } if ((size_t)(pout - outbuffer) > 0) mywrite(fhout,outbuffer,(size_t)(pout - outbuffer)); } /********************************************************************/ static size_t dump_line( char **const pline, unsigned char *p, size_t bytes, const unsigned long off ) { char *line; char *dh; char *da; size_t count; assert(pline != NULL); assert(*pline != NULL); assert(p != NULL); assert(bytes > 0); line = *pline; hexout(line,off,8,':'); if (bytes > LINESIZE) bytes = LINESIZE; p += bytes; dh = &line[10 + bytes * 3]; da = &line[58 + bytes]; for (count = 0 ; count < bytes ; count++) { p --; da --; dh -= 3; *da = "................................ !\"#$%&'()*+,-./0123456789:;<=>?" "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~." "................................................................" "........................................................" "........"[*p]; hexout(dh,(unsigned long)*p,2,' '); } line[58 + count] = '\n'; *pline = &line[59 + count]; return count; } /**********************************************************************/ static void hexout(char *const dest,unsigned long value,size_t size,const int padding) { assert(dest != NULL); assert(size > 0); assert((padding >= ' ') && (padding <= '~')); dest[size] = padding; while(size--) { dest[size] = "0123456789ABCDEF"[value & 0x0f]; value >>= 4; } } /************************************************************************/ static void myperror(const char *const s) { int err = errno; assert(s != NULL); mywrite(STDERR_FILENO,s,strlen(s)); mywrite(STDERR_FILENO,": ",2); if (err > sys_nerr) mywrite(STDERR_FILENO,"(unknown)",9); else mywrite(STDERR_FILENO,sys_errlist[err],strlen(sys_errlist[err])); mywrite(STDERR_FILENO,"\n",1); } /************************************************************************/ static size_t myread(const int fh,char *buf,size_t size) { size_t amount = 0; assert(fh >= 0); assert(buf != NULL); assert(size > 0); while(size > 0) { ssize_t bytes; bytes = read(fh,buf,size); if (bytes < 0) { myperror("read()"); exit(EXIT_FAILURE); } if (bytes == 0) break; amount += bytes; size -= bytes; buf += bytes; } return amount; } /*********************************************************************/ static void mywrite(const int fh,const char *const msg,const size_t size) { assert(fh >= 0); assert(msg != NULL); assert(size > 0); if (write(fh,msg,size) < (ssize_t)size) { if (fh != STDERR_FILENO) myperror("output"); exit(EXIT_FAILURE); } } /***********************************************************************/
And it is faster:
[spc]lucy:~/projects/99/src>time ./23 ~/bin/firefox/libxul.so >/dev/null real 0m0.258s user 0m0.247s sys 0m0.011s [spc]lucy:~/projects/99/src>time ./24 ~/bin/firefox/libxul.so >/dev/null real 0m0.186s user 0m0.178s sys 0m0.008s
About 1.3 times faster, but it is faster.
The conversion string is fixed, but that doesn't preclude a port to, say, an EBCIDIC system from using a different one, or the string being constructed at run time. The runtime generation would be more portable, but to me, that's wasted time spent generating a string that will always be the same (and frankly, if we're using this hack for speed, that's just wasted time).
Perhaps better might be several such strings, ASCII, EBCIDIC, Baudot, PETSCII and select via a command line option which one to use (defaulting to whatever character set is native for the platform the program is running on). It could be a useful thing.
But such a modification I'm leaving as an exercise for the reader.
Now, is this the fastest version possible? I'm not going to say yes this time. There might be something else that could be done to wring that last bit of performance out of this code, but at this point, I am definitely done with wringing out the speed.
I think.