Monday, January 30, 2012
99 ways to program a hex, Part 22: C89, const correctness, assertive, system calls, full buffering
So yesterday I presented a
non-portable version that was quite a bit faster than the portable version,
but I'm not quite done yet. That version just buffered a line at a
time—today's version buffers nearly 8k
worth of data (it's not exact, but it's close enough) between calls to
write()
.
/************************************************************************* * * Copyright 2012 by Sean Conner. All Rights Reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Comments, questions and criticisms can be sent to: sean@conman.org * *************************************************************************/ /* Style: C89, const correctness, assertive, system calls, full buffering */ #include <stdlib.h> #include <string.h> #include <errno.h> #include <assert.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <unistd.h> #define LINESIZE 16 /********************************************************************/ extern const char *sys_errlist[]; extern int sys_nerr; static void do_dump (const int,const int); static size_t dump_line (char **const,unsigned char *,size_t,const unsigned long); static void hexout (char *const,unsigned long,size_t,const int); static void myperror (const char *const); static size_t myread (const int,char *,size_t); static void mywrite (const int,const char *const,const size_t); /********************************************************************/ int main(const int argc,const char *const argv[]) { if (argc == 1) do_dump(STDIN_FILENO,STDOUT_FILENO); else { int i; for (i = 1 ; i < argc ; i++) { int fhin; fhin = open(argv[i],O_RDONLY); if (fhin == -1) { myperror(argv[i]); continue; } mywrite(STDOUT_FILENO,"-----",5); mywrite(STDOUT_FILENO,argv[i],strlen(argv[i])); mywrite(STDOUT_FILENO,"-----\n",6); do_dump(fhin,STDOUT_FILENO); if (close(fhin) < 0) myperror(argv[i]); } } return EXIT_SUCCESS; } /************************************************************************/ static void do_dump(const int fhin,const int fhout) { unsigned char buffer[4096]; char outbuffer[75 * 109]; char *pout; unsigned long off; size_t bytes; size_t count; assert(fhin >= 0); assert(fhout >= 0); memset(outbuffer,' ',sizeof(outbuffer)); off = 0; count = 0; pout = outbuffer; while((bytes = myread(fhin,(char *)buffer,sizeof(buffer))) > 0) { unsigned char *p = buffer; for (p = buffer ; bytes > 0 ; ) { size_t amount; amount = dump_line(&pout,p,bytes,off); p += amount; bytes -= amount; off += amount; count++; if (count == 109) { mywrite(fhout,outbuffer,(size_t)(pout - outbuffer)); memset(outbuffer,' ',sizeof(outbuffer)); count = 0; pout = outbuffer; } } } if ((size_t)(pout - outbuffer) > 0) mywrite(fhout,outbuffer,(size_t)(pout - outbuffer)); } /********************************************************************/ static size_t dump_line( char **const pline, unsigned char *p, size_t bytes, const unsigned long off ) { char *line; char *dh; char *da; size_t count; assert(pline != NULL); assert(*pline != NULL); assert(p != NULL); assert(bytes > 0); line = *pline; hexout(line,off,8,':'); if (bytes > LINESIZE) bytes = LINESIZE; p += bytes; dh = &line[10 + bytes * 3]; da = &line[58 + bytes]; for (count = 0 ; count < bytes ; count++) { p --; da --; dh -= 3; if ((*p >= ' ') && (*p <= '~')) *da = *p; else *da = '.'; hexout(dh,(unsigned long)*p,2,' '); } line[58 + count] = '\n'; *pline = &line[59 + count]; return count; } /**********************************************************************/ static void hexout(char *const dest,unsigned long value,size_t size,const int padding) { assert(dest != NULL); assert(size > 0); assert((padding >= ' ') && (padding <= '~')); dest[size] = padding; while(size--) { dest[size] = (char)((value & 0x0F) + '0'); if (dest[size] > '9') dest[size] += 7; value >>= 4; } } /************************************************************************/ static void myperror(const char *const s) { int err = errno; assert(s != NULL); mywrite(STDERR_FILENO,s,strlen(s)); mywrite(STDERR_FILENO,": ",2); if (err > sys_nerr) mywrite(STDERR_FILENO,"(unknown)",9); else mywrite(STDERR_FILENO,sys_errlist[err],strlen(sys_errlist[err])); mywrite(STDERR_FILENO,"\n",1); } /************************************************************************/ static size_t myread(const int fh,char *buf,size_t size) { size_t amount = 0; assert(fh >= 0); assert(buf != NULL); assert(size > 0); while(size > 0) { ssize_t bytes; bytes = read(fh,buf,size); if (bytes < 0) { myperror("read()"); exit(EXIT_FAILURE); } if (bytes == 0) break; amount += bytes; size -= bytes; buf += bytes; } return amount; } /*********************************************************************/ static void mywrite(const int fh,const char *const msg,const size_t size) { assert(fh >= 0); assert(msg != NULL); assert(size > 0); if (write(fh,msg,size) < (ssize_t)size) { if (fh != STDERR_FILENO) myperror("output"); exit(EXIT_FAILURE); } } /***********************************************************************/
And that makes all the difference. The portable vesrsion:
[spc]lucy:~/projects/99/src>time ./12 ~/bin/firefox/libxul.so >/dev/null real 0m4.985s user 0m4.969s sys 0m0.015s
Our first stab at a non-portable, but possibly faster version:
[spc]lucy:~/projects/99/src>time ./20 ~/bin/firefox/libxul.so >/dev/null real 0m2.936s user 0m1.511s sys 0m1.425s
The “it's quite a bit faster” version:
[spc]lucy:~/projects/99/src>time ./21 ~/bin/firefox/libxul.so >/dev/null real 0m0.957s user 0m0.645s sys 0m0.313s
And finally, the punchline—today's version:
[spc]lucy:~/projects/99/src>time ./22 ~/bin/firefox/libxul.so >/dev/null real 0m0.460s user 0m0.448s sys 0m0.012s
And yes, that's the real output—1/10 the time of the portable version with a similar amount of time in the kernel.
Frankly, I was a bit surprised at these results—not that the non-portable version was faster (that's almost a given) but the magnitude of the results. I didn't think the standard C library had that much overhead. I was expecting easily a percentage increase in speed, but even twice would have been unexpected, but ten times faster?
Wow.
Increasing the size of the buffer past what I have probably won't help all that much, and in fact, when I doubled the buffer size:
[spc]lucy:~/projects/99/src>time ./22 ~/bin/firefox/libxul.so >/dev/null real 0m0.592s user 0m0.582s sys 0m0.010s
The timing difference could be due to cache effects, maybe?
So I think we've maxed out the speed at which this program will run. As a test, I profiled the code to see if there was anything I migh have missed:
Each sample counts as 0.01 seconds. % cumulative self self total time seconds seconds calls ms/call ms/call name 100.21 0.41 0.41 1 410.86 410.86 do_dump 0.00 0.41 0.00 9197 0.00 0.00 mywrite
I checked the code GCC produced (all code was compiled
with -O3
, a very high level of optimization) and well, I'm not
sure I could have done much better, and probably would have done worse—GCC
inlined everything into do_dump()
(with the exception
of main()
and mywrite()
), something I would
not have done in assembly (and have any hope of code reuse for
another project). So I think we're done with making this code fast.
That's not to say I won't do an assembly version of this program, but it probably won't be for the x86 line.
- Part 21: C89, const correctness, assertive, system calls, per line buffering
- Part 23: C89, const correctness, assertive, system calls, full buffering, lookup table
Double facepalm
R, who runs the Ft. Lauderdale Office of The Corporation, stopped by and informed me that we now have a source license to The Protocol Stack From Hell™.
WE HAVE A SOURCE LICENSE TO THE PROTOCOL STACK FROM HELL™!
It's about time.
Now I can figure out why I needed that mutex around a few calls.
At this point—I don't know what's worse, not knowing what's in the code, or knowing what's in the code.
Okay, curiosity got the better of me, and we all know what happens to curious cats. I could only be so lucky.
Let's see … K&R style code, standard vowel impairment, no comments—that I was expecting.
C89 style declarations a nice plus—I guess they needed to upate the code at some point in the past, but global variables all over the place? For a library?
No wonder I needed those mutexes all over the place.
And more unbelivably, the first file I took a look at wouldn't compile at all because the braces were mis-aligned! Further investigation of the code revealed twelve (12) such files!
I don't even want to know how much was spent on this.
And no, this isn't some off version. No, this is the version we're using!
Sounds of a programmer dominated office
Yeah, this sounds about right for an office dominated by programmers (link via Reddit)