Friday, February 03, 2012
99 ways to program a hex, part 26: C89, system calls and mmap()
I still stand by what I said in part 24:
Now, is this the fastest version possible? I'm not going to say yes this time. There might be something else that could be done to wring that last bit of performance out of this code, but at this point, I am definitely done with wringing out the speed.
That didn't prevent Dave Täht from sending in a
patch to part 24 that used mmap()
(a system call that does some
magic to make a file suddenly appear in memory) which did better on his
system, although it was a percentage gain, not an order of magnitude
gain.
/************************************************************************* * * Copyright 2012 by Sean Conner. All Rights Reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Comments, questions and criticisms can be sent to: sean@conman.org * *************************************************************************/ /* Style: C89, const correctness, assertive, system calls, full buffering */ /* lookup tables, mmap() */ #define _GNU_SOURCE #include <stdlib.h> #include <string.h> #include <errno.h> #include <assert.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/mman.h> #include <fcntl.h> #include <unistd.h> #define LINESIZE 16 /********************************************************************/ extern const char *sys_errlist[]; extern int sys_nerr; static void do_dump (const int,const int); static size_t do_dump_memory (unsigned char *,size_t,size_t,const int); static size_t dump_line (char **const,unsigned char *,size_t,const unsigned long); static void hexout (char *const,unsigned long,size_t,const int); static void myperror (const char *const); static size_t myread (const int,char *,size_t); static void mywrite (const int,const char *const,const size_t); /********************************************************************/ int main(const int argc,const char *const argv[]) { if (argc == 1) do_dump(STDIN_FILENO,STDOUT_FILENO); else { int i; for (i = 1 ; i < argc ; i++) { int fhin; fhin = open(argv[i],O_RDONLY); if (fhin == -1) { myperror(argv[i]); continue; } mywrite(STDOUT_FILENO,"-----",5); mywrite(STDOUT_FILENO,argv[i],strlen(argv[i])); mywrite(STDOUT_FILENO,"-----\n",6); do_dump(fhin,STDOUT_FILENO); if (close(fhin) < 0) myperror(argv[i]); } } return EXIT_SUCCESS; } /************************************************************************/ static void do_dump(const int fhin,const int fhout) { struct stat info; assert(fhin >= 0); assert(fhout >= 0); if (fstat(fhin,&info) < 0) myperror("fstat()"); if (!S_ISREG(info.st_mode)) { unsigned char buffer[4096]; size_t bytes; size_t off = 0; while((bytes = myread(fhin,(char *)buffer,sizeof(buffer))) > 0) off = do_dump_memory(buffer,bytes,off,fhout); } else { unsigned char *buffer; buffer = mmap(NULL,info.st_size,PROT_READ,MAP_SHARED,fhin,0); if (buffer == MAP_FAILED) myperror("mmap()"); if (madvise(buffer,info.st_size,MADV_SEQUENTIAL | MADV_WILLNEED) < 0) myperror("madvise()"); do_dump_memory(buffer,info.st_size,0,fhout); munmap(buffer,info.st_size); } } /********************************************************************/ static size_t do_dump_memory( unsigned char *p, size_t bytes, size_t off, const int fhout ) { char outbuffer[75 * 109]; char *pout; size_t count; assert(p != NULL); assert(fhout >= 0); memset(outbuffer,' ',sizeof(outbuffer)); count = 0; pout = outbuffer; while(bytes) { size_t amount; amount = dump_line(&pout,p,bytes,off); p += amount; bytes -= amount; off += amount; count++; if (count == 109) { mywrite(fhout,outbuffer,(size_t)(pout - outbuffer)); memset(outbuffer,' ',sizeof(outbuffer)); count = 0; pout = outbuffer; } } if ((size_t)(pout - outbuffer) > 0) mywrite(fhout,outbuffer,(size_t)(pout - outbuffer)); return off; } /******************************************************************/ static size_t dump_line( char **const pline, unsigned char *p, size_t bytes, const unsigned long off ) { char *line; char *dh; char *da; size_t count; assert(pline != NULL); assert(*pline != NULL); assert(p != NULL); assert(bytes > 0); line = *pline; hexout(line,off,8,':'); if (bytes > LINESIZE) bytes = LINESIZE; p += bytes; dh = &line[10 + bytes * 3]; da = &line[58 + bytes]; for (count = 0 ; count < bytes ; count++) { p --; da --; dh -= 3; *da = "................................ !\"#$%&'()*+,-./0123456789:;<=>?" "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~." "................................................................" "........................................................" "........"[*p]; hexout(dh,(unsigned long)*p,2,' '); } line[58 + count] = '\n'; *pline = &line[59 + count]; return count; } /**********************************************************************/ static void hexout(char *const dest,unsigned long value,size_t size,const int padding) { assert(dest != NULL); assert(size > 0); assert((padding >= ' ') && (padding <= '~')); dest[size] = padding; while(size--) { dest[size] = "0123456789ABCDEF"[value & 0x0f]; value >>= 4; } } /************************************************************************/ static void myperror(const char *const s) { int err = errno; assert(s != NULL); mywrite(STDERR_FILENO,s,strlen(s)); mywrite(STDERR_FILENO,": ",2); if (err > sys_nerr) mywrite(STDERR_FILENO,"(unknown)",9); else mywrite(STDERR_FILENO,sys_errlist[err],strlen(sys_errlist[err])); mywrite(STDERR_FILENO,"\n",1); } /************************************************************************/ static size_t myread(const int fh,char *buf,size_t size) { size_t amount = 0; assert(fh >= 0); assert(buf != NULL); assert(size > 0); while(size > 0) { ssize_t bytes; bytes = read(fh,buf,size); if (bytes < 0) { myperror("read()"); exit(EXIT_FAILURE); } if (bytes == 0) break; amount += bytes; size -= bytes; buf += bytes; } return amount; } /*********************************************************************/ static void mywrite(const int fh,const char *const msg,const size_t size) { assert(fh >= 0); assert(msg != NULL); assert(size > 0); if (write(fh,msg,size) < (ssize_t)size) { if (fh != STDERR_FILENO) myperror("output"); exit(EXIT_FAILURE); } } /***********************************************************************/
I tried it on my system, and saw no difference in performance whatsoever. But Dave was using a 64-bit system, and I was using a 32-bit system. Okay, there could be a difference there. I then tried it on a 64-bit system (The Corporation provided laptop, running a 64-bit version of Linux) and there was a difference, but well:
[spc]saltmine:~/source/99>time ./24 libxul.so >/dev/null real 0m0.043s user 0m0.030s sys 0m0.010s [spc]saltmine:~/source/99>time ./26 libxul.so >/dev/null real 0m0.054s user 0m0.040s sys 0m0.010s
The version with mmap()
is slower! It's more
noticeable with a large (759,012,536 bytes) file:
[spc]saltmine:~/source/99>time ./24 largedata >/dev/null real 0m1.682s user 0m1.500s sys 0m0.170s [spc]saltmine:~/source/99>time ./26 largedata >/dev/null real 0m1.809s user 0m1.680s sys 0m0.120s
Yes, time spent in the kernel goes down (understandable, since we no
longer have to copy data out of the file through the kernel) but the overall
time goes up. And at this point, we've reached the point of diminishing
returns, where the amount of return does not justify the amount of
effort. It could be that Dave's machine had more memory than my machine, or
a faster harddrive, or a later version of the kernel that handled
mmap()/madvise()
better. There's no real gains at this point
to be had.
Another interesting thing is to run the time command, but with more verbose output:
[spc]saltmine:~/source/99>time -v ./24 largedata >/dev/null Command being timed: "./24 largedata" User time (seconds): 1.47 System time (seconds): 0.22 Percent of CPU this job got: 99% Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.69 Average shared text size (kbytes): 0 Average unshared data size (kbytes): 0 Average stack size (kbytes): 0 Average total size (kbytes): 0 Maximum resident set size (kbytes): 1600 Average resident set size (kbytes): 0 Major (requiring I/O) page faults: 0 Minor (reclaiming a frame) page faults: 139 Voluntary context switches: 1 Involuntary context switches: 171 Swaps: 0 File system inputs: 0 File system outputs: 0 Socket messages sent: 0 Socket messages received: 0 Signals delivered: 0 Page size (bytes): 4096 Exit status: 0
Here we see the code from part 24 taking 139 page faults. Now, today's version:
[spc]saltmine:~/source/99>time -v ./26 largedata >/dev/null Command being timed: "./26 largedata" User time (seconds): 1.71 System time (seconds): 0.10 Percent of CPU this job got: 99% Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.82 Average shared text size (kbytes): 0 Average unshared data size (kbytes): 0 Average stack size (kbytes): 0 Average total size (kbytes): 0 Maximum resident set size (kbytes): 2965440 Average resident set size (kbytes): 0 Major (requiring I/O) page faults: 0 Minor (reclaiming a frame) page faults: 185449 Voluntary context switches: 1 Involuntary context switches: 182 Swaps: 0 File system inputs: 0 File system outputs: 0 Socket messages sent: 0 Socket messages received: 0 Signals delivered: 0 Page size (bytes): 4096 Exit status: 0
The number of page faults skyrockets to 185,449, three orders of
magnitude more than the previous version, and thus, that could account for
the time loss on my 64-bit system (quite possibly this does show a
sub-optimal implementation of mmap()
).
Your milage may vary, though.