Contents

Optimization, or how to waste time trying

Contents

I’ve been on some very long conference calls lately, so I took a stab at log parsing in Common Lisp:

   (defun split (delim line)
      "Break LINE at every occurrence of the one-element delimiter DELIM.
    Returns the pieces, in order, as a list of subsequences; DELIM itself is
    never part of a piece."
      (let ((pieces '()))
        ;; START is where the current piece begins; END is the next delimiter
        ;; at or after START, or NIL once no delimiter remains.
        (do* ((start 0 (1+ end))
              (end (position delim line :start start)
                   (position delim line :start start)))
             ((null end)
              ;; No more delimiters: the remainder of LINE is the final piece.
              (push (subseq line start) pieces)
              (nreverse pieces))
          (push (subseq line start end) pieces))))

   (defun splitcols (columns file &key (delim #\Space) grep)
      "Iterate over the lines of a file, returning the specified columns.
    If :grep is supplied only lines containing that string will be returned.
    COLUMNS is a list of zero-based field indices. The result is a list with
    one entry per selected line, each entry being the list of requested fields
    in the order given by COLUMNS."
      (with-open-file (s file)
        (loop :for line = (read-line s nil)
              :while line
              :when (or (null grep) (search grep line))
              ;; Split each line once and reuse the fields for every requested
              ;; column, rather than re-splitting the line per column.
              :collect (let ((fields (split delim line)))
                         (loop :for c :in columns
                               :collect (elt fields c))))))

It suits the simple case where one might use awk to pull out columns from a log, but on my machine (SBCL 1.0.18) it’s quite a bit slower than just using awk. So, as a challenge to myself, I tried to optimize it, which meant spending a few minutes salting it heavily with declarations to reach this:

   (defun split-optimized (delim line)
      "Return a list of subsequences separated by a one character delimiter, delim itself is not returned.
    LINE must be a SIMPLE-STRING and DELIM a CHARACTER: the function is compiled
    with (safety 0), so these declarations are trusted rather than checked."
      (declare (optimize (speed 3) (debug 0) (space 0) (safety 0)))
      (declare (type simple-string line)
               (type character delim))
      ;; The loop variables are typed where they are bound. POINT is NIL on the
      ;; final iteration (no further delimiter), so its type must admit NULL.
      (loop :for mark :of-type fixnum = 0
              ;; The :THEN form only runs after :WHILE POINT succeeded, so POINT
              ;; is known to be a fixnum here.
              :then (the fixnum (1+ (the fixnum point)))
            :for point :of-type (or null fixnum) = (position delim line :start mark)
            :collect (subseq line mark point)
            :while point))

   (defun splitcols-optimized-1 (columns file &key (delim #\Space) grep)
      "Iterate over the lines of a file, returning the specified columns.
    If :grep is supplied only lines containing that string will be returned.
    COLUMNS is a list of zero-based field indices. The result is a list with
    one entry per selected line, each entry being the list of requested fields
    in the order given by COLUMNS."
      ; this runs in about half the time of splitcols
      (declare (optimize (speed 3) (debug 0) (space 0) (safety 0)))
      ;; GREP defaults to NIL, so its declared type must include NULL; with
      ;; (safety 0) a declaration that excludes NIL would be trusted blindly.
      (declare (type (or null simple-string) grep)
               (type character delim))
      (declare (inline split-optimized))
      (with-open-file (s file)
        ;; READ-LINE returns NIL at end of file (eof-error-p is NIL), so LINE
        ;; is typed to allow NULL as well.
        (loop :for line :of-type (or null simple-string) = (read-line s nil)
              :while line
              :when (or (null grep) (search grep line)) ; maybe inefficient search...
              ;; Split each line once and reuse the fields for every requested
              ;; column, rather than re-splitting the line per column.
              :collect (let ((fields (split-optimized delim line)))
                         (loop :for c :in columns
                               :collect (elt fields c))))))

which is imperfect but quite a bit better in terms of runtime, as tested against a 25 MB Apache log:

   CL-USER> (sb-profile:report)
      seconds