From b2c4211b10c3da552bb832eb56ede0cc0356d575 Mon Sep 17 00:00:00 2001
From: gfyoung <gfyoung17@gmail.com>
Date: Fri, 25 Nov 2016 05:02:08 -0500
Subject: [PATCH] MAINT: Cleanup pandas/src/parser

Remove dead code and reformat the sources
to follow Google's C++ style guide.

Also add cpplint (a fork of Google's linter)
to the style checks run on Travis.
---
 ci/lint.sh                    |   12 +
 pandas/src/parser/io.c        |  118 +--
 pandas/src/parser/io.h        |   47 +-
 pandas/src/parser/tokenizer.c | 1831 ++++++++++++++-------------------
 pandas/src/parser/tokenizer.h |  166 ++-
 5 files changed, 921 insertions(+), 1253 deletions(-)

diff --git a/ci/lint.sh b/ci/lint.sh
index d6390a16b763e..7ab97bfc6d328 100755
--- a/ci/lint.sh
+++ b/ci/lint.sh
@@ -35,6 +35,18 @@ if [ "$LINT" ]; then
     done
     echo "Linting *.pxi.in DONE"
 
+    # readability/casting: Warnings about C casting instead of C++ casting
+    # runtime/int: Warnings about using C number types instead of C++ ones
+    # build/include_subdir: Warnings about prefacing included header files with directory
+    pip install cpplint
+
+    echo "Linting *.c and *.h"
+    cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive pandas/src/parser
+    if [ $? -ne "0" ]; then
+        RET=1
+    fi
+    echo "Linting *.c and *.h DONE"
+
     echo "Check for invalid testing"
     grep -r -E --include '*.py' --exclude nosetester.py --exclude testing.py '(numpy|np)\.testing' pandas
     if [ $? = "0" ]; then
diff --git a/pandas/src/parser/io.c b/pandas/src/parser/io.c
index 566de72804968..562d6033ce3eb 100644
--- a/pandas/src/parser/io.c
+++ b/pandas/src/parser/io.c
@@ -1,12 +1,20 @@
-#include "io.h"
+/*
+Copyright (c) 2016, PyData Development Team
+All rights reserved.
+
+Distributed under the terms of the BSD Simplified License.
+
+The full license is in the LICENSE file, distributed with this software.
+*/
 
- /*
-   On-disk FILE, uncompressed
-  */
+#include "io.h"
 
+/*
+  On-disk FILE, uncompressed
+*/
 
 void *new_file_source(char *fname, size_t buffer_size) {
-    file_source *fs = (file_source *) malloc(sizeof(file_source));
+    file_source *fs = (file_source *)malloc(sizeof(file_source));
     fs->fp = fopen(fname, "rb");
 
     if (fs->fp == NULL) {
@@ -18,7 +26,7 @@ void *new_file_source(char *fname, size_t buffer_size) {
     fs->initial_file_pos = ftell(fs->fp);
 
     // Only allocate this heap memory if we are not memory-mapping the file
-    fs->buffer = (char*) malloc((buffer_size + 1) * sizeof(char));
+    fs->buffer = (char *)malloc((buffer_size + 1) * sizeof(char));
 
     if (fs->buffer == NULL) {
         return NULL;
@@ -27,25 +35,11 @@ void *new_file_source(char *fname, size_t buffer_size) {
     memset(fs->buffer, 0, buffer_size + 1);
     fs->buffer[buffer_size] = '\0';
 
-    return (void *) fs;
+    return (void *)fs;
 }
 
-
-// XXX handle on systems without the capability
-
-
-/*
- *  void *new_file_buffer(FILE *f, int buffer_size)
- *
- *  Allocate a new file_buffer.
- *  Returns NULL if the memory allocation fails or if the call to mmap fails.
- *
- *  buffer_size is ignored.
- */
-
-
-void* new_rd_source(PyObject *obj) {
-    rd_source *rds = (rd_source *) malloc(sizeof(rd_source));
+void *new_rd_source(PyObject *obj) {
+    rd_source *rds = (rd_source *)malloc(sizeof(rd_source));
 
     /* hold on to this object */
     Py_INCREF(obj);
@@ -53,7 +47,7 @@ void* new_rd_source(PyObject *obj) {
     rds->buffer = NULL;
     rds->position = 0;
 
-    return (void*) rds;
+    return (void *)rds;
 }
 
 /*
@@ -63,9 +57,7 @@ void* new_rd_source(PyObject *obj) {
  */
 
 int del_file_source(void *fs) {
-    // fseek(FS(fs)->fp, FS(fs)->initial_file_pos, SEEK_SET);
-    if (fs == NULL)
-        return 0;
+    if (fs == NULL) return 0;
 
     /* allocated on the heap */
     free(FS(fs)->buffer);
@@ -89,13 +81,11 @@ int del_rd_source(void *rds) {
 
  */
 
-
-void* buffer_file_bytes(void *source, size_t nbytes,
-                        size_t *bytes_read, int *status) {
+void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read,
+                        int *status) {
     file_source *src = FS(source);
 
-    *bytes_read = fread((void*) src->buffer, sizeof(char), nbytes,
-                        src->fp);
+    *bytes_read = fread((void *)src->buffer, sizeof(char), nbytes, src->fp);
 
     if (*bytes_read == 0) {
         *status = REACHED_EOF;
@@ -103,13 +93,11 @@ void* buffer_file_bytes(void *source, size_t nbytes,
         *status = 0;
     }
 
-    return (void*) src->buffer;
-
+    return (void *)src->buffer;
 }
 
-
-void* buffer_rd_bytes(void *source, size_t nbytes,
-                      size_t *bytes_read, int *status) {
+void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
+                      int *status) {
     PyGILState_STATE state;
     PyObject *result, *func, *args, *tmp;
 
@@ -125,21 +113,18 @@ void* buffer_rd_bytes(void *source, size_t nbytes,
     args = Py_BuildValue("(i)", nbytes);
 
     func = PyObject_GetAttrString(src->obj, "read");
-    /* printf("%s\n", PyBytes_AsString(PyObject_Repr(func))); */
 
     /* TODO: does this release the GIL? */
     result = PyObject_CallObject(func, args);
     Py_XDECREF(args);
     Py_XDECREF(func);
 
-    /* PyObject_Print(PyObject_Type(result), stdout, 0); */
     if (result == NULL) {
         PyGILState_Release(state);
         *bytes_read = 0;
         *status = CALLING_READ_FAILED;
         return NULL;
-    }
-    else if (!PyBytes_Check(result)) {
+    } else if (!PyBytes_Check(result)) {
         tmp = PyUnicode_AsUTF8String(result);
         Py_XDECREF(result);
         result = tmp;
@@ -154,8 +139,7 @@ void* buffer_rd_bytes(void *source, size_t nbytes,
 
     /* hang on to the Python object */
     src->buffer = result;
-    retval = (void*) PyBytes_AsString(result);
-
+    retval = (void *)PyBytes_AsString(result);
 
     PyGILState_Release(state);
 
@@ -165,21 +149,18 @@ void* buffer_rd_bytes(void *source, size_t nbytes,
     return retval;
 }
 
-
 #ifdef HAVE_MMAP
 
-#include <sys/stat.h>
 #include <sys/mman.h>
+#include <sys/stat.h>
 
-void *new_mmap(char *fname)
-{
+void *new_mmap(char *fname) {
     struct stat buf;
     int fd;
     memory_map *mm;
-    /* off_t position; */
     off_t filesize;
 
-    mm = (memory_map *) malloc(sizeof(memory_map));
+    mm = (memory_map *)malloc(sizeof(memory_map));
     mm->fp = fopen(fname, "rb");
 
     fd = fileno(mm->fp);
@@ -187,20 +168,19 @@ void *new_mmap(char *fname)
         fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n", errno);
         return NULL;
     }
-    filesize = buf.st_size;  /* XXX This might be 32 bits. */
-
+    filesize = buf.st_size; /* XXX This might be 32 bits. */
 
     if (mm == NULL) {
         /* XXX Eventually remove this print statement. */
         fprintf(stderr, "new_file_buffer: malloc() failed.\n");
         return NULL;
     }
-    mm->size = (off_t) filesize;
+    mm->size = (off_t)filesize;
     mm->line_number = 0;
 
     mm->fileno = fd;
     mm->position = ftell(mm->fp);
-    mm->last_pos = (off_t) filesize;
+    mm->last_pos = (off_t)filesize;
 
     mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, fd, 0);
     if (mm->memmap == NULL) {
@@ -210,30 +190,20 @@ void *new_mmap(char *fname)
         mm = NULL;
     }
 
-    return (void*) mm;
+    return (void *)mm;
 }
 
-
-int del_mmap(void *src)
-{
+int del_mmap(void *src) {
     munmap(MM(src)->memmap, MM(src)->size);
 
     fclose(MM(src)->fp);
-
-    /*
-     *  With a memory mapped file, there is no need to do
-     *  anything if restore == RESTORE_INITIAL.
-     */
-    /* if (restore == RESTORE_FINAL) { */
-    /*     fseek(FB(fb)->file, FB(fb)->current_pos, SEEK_SET); */
-    /* } */
     free(src);
 
     return 0;
 }
 
-void* buffer_mmap_bytes(void *source, size_t nbytes,
-                        size_t *bytes_read, int *status) {
+void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read,
+                        int *status) {
     void *retval;
     memory_map *src = MM(source);
 
@@ -264,19 +234,15 @@ void* buffer_mmap_bytes(void *source, size_t nbytes,
 
 /* kludgy */
 
-void *new_mmap(char *fname) {
-  return NULL;
-}
+void *new_mmap(char *fname) { return NULL; }
 
-int del_mmap(void *src) {
-  return 0;
-}
+int del_mmap(void *src) { return 0; }
 
 /* don't use this! */
 
-void* buffer_mmap_bytes(void *source, size_t nbytes,
-                        size_t *bytes_read, int *status) {
-  return NULL;
+void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read,
+                        int *status) {
+    return NULL;
 }
 
 #endif
diff --git a/pandas/src/parser/io.h b/pandas/src/parser/io.h
index 2ae72ff8a7fe0..5a0c2b2b5e4a4 100644
--- a/pandas/src/parser/io.h
+++ b/pandas/src/parser/io.h
@@ -1,14 +1,23 @@
+/*
+Copyright (c) 2016, PyData Development Team
+All rights reserved.
+
+Distributed under the terms of the BSD Simplified License.
+
+The full license is in the LICENSE file, distributed with this software.
+*/
+
+#ifndef PANDAS_SRC_PARSER_IO_H_
+#define PANDAS_SRC_PARSER_IO_H_
+
 #include "Python.h"
 #include "tokenizer.h"
 
-
 typedef struct _file_source {
     /* The file being read. */
     FILE *fp;
 
     char *buffer;
-    /* Size of the file, in bytes. */
-    /* off_t size; */
 
     /* file position when the file_buffer was created. */
     off_t initial_file_pos;
@@ -16,15 +25,9 @@ typedef struct _file_source {
     /* Offset in the file of the data currently in the buffer. */
     off_t buffer_file_pos;
 
-    /* Actual number of bytes in the current buffer. (Can be less than buffer_size.) */
+    /* Actual number of bytes in the current buffer. (Can be less than
+     * buffer_size.) */
     off_t last_pos;
-
-    /* Size (in bytes) of the buffer. */
-    // off_t buffer_size;
-
-    /* Pointer to the buffer. */
-    // char *buffer;
-
 } file_source;
 
 #define FS(source) ((file_source *)source)
@@ -34,7 +37,6 @@ typedef struct _file_source {
 #endif
 
 typedef struct _memory_map {
-
     FILE *fp;
 
     /* Size of the file, in bytes. */
@@ -49,22 +51,20 @@ typedef struct _memory_map {
     off_t position;
     off_t last_pos;
     char *memmap;
-
 } memory_map;
 
-#define MM(src) ((memory_map*) src)
+#define MM(src) ((memory_map *)src)
 
 void *new_mmap(char *fname);
 
 int del_mmap(void *src);
 
-void* buffer_mmap_bytes(void *source, size_t nbytes,
-                        size_t *bytes_read, int *status);
-
+void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read,
+                        int *status);
 
 typedef struct _rd_source {
-    PyObject* obj;
-    PyObject* buffer;
+    PyObject *obj;
+    PyObject *buffer;
     size_t position;
 } rd_source;
 
@@ -77,9 +77,10 @@ void *new_rd_source(PyObject *obj);
 int del_file_source(void *src);
 int del_rd_source(void *src);
 
-void* buffer_file_bytes(void *source, size_t nbytes,
-                        size_t *bytes_read, int *status);
+void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read,
+                        int *status);
 
-void* buffer_rd_bytes(void *source, size_t nbytes,
-                      size_t *bytes_read, int *status);
+void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
+                      int *status);
 
+#endif  // PANDAS_SRC_PARSER_IO_H_
diff --git a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c
index 450abcf6c325c..1ea62d66345bd 100644
--- a/pandas/src/parser/tokenizer.c
+++ b/pandas/src/parser/tokenizer.c
@@ -9,61 +9,33 @@ See LICENSE for the license
 
 */
 
- /*
-   Low-level ascii-file processing for pandas. Combines some elements from
-   Python's built-in csv module and Warren Weckesser's textreader project on
-   GitHub. See Python Software Foundation License and BSD licenses for these.
+/*
 
-  */
+Low-level ascii-file processing for pandas. Combines some elements from
+Python's built-in csv module and Warren Weckesser's textreader project on
+GitHub. See Python Software Foundation License and BSD licenses for these.
 
+*/
 
 #include "tokenizer.h"
 
 #include <ctype.h>
-#include <math.h>
 #include <float.h>
-
-
-//#define READ_ERROR_OUT_OF_MEMORY   1
-
-
-/*
-* restore:
-*  RESTORE_NOT     (0):
-*      Free memory, but leave the file position wherever it
-*      happend to be.
-*  RESTORE_INITIAL (1):
-*      Restore the file position to the location at which
-*      the file_buffer was created.
-*  RESTORE_FINAL   (2):
-*      Put the file position at the next byte after the
-*      data read from the file_buffer.
-*
-#define RESTORE_NOT     0
-#define RESTORE_INITIAL 1
-#define RESTORE_FINAL   2
-*/
+#include <math.h>
 
 static void *safe_realloc(void *buffer, size_t size) {
     void *result;
-    // OS X is weird
+    // OSX is weird.
     // http://stackoverflow.com/questions/9560609/
     // different-realloc-behaviour-in-linux-and-osx
 
     result = realloc(buffer, size);
-    TRACE(("safe_realloc: buffer = %p, size = %zu, result = %p\n", buffer, size, result))
+    TRACE(("safe_realloc: buffer = %p, size = %zu, result = %p\n", buffer, size,
+           result))
 
-/*    if (result != NULL) {
-        // errno gets set to 12 on my OS Xmachine in some cases even when the
-        // realloc succeeds. annoying
-        errno = 0;
-    } else {
-        return buffer;
-    }*/
     return result;
 }
 
-
 void coliter_setup(coliter_t *self, parser_t *parser, int i, int start) {
     // column i, starting at 0
     self->words = parser->words;
@@ -73,7 +45,7 @@ void coliter_setup(coliter_t *self, parser_t *parser, int i, int start) {
 
 coliter_t *coliter_new(parser_t *self, int i) {
     // column i, starting at 0
-    coliter_t *iter = (coliter_t*) malloc(sizeof(coliter_t));
+    coliter_t *iter = (coliter_t *)malloc(sizeof(coliter_t));
 
     if (NULL == iter) {
         return NULL;
@@ -83,36 +55,28 @@ coliter_t *coliter_new(parser_t *self, int i) {
     return iter;
 }
 
-
- /* int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, int *error); */
- /* uint64_t str_to_uint64(const char *p_item, uint64_t uint_max, int *error); */
-
-
-static  void free_if_not_null(void **ptr) {
+static void free_if_not_null(void **ptr) {
     TRACE(("free_if_not_null %p\n", *ptr))
     if (*ptr != NULL) {
         free(*ptr);
         *ptr = NULL;
     }
- }
-
-
-
- /*
+}
 
-   Parser / tokenizer
+/*
 
- */
+  Parser / tokenizer
 
+*/
 
-static void *grow_buffer(void *buffer, int length, int *capacity,
-                         int space, int elsize, int *error) {
+static void *grow_buffer(void *buffer, int length, int *capacity, int space,
+                         int elsize, int *error) {
     int cap = *capacity;
     void *newbuffer = buffer;
 
     // Can we fit potentially nbytes tokens (+ null terminators) in the stream?
-    while ( (length + space >= cap) && (newbuffer != NULL) ){
-        cap = cap? cap << 1 : 2;
+    while ((length + space >= cap) && (newbuffer != NULL)) {
+        cap = cap ? cap << 1 : 2;
         buffer = newbuffer;
         newbuffer = safe_realloc(newbuffer, elsize * cap);
     }
@@ -122,15 +86,14 @@ static void *grow_buffer(void *buffer, int length, int *capacity,
         // and return the last good realloc'd buffer so it can be freed
         *error = errno;
         newbuffer = buffer;
-        } else {
+    } else {
         // realloc worked, update *capacity and set *error to 0
         // sigh, multiple return values
         *capacity = cap;
         *error = 0;
     }
     return newbuffer;
- }
-
+}
 
 void parser_set_default_options(parser_t *self) {
     self->decimal = '.';
@@ -139,7 +102,7 @@ void parser_set_default_options(parser_t *self) {
     // For tokenization
     self->state = START_RECORD;
 
-    self->delimiter = ','; // XXX
+    self->delimiter = ',';  // XXX
     self->delim_whitespace = 0;
 
     self->doublequote = 0;
@@ -161,17 +124,13 @@ void parser_set_default_options(parser_t *self) {
     self->thousands = '\0';
 
     self->skipset = NULL;
-    self-> skip_first_N_rows = -1;
+    self->skip_first_N_rows = -1;
     self->skip_footer = 0;
 }
 
-int get_parser_memory_footprint(parser_t *self) {
-    return 0;
-}
+int get_parser_memory_footprint(parser_t *self) { return 0; }
 
-parser_t* parser_new() {
-    return (parser_t*) calloc(1, sizeof(parser_t));
-}
+parser_t *parser_new() { return (parser_t *)calloc(1, sizeof(parser_t)); }
 
 int parser_clear_data_buffers(parser_t *self) {
     free_if_not_null((void *)&self->stream);
@@ -183,14 +142,14 @@ int parser_clear_data_buffers(parser_t *self) {
 }
 
 int parser_cleanup(parser_t *self) {
-    int    status = 0;
+    int status = 0;
 
     // XXX where to put this
-    free_if_not_null((void *) &self->error_msg);
-    free_if_not_null((void *) &self->warn_msg);
+    free_if_not_null((void *)&self->error_msg);
+    free_if_not_null((void *)&self->warn_msg);
 
     if (self->skipset != NULL) {
-        kh_destroy_int64((kh_int64_t*) self->skipset);
+        kh_destroy_int64((kh_int64_t *)self->skipset);
         self->skipset = NULL;
     }
 
@@ -207,8 +166,6 @@ int parser_cleanup(parser_t *self) {
     return status;
 }
 
-
-
 int parser_init(parser_t *self) {
     int sz;
 
@@ -225,7 +182,7 @@ int parser_init(parser_t *self) {
     self->warn_msg = NULL;
 
     // token stream
-    self->stream = (char*) malloc(STREAM_INIT_SIZE * sizeof(char));
+    self->stream = (char *)malloc(STREAM_INIT_SIZE * sizeof(char));
     if (self->stream == NULL) {
         parser_cleanup(self);
         return PARSER_OUT_OF_MEMORY;
@@ -235,16 +192,16 @@ int parser_init(parser_t *self) {
 
     // word pointers and metadata
     sz = STREAM_INIT_SIZE / 10;
-    sz = sz? sz : 1;
-    self->words = (char**) malloc(sz * sizeof(char*));
-    self->word_starts = (int*) malloc(sz * sizeof(int));
+    sz = sz ? sz : 1;
+    self->words = (char **)malloc(sz * sizeof(char *));
+    self->word_starts = (int *)malloc(sz * sizeof(int));
     self->words_cap = sz;
     self->words_len = 0;
 
     // line pointers and metadata
-    self->line_start = (int*) malloc(sz * sizeof(int));
+    self->line_start = (int *)malloc(sz * sizeof(int));
 
-    self->line_fields = (int*) malloc(sz * sizeof(int));
+    self->line_fields = (int *)malloc(sz * sizeof(int));
 
     self->lines_cap = sz;
     self->lines = 0;
@@ -253,7 +210,6 @@ int parser_init(parser_t *self) {
     if (self->stream == NULL || self->words == NULL ||
         self->word_starts == NULL || self->line_start == NULL ||
         self->line_fields == NULL) {
-
         parser_cleanup(self);
 
         return PARSER_OUT_OF_MEMORY;
@@ -279,7 +235,6 @@ int parser_init(parser_t *self) {
     return 0;
 }
 
-
 void parser_free(parser_t *self) {
     // opposite of parser_init
     parser_cleanup(self);
@@ -292,20 +247,21 @@ static int make_stream_space(parser_t *self, size_t nbytes) {
 
     // Can we fit potentially nbytes tokens (+ null terminators) in the stream?
 
-    /* TRACE(("maybe growing buffers\n")); */
-
     /*
       TOKEN STREAM
     */
 
-    orig_ptr = (void *) self->stream;
-    TRACE(("\n\nmake_stream_space: nbytes = %zu.  grow_buffer(self->stream...)\n", nbytes))
-    self->stream = (char*) grow_buffer((void *) self->stream,
-                                        self->stream_len,
-                                        &self->stream_cap, nbytes * 2,
-                                        sizeof(char), &status);
-    TRACE(("make_stream_space: self->stream=%p, self->stream_len = %zu, self->stream_cap=%zu, status=%zu\n",
-           self->stream, self->stream_len, self->stream_cap, status))
+    orig_ptr = (void *)self->stream;
+    TRACE(
+        ("\n\nmake_stream_space: nbytes = %zu.  grow_buffer(self->stream...)\n",
+         nbytes))
+    self->stream = (char *)grow_buffer((void *)self->stream, self->stream_len,
+                                       &self->stream_cap, nbytes * 2,
+                                       sizeof(char), &status);
+    TRACE(
+        ("make_stream_space: self->stream=%p, self->stream_len = %zu, "
+         "self->stream_cap=%zu, status=%zu\n",
+         self->stream, self->stream_len, self->stream_cap, status))
 
     if (status != 0) {
         return PARSER_OUT_OF_MEMORY;
@@ -313,95 +269,86 @@ static int make_stream_space(parser_t *self, size_t nbytes) {
 
     // realloc sets errno when moving buffer?
     if (self->stream != orig_ptr) {
-        // uff
-        /* TRACE(("Moving word pointers\n")) */
-
         self->pword_start = self->stream + self->word_start;
 
-        for (i = 0; i < self->words_len; ++i)
-        {
+        for (i = 0; i < self->words_len; ++i) {
             self->words[i] = self->stream + self->word_starts[i];
         }
     }
 
-
     /*
       WORD VECTORS
     */
 
     cap = self->words_cap;
-    self->words = (char**) grow_buffer((void *) self->words,
-                                       self->words_len,
-                                       &self->words_cap, nbytes,
-                                       sizeof(char*), &status);
-    TRACE(("make_stream_space: grow_buffer(self->self->words, %zu, %zu, %zu, %d)\n",
-           self->words_len, self->words_cap, nbytes, status))
+    self->words =
+        (char **)grow_buffer((void *)self->words, self->words_len,
+                             &self->words_cap, nbytes, sizeof(char *), &status);
+    TRACE(
+        ("make_stream_space: grow_buffer(self->self->words, %zu, %zu, %zu, "
+         "%d)\n",
+         self->words_len, self->words_cap, nbytes, status))
     if (status != 0) {
         return PARSER_OUT_OF_MEMORY;
     }
 
-
     // realloc took place
     if (cap != self->words_cap) {
-        TRACE(("make_stream_space: cap != self->words_cap, nbytes = %d, self->words_cap=%d\n", nbytes, self->words_cap))
-        newptr = safe_realloc((void *) self->word_starts, sizeof(int) * self->words_cap);
+        TRACE(
+            ("make_stream_space: cap != self->words_cap, nbytes = %d, "
+             "self->words_cap=%d\n",
+             nbytes, self->words_cap))
+        newptr = safe_realloc((void *)self->word_starts,
+                              sizeof(int) * self->words_cap);
         if (newptr == NULL) {
             return PARSER_OUT_OF_MEMORY;
         } else {
-            self->word_starts = (int*) newptr;
+            self->word_starts = (int *)newptr;
         }
     }
 
-
     /*
       LINE VECTORS
     */
-    /*
-    printf("Line_start: ");
-
-    for (j = 0; j < self->lines + 1; ++j) {
-         printf("%d ", self->line_fields[j]);
-     }
-    printf("\n");
-
-    printf("lines_cap: %d\n", self->lines_cap);
-    */
     cap = self->lines_cap;
-    self->line_start = (int*) grow_buffer((void *) self->line_start,
-                                          self->lines + 1,
-                                          &self->lines_cap, nbytes,
-                                          sizeof(int), &status);
-    TRACE(("make_stream_space: grow_buffer(self->line_start, %zu, %zu, %zu, %d)\n",
-           self->lines + 1, self->lines_cap, nbytes, status))
+    self->line_start =
+        (int *)grow_buffer((void *)self->line_start, self->lines + 1,
+                           &self->lines_cap, nbytes, sizeof(int), &status);
+    TRACE((
+        "make_stream_space: grow_buffer(self->line_start, %zu, %zu, %zu, %d)\n",
+        self->lines + 1, self->lines_cap, nbytes, status))
     if (status != 0) {
         return PARSER_OUT_OF_MEMORY;
     }
 
     // realloc took place
     if (cap != self->lines_cap) {
-        TRACE(("make_stream_space: cap != self->lines_cap, nbytes = %d\n", nbytes))
-        newptr = safe_realloc((void *) self->line_fields, sizeof(int) * self->lines_cap);
+        TRACE(("make_stream_space: cap != self->lines_cap, nbytes = %d\n",
+               nbytes))
+        newptr = safe_realloc((void *)self->line_fields,
+                              sizeof(int) * self->lines_cap);
         if (newptr == NULL) {
             return PARSER_OUT_OF_MEMORY;
         } else {
-            self->line_fields = (int*) newptr;
+            self->line_fields = (int *)newptr;
         }
     }
 
-    /* TRACE(("finished growing buffers\n")); */
-
     return 0;
 }
 
-
 static int push_char(parser_t *self, char c) {
-    /* TRACE(("pushing %c \n", c)) */
-    TRACE(("push_char: self->stream[%zu] = %x, stream_cap=%zu\n", self->stream_len+1, c, self->stream_cap))
+    TRACE(("push_char: self->stream[%zu] = %x, stream_cap=%zu\n",
+           self->stream_len + 1, c, self->stream_cap))
     if (self->stream_len >= self->stream_cap) {
-        TRACE(("push_char: ERROR!!! self->stream_len(%d) >= self->stream_cap(%d)\n",
-               self->stream_len, self->stream_cap))
-        self->error_msg = (char*) malloc(64);
-        sprintf(self->error_msg, "Buffer overflow caught - possible malformed input file.\n");
+        TRACE(
+            ("push_char: ERROR!!! self->stream_len(%d) >= "
+             "self->stream_cap(%d)\n",
+             self->stream_len, self->stream_cap))
+        int bufsize = 100;
+        self->error_msg = (char *)malloc(bufsize);
+        snprintf(self->error_msg, bufsize,
+                 "Buffer overflow caught - possible malformed input file.\n");
         return PARSER_OUT_OF_MEMORY;
     }
     self->stream[self->stream_len++] = c;
@@ -410,11 +357,15 @@ static int push_char(parser_t *self, char c) {
 
 int P_INLINE end_field(parser_t *self) {
     // XXX cruft
-//    self->numeric_field = 0;
     if (self->words_len >= self->words_cap) {
-        TRACE(("end_field: ERROR!!! self->words_len(%zu) >= self->words_cap(%zu)\n", self->words_len, self->words_cap))
-        self->error_msg = (char*) malloc(64);
-        sprintf(self->error_msg, "Buffer overflow caught - possible malformed input file.\n");
+        TRACE(
+            ("end_field: ERROR!!! self->words_len(%zu) >= "
+             "self->words_cap(%zu)\n",
+             self->words_len, self->words_cap))
+        int bufsize = 100;
+        self->error_msg = (char *)malloc(bufsize);
+        snprintf(self->error_msg, bufsize,
+                 "Buffer overflow caught - possible malformed input file.\n");
         return PARSER_OUT_OF_MEMORY;
     }
 
@@ -426,8 +377,8 @@ int P_INLINE end_field(parser_t *self) {
 
     TRACE(("end_field: Char diff: %d\n", self->pword_start - self->words[0]));
 
-    TRACE(("end_field: Saw word %s at: %d. Total: %d\n",
-           self->pword_start, self->word_start, self->words_len + 1))
+    TRACE(("end_field: Saw word %s at: %d. Total: %d\n", self->pword_start,
+           self->word_start, self->words_len + 1))
 
     self->word_starts[self->words_len] = self->word_start;
     self->words_len++;
@@ -442,29 +393,29 @@ int P_INLINE end_field(parser_t *self) {
     return 0;
 }
 
-
 static void append_warning(parser_t *self, const char *msg) {
     int ex_length;
     int length = strlen(msg);
     void *newptr;
 
     if (self->warn_msg == NULL) {
-        self->warn_msg = (char*) malloc(length + 1);
-        strcpy(self->warn_msg, msg);
+        self->warn_msg = (char *)malloc(length + 1);
+        strncpy(self->warn_msg, msg, strlen(msg) + 1);
     } else {
         ex_length = strlen(self->warn_msg);
         newptr = safe_realloc(self->warn_msg, ex_length + length + 1);
         if (newptr != NULL) {
-            self->warn_msg = (char*) newptr;
-            strcpy(self->warn_msg + ex_length, msg);
+            self->warn_msg = (char *)newptr;
+            strncpy(self->warn_msg + ex_length, msg, strlen(msg) + 1);
         }
     }
 }
 
 static int end_line(parser_t *self) {
+    char *msg;
     int fields;
     int ex_fields = self->expected_fields;
-    char *msg;
+    int bufsize = 100;  // for error or warning messages
 
     fields = self->line_fields[self->lines];
 
@@ -478,11 +429,10 @@ static int end_line(parser_t *self) {
         }
     }
 
-    if (self->state == START_FIELD_IN_SKIP_LINE || \
-        self->state == IN_FIELD_IN_SKIP_LINE || \
-        self->state == IN_QUOTED_FIELD_IN_SKIP_LINE || \
-        self->state == QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE
-    ) {
+    if (self->state == START_FIELD_IN_SKIP_LINE ||
+        self->state == IN_FIELD_IN_SKIP_LINE ||
+        self->state == IN_QUOTED_FIELD_IN_SKIP_LINE ||
+        self->state == QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE) {
         TRACE(("end_line: Skipping row %d\n", self->file_lines));
         // increment file line count
         self->file_lines++;
@@ -495,9 +445,8 @@ static int end_line(parser_t *self) {
         return 0;
     }
 
-    if (!(self->lines <= self->header_end + 1)
-        && (self->expected_fields < 0 && fields > ex_fields)
-        && !(self->usecols)) {
+    if (!(self->lines <= self->header_end + 1) &&
+        (self->expected_fields < 0 && fields > ex_fields) && !(self->usecols)) {
         // increment file line count
         self->file_lines++;
 
@@ -509,8 +458,9 @@ static int end_line(parser_t *self) {
 
         // file_lines is now the actual file line number (starting at 1)
         if (self->error_bad_lines) {
-            self->error_msg = (char*) malloc(100);
-            sprintf(self->error_msg, "Expected %d fields in line %d, saw %d\n",
+            self->error_msg = (char *)malloc(bufsize);
+            snprintf(self->error_msg, bufsize,
+                    "Expected %d fields in line %d, saw %d\n",
                     ex_fields, self->file_lines, fields);
 
             TRACE(("Error at line %d, %d fields\n", self->file_lines, fields));
@@ -520,9 +470,10 @@ static int end_line(parser_t *self) {
             // simply skip bad lines
             if (self->warn_bad_lines) {
                 // pass up error message
-                msg = (char*) malloc(100);
-                sprintf(msg, "Skipping line %d: expected %d fields, saw %d\n",
-                        self->file_lines, ex_fields, fields);
+                msg = (char *)malloc(bufsize);
+                snprintf(msg, bufsize,
+                        "Skipping line %d: expected %d fields, saw %d\n",
+                         self->file_lines, ex_fields, fields);
                 append_warning(self, msg);
                 free(msg);
             }
@@ -530,14 +481,13 @@ static int end_line(parser_t *self) {
     } else {
         // missing trailing delimiters
         if ((self->lines >= self->header_end + 1) && fields < ex_fields) {
-
             // might overrun the buffer when closing fields
             if (make_stream_space(self, ex_fields - fields) < 0) {
                 self->error_msg = "out of memory";
                 return -1;
             }
 
-            while (fields < ex_fields){
+            while (fields < ex_fields) {
                 end_field(self);
                 fields++;
             }
@@ -549,15 +499,21 @@ static int end_line(parser_t *self) {
 
         // good line, set new start point
         if (self->lines >= self->lines_cap) {
-            TRACE(("end_line: ERROR!!! self->lines(%zu) >= self->lines_cap(%zu)\n", self->lines, self->lines_cap))  \
-            self->error_msg = (char*) malloc(100);      \
-            sprintf(self->error_msg, "Buffer overflow caught - possible malformed input file.\n"); \
-            return PARSER_OUT_OF_MEMORY;                \
+            TRACE((
+                "end_line: ERROR!!! self->lines(%zu) >= self->lines_cap(%zu)\n",
+                self->lines, self->lines_cap))
+            int bufsize = 100;
+            self->error_msg = (char *)malloc(bufsize);
+            snprintf(self->error_msg, bufsize,
+                     "Buffer overflow caught - "
+                     "possible malformed input file.\n");
+            return PARSER_OUT_OF_MEMORY;
         }
-        self->line_start[self->lines] = (self->line_start[self->lines - 1] +
-                                         fields);
+        self->line_start[self->lines] =
+            (self->line_start[self->lines - 1] + fields);
 
-        TRACE(("end_line: new line start: %d\n", self->line_start[self->lines]));
+        TRACE(
+            ("end_line: new line start: %d\n", self->line_start[self->lines]));
 
         // new line start with 0 fields
         self->line_fields[self->lines] = 0;
@@ -574,10 +530,10 @@ int parser_add_skiprow(parser_t *self, int64_t row) {
     int ret = 0;
 
     if (self->skipset == NULL) {
-        self->skipset = (void*) kh_init_int64();
+        self->skipset = (void *)kh_init_int64();
     }
 
-    set = (kh_int64_t*) self->skipset;
+    set = (kh_int64_t *)self->skipset;
 
     k = kh_put_int64(set, row, &ret);
     set->keys[k] = row;
@@ -601,18 +557,21 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
     status = 0;
     self->datapos = 0;
     self->data = self->cb_io(self->source, nbytes, &bytes_read, &status);
-    TRACE(("parser_buffer_bytes self->cb_io: nbytes=%zu, datalen: %d, status=%d\n",
-           nbytes, bytes_read, status));
+    TRACE((
+        "parser_buffer_bytes self->cb_io: nbytes=%zu, datalen: %d, status=%d\n",
+        nbytes, bytes_read, status));
     self->datalen = bytes_read;
 
     if (status != REACHED_EOF && self->data == NULL) {
-        self->error_msg = (char*) malloc(200);
+        int bufsize = 200;
+        self->error_msg = (char *)malloc(bufsize);
 
         if (status == CALLING_READ_FAILED) {
-            sprintf(self->error_msg, ("Calling read(nbytes) on source failed. "
-                                      "Try engine='python'."));
+            snprintf(self->error_msg, bufsize,
+                     "Calling read(nbytes) on source failed. "
+                     "Try engine='python'.");
         } else {
-            sprintf(self->error_msg, "Unknown error in IO callback");
+            snprintf(self->error_msg, bufsize, "Unknown error in IO callback");
         }
         return -1;
     }
@@ -622,93 +581,96 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
     return status;
 }
 
-
 /*
 
   Tokenization macros and state machine code
 
 */
 
-//    printf("pushing %c\n", c);
-
-#define PUSH_CHAR(c)                                \
-    TRACE(("PUSH_CHAR: Pushing %c, slen= %d, stream_cap=%zu, stream_len=%zu\n", c, slen, self->stream_cap, self->stream_len)) \
-    if (slen >= maxstreamsize) {                    \
-        TRACE(("PUSH_CHAR: ERROR!!! slen(%d) >= maxstreamsize(%d)\n", slen, maxstreamsize))            \
-        self->error_msg = (char*) malloc(100);      \
-        sprintf(self->error_msg, "Buffer overflow caught - possible malformed input file.\n"); \
-        return PARSER_OUT_OF_MEMORY;                \
-    }                                               \
-    *stream++ = c;                                  \
+#define PUSH_CHAR(c)                                                          \
+    TRACE(                                                                    \
+        ("PUSH_CHAR: Pushing %c, slen= %d, stream_cap=%zu, stream_len=%zu\n", \
+         c, slen, self->stream_cap, self->stream_len))                        \
+    if (slen >= maxstreamsize) {                                              \
+        TRACE(("PUSH_CHAR: ERROR!!! slen(%d) >= maxstreamsize(%d)\n", slen,   \
+               maxstreamsize))                                                \
+        int bufsize = 100;                                                    \
+        self->error_msg = (char *)malloc(bufsize);                            \
+        snprintf(self->error_msg, bufsize,                                    \
+                 "Buffer overflow caught - possible malformed input file.\n");\
+        return PARSER_OUT_OF_MEMORY;                                          \
+    }                                                                         \
+    *stream++ = c;                                                            \
     slen++;
 
 // This is a little bit of a hack but works for now
 
-#define END_FIELD()                            \
-    self->stream_len = slen;                   \
-    if (end_field(self) < 0) {                 \
-        goto parsingerror;                     \
-    }                                          \
-    stream = self->stream + self->stream_len;  \
+#define END_FIELD()                           \
+    self->stream_len = slen;                  \
+    if (end_field(self) < 0) {                \
+        goto parsingerror;                    \
+    }                                         \
+    stream = self->stream + self->stream_len; \
     slen = self->stream_len;
 
-#define END_LINE_STATE(STATE)                                           \
-    self->stream_len = slen;                                            \
-    if (end_line(self) < 0) {                                           \
-        goto parsingerror;                                              \
-    }                                                                   \
-    stream = self->stream + self->stream_len;                           \
-    slen = self->stream_len;                                            \
-    self->state = STATE;                                                \
-    if (line_limit > 0 && self->lines == start_lines + line_limit) {    \
-        goto linelimit;                                                 \
-                                                                        \
-    }
-
-#define END_LINE_AND_FIELD_STATE(STATE)                                 \
-    self->stream_len = slen;                                            \
-    if (end_line(self) < 0) {                                           \
-        goto parsingerror;                                              \
-    }                                                                   \
-    if (end_field(self) < 0) {                                          \
-        goto parsingerror;                                              \
-    }                                                                   \
-    stream = self->stream + self->stream_len;                           \
-    slen = self->stream_len;                                            \
-    self->state = STATE;                                                \
-    if (line_limit > 0 && self->lines == start_lines + line_limit) {    \
-        goto linelimit;                                                 \
-                                                                        \
+#define END_LINE_STATE(STATE)                                        \
+    self->stream_len = slen;                                         \
+    if (end_line(self) < 0) {                                        \
+        goto parsingerror;                                           \
+    }                                                                \
+    stream = self->stream + self->stream_len;                        \
+    slen = self->stream_len;                                         \
+    self->state = STATE;                                             \
+    if (line_limit > 0 && self->lines == start_lines + line_limit) { \
+        goto linelimit;                                              \
+    }
+
+#define END_LINE_AND_FIELD_STATE(STATE)                              \
+    self->stream_len = slen;                                         \
+    if (end_line(self) < 0) {                                        \
+        goto parsingerror;                                           \
+    }                                                                \
+    if (end_field(self) < 0) {                                       \
+        goto parsingerror;                                           \
+    }                                                                \
+    stream = self->stream + self->stream_len;                        \
+    slen = self->stream_len;                                         \
+    self->state = STATE;                                             \
+    if (line_limit > 0 && self->lines == start_lines + line_limit) { \
+        goto linelimit;                                              \
     }
 
 #define END_LINE() END_LINE_STATE(START_RECORD)
 
 #define IS_WHITESPACE(c) ((c == ' ' || c == '\t'))
 
-#define IS_TERMINATOR(c) ((self->lineterminator == '\0' && c == '\n') || \
-                          (self->lineterminator != '\0' &&               \
-                           c == self->lineterminator))
+#define IS_TERMINATOR(c)                            \
+    ((self->lineterminator == '\0' && c == '\n') || \
+     (self->lineterminator != '\0' && c == self->lineterminator))
 
 #define IS_QUOTE(c) ((c == self->quotechar && self->quoting != QUOTE_NONE))
 
 // don't parse '\r' with a custom line terminator
 #define IS_CARRIAGE(c) ((self->lineterminator == '\0' && c == '\r'))
 
-#define IS_COMMENT_CHAR(c) ((self->commentchar != '\0' && c == self->commentchar))
+#define IS_COMMENT_CHAR(c) \
+    ((self->commentchar != '\0' && c == self->commentchar))
 
 #define IS_ESCAPE_CHAR(c) ((self->escapechar != '\0' && c == self->escapechar))
 
-#define IS_SKIPPABLE_SPACE(c) ((!self->delim_whitespace && c == ' ' && \
-                                self->skipinitialspace))
+#define IS_SKIPPABLE_SPACE(c) \
+    ((!self->delim_whitespace && c == ' ' && self->skipinitialspace))
 
 // applied when in a field
-#define IS_DELIMITER(c) ((!self->delim_whitespace && c == self->delimiter) || \
-                         (self->delim_whitespace && IS_WHITESPACE(c)))
+#define IS_DELIMITER(c)                                   \
+    ((!self->delim_whitespace && c == self->delimiter) || \
+     (self->delim_whitespace && IS_WHITESPACE(c)))
 
 #define _TOKEN_CLEANUP()                                                \
     self->stream_len = slen;                                            \
     self->datapos = i;                                                  \
-    TRACE(("_TOKEN_CLEANUP: datapos: %d, datalen: %d\n", self->datapos, self->datalen));
+    TRACE(("_TOKEN_CLEANUP: datapos: %d, datalen: %d\n", self->datapos, \
+           self->datalen));
 
 #define CHECK_FOR_BOM()                                                   \
     if (*buf == '\xef' && *(buf + 1) == '\xbb' && *(buf + 2) == '\xbf') { \
@@ -718,16 +680,14 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
 
 int skip_this_line(parser_t *self, int64_t rownum) {
     if (self->skipset != NULL) {
-        return ( kh_get_int64((kh_int64_t*) self->skipset, self->file_lines) !=
-                 ((kh_int64_t*)self->skipset)->n_buckets );
-    }
-    else {
-        return ( rownum <= self->skip_first_N_rows );
+        return (kh_get_int64((kh_int64_t *)self->skipset, self->file_lines) !=
+                ((kh_int64_t *)self->skipset)->n_buckets);
+    } else {
+        return (rownum <= self->skip_first_N_rows);
     }
 }
 
-int tokenize_bytes(parser_t *self, size_t line_limit, int start_lines)
-{
+int tokenize_bytes(parser_t *self, size_t line_limit, int start_lines) {
     int i, slen;
     long maxstreamsize;
     char c;
@@ -749,368 +709,364 @@ int tokenize_bytes(parser_t *self, size_t line_limit, int start_lines)
         CHECK_FOR_BOM();
     }
 
-    for (i = self->datapos; i < self->datalen; ++i)
-    {
+    for (i = self->datapos; i < self->datalen; ++i) {
         // next character in file
         c = *buf++;
 
-        TRACE(("tokenize_bytes - Iter: %d Char: 0x%x Line %d field_count %d, state %d\n",
-               i, c, self->file_lines + 1, self->line_fields[self->lines],
-               self->state));
-
-        switch(self->state) {
-
-        case START_FIELD_IN_SKIP_LINE:
-            if (IS_TERMINATOR(c)) {
-                END_LINE();
-            } else if (IS_CARRIAGE(c)) {
-                self->file_lines++;
-                self->state = EAT_CRNL_NOP;
-            } else if (IS_QUOTE(c)) {
-                self->state = IN_QUOTED_FIELD_IN_SKIP_LINE;
-            } else if (IS_DELIMITER(c)) {
-                // Do nothing, we're starting a new field again.
-            } else {
-                self->state = IN_FIELD_IN_SKIP_LINE;
-            }
-            break;
+        TRACE(
+            ("tokenize_bytes - Iter: %d Char: 0x%x Line %d field_count %d, "
+             "state %d\n",
+             i, c, self->file_lines + 1, self->line_fields[self->lines],
+             self->state));
 
-        case IN_FIELD_IN_SKIP_LINE:
-            if (IS_TERMINATOR(c)) {
-                END_LINE();
-            } else if (IS_CARRIAGE(c)) {
-                self->file_lines++;
-                self->state = EAT_CRNL_NOP;
-            } else if (IS_DELIMITER(c)) {
-                self->state = START_FIELD_IN_SKIP_LINE;
-            }
-            break;
-
-        case IN_QUOTED_FIELD_IN_SKIP_LINE:
-            if (IS_QUOTE(c)) {
-                if (self->doublequote) {
-                    self->state = QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE;
+        switch (self->state) {
+            case START_FIELD_IN_SKIP_LINE:
+                if (IS_TERMINATOR(c)) {
+                    END_LINE();
+                } else if (IS_CARRIAGE(c)) {
+                    self->file_lines++;
+                    self->state = EAT_CRNL_NOP;
+                } else if (IS_QUOTE(c)) {
+                    self->state = IN_QUOTED_FIELD_IN_SKIP_LINE;
+                } else if (IS_DELIMITER(c)) {
+                    // Do nothing, we're starting a new field again.
                 } else {
                     self->state = IN_FIELD_IN_SKIP_LINE;
                 }
-            }
-            break;
-
-        case QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE:
-            if (IS_QUOTE(c)) {
-                self->state = IN_QUOTED_FIELD_IN_SKIP_LINE;
-            } else if (IS_TERMINATOR(c)) {
-                END_LINE();
-            } else if (IS_CARRIAGE(c)) {
-                self->file_lines++;
-                self->state = EAT_CRNL_NOP;
-            } else if (IS_DELIMITER(c)) {
-                self->state = START_FIELD_IN_SKIP_LINE;
-            } else {
-                self->state = IN_FIELD_IN_SKIP_LINE;
-            }
-            break;
-
-        case WHITESPACE_LINE:
-            if (IS_TERMINATOR(c)) {
-                self->file_lines++;
-                self->state = START_RECORD;
-                break;
-            } else if (IS_CARRIAGE(c)) {
-                self->file_lines++;
-                self->state = EAT_CRNL_NOP;
                 break;
-            } else if (!self->delim_whitespace) {
-                if (IS_WHITESPACE(c) && c != self->delimiter) {
-                    ;
-                } else { // backtrack
-                    // use i + 1 because buf has been incremented but not i
-                    do {
-                        --buf;
-                        --i;
-                    } while (i + 1 > self->datapos && !IS_TERMINATOR(*buf));
 
-                    // reached a newline rather than the beginning
-                    if (IS_TERMINATOR(*buf)) {
-                        ++buf; // move pointer to first char after newline
-                        ++i;
-                    }
-                    self->state = START_FIELD;
+            case IN_FIELD_IN_SKIP_LINE:
+                if (IS_TERMINATOR(c)) {
+                    END_LINE();
+                } else if (IS_CARRIAGE(c)) {
+                    self->file_lines++;
+                    self->state = EAT_CRNL_NOP;
+                } else if (IS_DELIMITER(c)) {
+                    self->state = START_FIELD_IN_SKIP_LINE;
                 }
                 break;
-            }
-            // fall through
 
-        case EAT_WHITESPACE:
-            if (IS_TERMINATOR(c)) {
-                END_LINE();
-                self->state = START_RECORD;
-                break;
-            } else if (IS_CARRIAGE(c)) {
-                self->state = EAT_CRNL;
-                break;
-            } else if (!IS_WHITESPACE(c)) {
-                self->state = START_FIELD;
-                // fall through to subsequent state
-            } else {
-                // if whitespace char, keep slurping
+            case IN_QUOTED_FIELD_IN_SKIP_LINE:
+                if (IS_QUOTE(c)) {
+                    if (self->doublequote) {
+                        self->state = QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE;
+                    } else {
+                        self->state = IN_FIELD_IN_SKIP_LINE;
+                    }
+                }
                 break;
-            }
 
-        case START_RECORD:
-            // start of record
-            if (skip_this_line(self, self->file_lines)) {
+            case QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE:
                 if (IS_QUOTE(c)) {
                     self->state = IN_QUOTED_FIELD_IN_SKIP_LINE;
+                } else if (IS_TERMINATOR(c)) {
+                    END_LINE();
+                } else if (IS_CARRIAGE(c)) {
+                    self->file_lines++;
+                    self->state = EAT_CRNL_NOP;
+                } else if (IS_DELIMITER(c)) {
+                    self->state = START_FIELD_IN_SKIP_LINE;
                 } else {
                     self->state = IN_FIELD_IN_SKIP_LINE;
-
-                    if (IS_TERMINATOR(c)) {
-                        END_LINE();
-                    }
                 }
                 break;
-            } else if (IS_TERMINATOR(c)) {
-                // \n\r possible?
-                if (self->skip_empty_lines) {
+
+            case WHITESPACE_LINE:
+                if (IS_TERMINATOR(c)) {
                     self->file_lines++;
-                } else {
-                    END_LINE();
-                }
-                break;
-            } else if (IS_CARRIAGE(c)) {
-                if (self->skip_empty_lines) {
+                    self->state = START_RECORD;
+                    break;
+                } else if (IS_CARRIAGE(c)) {
                     self->file_lines++;
                     self->state = EAT_CRNL_NOP;
-                } else {
+                    break;
+                } else if (!self->delim_whitespace) {
+                    if (IS_WHITESPACE(c) && c != self->delimiter) {
+                    } else {  // backtrack
+                        // use i + 1 because buf has been incremented but not i
+                        do {
+                            --buf;
+                            --i;
+                        } while (i + 1 > self->datapos && !IS_TERMINATOR(*buf));
+
+                        // reached a newline rather than the beginning
+                        if (IS_TERMINATOR(*buf)) {
+                            ++buf;  // move pointer to first char after newline
+                            ++i;
+                        }
+                        self->state = START_FIELD;
+                    }
+                    break;
+                }
+            // fall through
+
+            case EAT_WHITESPACE:
+                if (IS_TERMINATOR(c)) {
+                    END_LINE();
+                    self->state = START_RECORD;
+                    break;
+                } else if (IS_CARRIAGE(c)) {
                     self->state = EAT_CRNL;
+                    break;
+                } else if (!IS_WHITESPACE(c)) {
+                    self->state = START_FIELD;
+                    // fall through to subsequent state
+                } else {
+                    // if whitespace char, keep slurping
+                    break;
                 }
-                break;
-            } else if (IS_COMMENT_CHAR(c)) {
-                self->state = EAT_LINE_COMMENT;
-                break;
-            } else if (IS_WHITESPACE(c)) {
-                if (self->delim_whitespace) {
+
+            case START_RECORD:
+                // start of record
+                if (skip_this_line(self, self->file_lines)) {
+                    if (IS_QUOTE(c)) {
+                        self->state = IN_QUOTED_FIELD_IN_SKIP_LINE;
+                    } else {
+                        self->state = IN_FIELD_IN_SKIP_LINE;
+
+                        if (IS_TERMINATOR(c)) {
+                            END_LINE();
+                        }
+                    }
+                    break;
+                } else if (IS_TERMINATOR(c)) {
+                    // \n\r possible?
                     if (self->skip_empty_lines) {
-                        self->state = WHITESPACE_LINE;
+                        self->file_lines++;
                     } else {
-                        self->state = EAT_WHITESPACE;
+                        END_LINE();
                     }
                     break;
-                } else if (c != self->delimiter && self->skip_empty_lines) {
-                    self->state = WHITESPACE_LINE;
+                } else if (IS_CARRIAGE(c)) {
+                    if (self->skip_empty_lines) {
+                        self->file_lines++;
+                        self->state = EAT_CRNL_NOP;
+                    } else {
+                        self->state = EAT_CRNL;
+                    }
+                    break;
+                } else if (IS_COMMENT_CHAR(c)) {
+                    self->state = EAT_LINE_COMMENT;
                     break;
+                } else if (IS_WHITESPACE(c)) {
+                    if (self->delim_whitespace) {
+                        if (self->skip_empty_lines) {
+                            self->state = WHITESPACE_LINE;
+                        } else {
+                            self->state = EAT_WHITESPACE;
+                        }
+                        break;
+                    } else if (c != self->delimiter && self->skip_empty_lines) {
+                        self->state = WHITESPACE_LINE;
+                        break;
+                    }
+                    // fall through
                 }
-                // fall through
-            }
 
-            // normal character - fall through
-            // to handle as START_FIELD
-            self->state = START_FIELD;
+                // normal character - fall through
+                // to handle as START_FIELD
+                self->state = START_FIELD;
 
-        case START_FIELD:
-            // expecting field
-            if (IS_TERMINATOR(c)) {
-                END_FIELD();
-                END_LINE();
-            } else if (IS_CARRIAGE(c)) {
-                END_FIELD();
-                self->state = EAT_CRNL;
-            } else if (IS_QUOTE(c)) {
-                // start quoted field
-                self->state = IN_QUOTED_FIELD;
-            } else if (IS_ESCAPE_CHAR(c)) {
-                // possible escaped character
-                self->state = ESCAPED_CHAR;
-            } else if (IS_SKIPPABLE_SPACE(c)) {
-                // ignore space at start of field
-                ;
-            } else if (IS_DELIMITER(c)) {
-                if (self->delim_whitespace) {
-                    self->state = EAT_WHITESPACE;
-                } else {
-                    // save empty field
+            case START_FIELD:
+                // expecting field
+                if (IS_TERMINATOR(c)) {
+                    END_FIELD();
+                    END_LINE();
+                } else if (IS_CARRIAGE(c)) {
+                    END_FIELD();
+                    self->state = EAT_CRNL;
+                } else if (IS_QUOTE(c)) {
+                    // start quoted field
+                    self->state = IN_QUOTED_FIELD;
+                } else if (IS_ESCAPE_CHAR(c)) {
+                    // possible escaped character
+                    self->state = ESCAPED_CHAR;
+                } else if (IS_SKIPPABLE_SPACE(c)) {
+                    // ignore space at start of field
+                } else if (IS_DELIMITER(c)) {
+                    if (self->delim_whitespace) {
+                        self->state = EAT_WHITESPACE;
+                    } else {
+                        // save empty field
+                        END_FIELD();
+                    }
+                } else if (IS_COMMENT_CHAR(c)) {
                     END_FIELD();
+                    self->state = EAT_COMMENT;
+                } else {
+                    // begin new unquoted field
+                    PUSH_CHAR(c);
+                    self->state = IN_FIELD;
                 }
-            } else if (IS_COMMENT_CHAR(c)) {
-                END_FIELD();
-                self->state = EAT_COMMENT;
-            } else {
-                // begin new unquoted field
-                // if (self->delim_whitespace && \
-                //    self->quoting == QUOTE_NONNUMERIC) {
-                //    self->numeric_field = 1;
-                // }
+                break;
 
+            case ESCAPED_CHAR:
                 PUSH_CHAR(c);
                 self->state = IN_FIELD;
-            }
-            break;
+                break;
 
-        case ESCAPED_CHAR:
-            PUSH_CHAR(c);
-            self->state = IN_FIELD;
-            break;
+            case EAT_LINE_COMMENT:
+                if (IS_TERMINATOR(c)) {
+                    self->file_lines++;
+                    self->state = START_RECORD;
+                } else if (IS_CARRIAGE(c)) {
+                    self->file_lines++;
+                    self->state = EAT_CRNL_NOP;
+                }
+                break;
 
-        case EAT_LINE_COMMENT:
-            if (IS_TERMINATOR(c)) {
-                self->file_lines++;
-                self->state = START_RECORD;
-            } else if (IS_CARRIAGE(c)) {
-                self->file_lines++;
-                self->state = EAT_CRNL_NOP;
-            }
-            break;
+            case IN_FIELD:
+                // in unquoted field
+                if (IS_TERMINATOR(c)) {
+                    END_FIELD();
+                    END_LINE();
+                } else if (IS_CARRIAGE(c)) {
+                    END_FIELD();
+                    self->state = EAT_CRNL;
+                } else if (IS_ESCAPE_CHAR(c)) {
+                    // possible escaped character
+                    self->state = ESCAPED_CHAR;
+                } else if (IS_DELIMITER(c)) {
+                    // end of field - end of line not reached yet
+                    END_FIELD();
 
-        case IN_FIELD:
-            // in unquoted field
-            if (IS_TERMINATOR(c)) {
-                END_FIELD();
-                END_LINE();
-            } else if (IS_CARRIAGE(c)) {
-                END_FIELD();
-                self->state = EAT_CRNL;
-            } else if (IS_ESCAPE_CHAR(c)) {
-                // possible escaped character
-                self->state = ESCAPED_CHAR;
-            } else if (IS_DELIMITER(c)) {
-                // end of field - end of line not reached yet
-                END_FIELD();
-
-                if (self->delim_whitespace) {
-                    self->state = EAT_WHITESPACE;
+                    if (self->delim_whitespace) {
+                        self->state = EAT_WHITESPACE;
+                    } else {
+                        self->state = START_FIELD;
+                    }
+                } else if (IS_COMMENT_CHAR(c)) {
+                    END_FIELD();
+                    self->state = EAT_COMMENT;
                 } else {
-                    self->state = START_FIELD;
+                    // normal character - save in field
+                    PUSH_CHAR(c);
                 }
-            } else if (IS_COMMENT_CHAR(c)) {
-                END_FIELD();
-                self->state = EAT_COMMENT;
-            } else {
-                // normal character - save in field
-                PUSH_CHAR(c);
-            }
-            break;
+                break;
 
-        case IN_QUOTED_FIELD:
-            // in quoted field
-            if (IS_ESCAPE_CHAR(c)) {
-                // possible escape character
-                self->state = ESCAPE_IN_QUOTED_FIELD;
-            } else if (IS_QUOTE(c)) {
-                if (self->doublequote) {
-                    // double quote - " represented by ""
-                    self->state = QUOTE_IN_QUOTED_FIELD;
+            case IN_QUOTED_FIELD:
+                // in quoted field
+                if (IS_ESCAPE_CHAR(c)) {
+                    // possible escape character
+                    self->state = ESCAPE_IN_QUOTED_FIELD;
+                } else if (IS_QUOTE(c)) {
+                    if (self->doublequote) {
+                        // double quote - " represented by ""
+                        self->state = QUOTE_IN_QUOTED_FIELD;
+                    } else {
+                        // end of quote part of field
+                        self->state = IN_FIELD;
+                    }
                 } else {
-                    // end of quote part of field
-                    self->state = IN_FIELD;
+                    // normal character - save in field
+                    PUSH_CHAR(c);
                 }
-            } else {
-                // normal character - save in field
-                PUSH_CHAR(c);
-            }
-            break;
-
-        case ESCAPE_IN_QUOTED_FIELD:
-            PUSH_CHAR(c);
-            self->state = IN_QUOTED_FIELD;
-            break;
-
-        case QUOTE_IN_QUOTED_FIELD:
-            // double quote - seen a quote in an quoted field
-            if (IS_QUOTE(c)) {
-                // save "" as "
+                break;
 
+            case ESCAPE_IN_QUOTED_FIELD:
                 PUSH_CHAR(c);
                 self->state = IN_QUOTED_FIELD;
-            } else if (IS_DELIMITER(c)) {
-                // end of field - end of line not reached yet
-                END_FIELD();
-
-                if (self->delim_whitespace) {
-                    self->state = EAT_WHITESPACE;
-                } else {
-                    self->state = START_FIELD;
-                }
-            } else if (IS_TERMINATOR(c)) {
-                END_FIELD();
-                END_LINE();
-            } else if (IS_CARRIAGE(c)) {
-                END_FIELD();
-                self->state = EAT_CRNL;
-            } else if (!self->strict) {
-                PUSH_CHAR(c);
-                self->state = IN_FIELD;
-            } else {
-                self->error_msg = (char*) malloc(50);
-                sprintf(self->error_msg,
-                        "delimiter expected after "
-                        "quote in quote");
-                goto parsingerror;
-            }
-            break;
+                break;
 
-        case EAT_COMMENT:
-            if (IS_TERMINATOR(c)) {
-                END_LINE();
-            } else if (IS_CARRIAGE(c)) {
-                self->state = EAT_CRNL;
-            }
-            break;
+            case QUOTE_IN_QUOTED_FIELD:
+                // double quote - seen a quote in a quoted field
+                if (IS_QUOTE(c)) {
+                    // save "" as "
 
-        // only occurs with non-custom line terminator,
-        // which is why we directly check for '\n'
-        case EAT_CRNL:
-            if (c == '\n') {
-                END_LINE();
-            } else if (IS_DELIMITER(c)){
+                    PUSH_CHAR(c);
+                    self->state = IN_QUOTED_FIELD;
+                } else if (IS_DELIMITER(c)) {
+                    // end of field - end of line not reached yet
+                    END_FIELD();
 
-                if (self->delim_whitespace) {
-                    END_LINE_STATE(EAT_WHITESPACE);
+                    if (self->delim_whitespace) {
+                        self->state = EAT_WHITESPACE;
+                    } else {
+                        self->state = START_FIELD;
+                    }
+                } else if (IS_TERMINATOR(c)) {
+                    END_FIELD();
+                    END_LINE();
+                } else if (IS_CARRIAGE(c)) {
+                    END_FIELD();
+                    self->state = EAT_CRNL;
+                } else if (!self->strict) {
+                    PUSH_CHAR(c);
+                    self->state = IN_FIELD;
                 } else {
-                    // Handle \r-delimited files
-                    END_LINE_AND_FIELD_STATE(START_FIELD);
+                    int bufsize = 100;
+                    self->error_msg = (char *)malloc(bufsize);
+                    snprintf(self->error_msg, bufsize,
+                            "delimiter expected after quote in quote");
+                    goto parsingerror;
                 }
-            } else {
-                if (self->delim_whitespace) {
-                    /* XXX
-                    * first character of a new record--need to back up and reread
-                    * to handle properly...
-                    */
-                    i--; buf--; // back up one character (HACK!)
-                    END_LINE_STATE(START_RECORD);
-                } else {
-                    // \r line terminator
-                    // UGH. we don't actually want
-                    // to consume the token. fix this later
-                    self->stream_len = slen;
-                    if (end_line(self) < 0) {
-                        goto parsingerror;
-                    }
+                break;
 
-                    stream = self->stream + self->stream_len;
-                    slen = self->stream_len;
-                    self->state = START_RECORD;
+            case EAT_COMMENT:
+                if (IS_TERMINATOR(c)) {
+                    END_LINE();
+                } else if (IS_CARRIAGE(c)) {
+                    self->state = EAT_CRNL;
+                }
+                break;
+
+            // only occurs with non-custom line terminator,
+            // which is why we directly check for '\n'
+            case EAT_CRNL:
+                if (c == '\n') {
+                    END_LINE();
+                } else if (IS_DELIMITER(c)) {
+                    if (self->delim_whitespace) {
+                        END_LINE_STATE(EAT_WHITESPACE);
+                    } else {
+                        // Handle \r-delimited files
+                        END_LINE_AND_FIELD_STATE(START_FIELD);
+                    }
+                } else {
+                    if (self->delim_whitespace) {
+                        /* XXX
+                        * first character of a new record--need to back up and
+                        * reread
+                        * to handle properly...
+                        */
+                        i--;
+                        buf--;  // back up one character (HACK!)
+                        END_LINE_STATE(START_RECORD);
+                    } else {
+                        // \r line terminator
+                        // UGH. we don't actually want
+                        // to consume the token. fix this later
+                        self->stream_len = slen;
+                        if (end_line(self) < 0) {
+                            goto parsingerror;
+                        }
+
+                        stream = self->stream + self->stream_len;
+                        slen = self->stream_len;
+                        self->state = START_RECORD;
 
-                    --i; buf--; // let's try this character again (HACK!)
-                    if (line_limit > 0 && self->lines == start_lines + line_limit) {
-                        goto linelimit;
+                        --i;
+                        buf--;  // let's try this character again (HACK!)
+                        if (line_limit > 0 &&
+                            self->lines == start_lines + line_limit) {
+                            goto linelimit;
+                        }
                     }
                 }
-            }
-            break;
+                break;
 
-        // only occurs with non-custom line terminator,
-        // which is why we directly check for '\n'
-        case EAT_CRNL_NOP: // inside an ignored comment line
-            self->state = START_RECORD;
-            // \r line terminator -- parse this character again
-            if (c != '\n' && !IS_DELIMITER(c)) {
-                --i;
-                --buf;
-            }
-            break;
-        default:
-            break;
+            // only occurs with non-custom line terminator,
+            // which is why we directly check for '\n'
+            case EAT_CRNL_NOP:  // inside an ignored comment line
+                self->state = START_RECORD;
+                // \r line terminator -- parse this character again
+                if (c != '\n' && !IS_DELIMITER(c)) {
+                    --i;
+                    --buf;
+                }
+                break;
+            default:
+                break;
         }
     }
 
@@ -1134,39 +1090,41 @@ int tokenize_bytes(parser_t *self, size_t line_limit, int start_lines)
 }
 
 static int parser_handle_eof(parser_t *self) {
-    TRACE(("handling eof, datalen: %d, pstate: %d\n", self->datalen, self->state))
+    int bufsize = 100;
 
-    if (self->datalen != 0)
-        return -1;
+    TRACE(
+        ("handling eof, datalen: %d, pstate: %d\n", self->datalen, self->state))
 
-    switch (self->state) {
-    case START_RECORD:
-    case WHITESPACE_LINE:
-    case EAT_CRNL_NOP:
-    case EAT_LINE_COMMENT:
-        return 0;
+    if (self->datalen != 0) return -1;
 
-    case ESCAPE_IN_QUOTED_FIELD:
-    case IN_QUOTED_FIELD:
-        self->error_msg = (char*)malloc(100);
-        sprintf(self->error_msg, "EOF inside string starting at line %d",
-                self->file_lines);
-        return -1;
+    switch (self->state) {
+        case START_RECORD:
+        case WHITESPACE_LINE:
+        case EAT_CRNL_NOP:
+        case EAT_LINE_COMMENT:
+            return 0;
 
-    case ESCAPED_CHAR:
-        self->error_msg = (char*)malloc(100);
-        sprintf(self->error_msg, "EOF following escape character");
-        return -1;
+        case ESCAPE_IN_QUOTED_FIELD:
+        case IN_QUOTED_FIELD:
+            self->error_msg = (char *)malloc(bufsize);
+            snprintf(self->error_msg, bufsize,
+                    "EOF inside string starting at line %d", self->file_lines);
+            return -1;
 
-    case IN_FIELD:
-    case START_FIELD:
-    case QUOTE_IN_QUOTED_FIELD:
-        if (end_field(self) < 0)
+        case ESCAPED_CHAR:
+            self->error_msg = (char *)malloc(bufsize);
+            snprintf(self->error_msg, bufsize,
+                     "EOF following escape character");
             return -1;
-        break;
 
-    default:
-        break;
+        case IN_FIELD:
+        case START_FIELD:
+        case QUOTE_IN_QUOTED_FIELD:
+            if (end_field(self) < 0) return -1;
+            break;
+
+        default:
+            break;
     }
 
     if (end_line(self) < 0)
@@ -1183,19 +1141,19 @@ int parser_consume_rows(parser_t *self, size_t nrows) {
     }
 
     /* do nothing */
-    if (nrows == 0)
-        return 0;
+    if (nrows == 0) return 0;
 
     /* cannot guarantee that nrows + 1 has been observed */
     word_deletions = self->line_start[nrows - 1] + self->line_fields[nrows - 1];
     char_count = (self->word_starts[word_deletions - 1] +
                   strlen(self->words[word_deletions - 1]) + 1);
 
-    TRACE(("parser_consume_rows: Deleting %d words, %d chars\n", word_deletions, char_count));
+    TRACE(("parser_consume_rows: Deleting %d words, %d chars\n", word_deletions,
+           char_count));
 
     /* move stream, only if something to move */
     if (char_count < self->stream_len) {
-        memmove((void*) self->stream, (void*) (self->stream + char_count),
+        memmove((void *)self->stream, (void *)(self->stream + char_count),
                 self->stream_len - char_count);
     }
     /* buffer counts */
@@ -1213,26 +1171,14 @@ int parser_consume_rows(parser_t *self, size_t nrows) {
     /* move current word pointer to stream */
     self->pword_start -= char_count;
     self->word_start -= char_count;
-    /*
-    printf("Line_start: ");
-    for (i = 0; i < self->lines + 1; ++i) {
-         printf("%d ", self->line_fields[i]);
-     }
-    printf("\n");
-    */
+
     /* move line metadata */
-    for (i = 0; i < self->lines - nrows + 1; ++i)
-    {
+    for (i = 0; i < self->lines - nrows + 1; ++i) {
         offset = i + nrows;
         self->line_start[i] = self->line_start[offset] - word_deletions;
-
-        /* TRACE(("First word in line %d is now %s\n", i, */
-        /*        self->words[self->line_start[i]])); */
-
         self->line_fields[i] = self->line_fields[offset];
     }
     self->lines -= nrows;
-    /* self->line_fields[self->lines] = 0; */
 
     return 0;
 }
@@ -1256,47 +1202,50 @@ int parser_trim_buffers(parser_t *self) {
     new_cap = _next_pow2(self->words_len) + 1;
     if (new_cap < self->words_cap) {
         TRACE(("parser_trim_buffers: new_cap < self->words_cap\n"));
-        newptr = safe_realloc((void*) self->words, new_cap * sizeof(char*));
+        newptr = safe_realloc((void *)self->words, new_cap * sizeof(char *));
         if (newptr == NULL) {
             return PARSER_OUT_OF_MEMORY;
         } else {
-            self->words = (char**) newptr;
+            self->words = (char **)newptr;
         }
-        newptr = safe_realloc((void*) self->word_starts, new_cap * sizeof(int));
+        newptr = safe_realloc((void *)self->word_starts, new_cap * sizeof(int));
         if (newptr == NULL) {
             return PARSER_OUT_OF_MEMORY;
         } else {
-            self->word_starts = (int*) newptr;
+            self->word_starts = (int *)newptr;
             self->words_cap = new_cap;
         }
     }
 
     /* trim stream */
     new_cap = _next_pow2(self->stream_len) + 1;
-    TRACE(("parser_trim_buffers: new_cap = %zu, stream_cap = %zu, lines_cap = %zu\n",
-           new_cap, self->stream_cap, self->lines_cap));
+    TRACE(
+        ("parser_trim_buffers: new_cap = %zu, stream_cap = %zu, lines_cap = "
+         "%zu\n",
+         new_cap, self->stream_cap, self->lines_cap));
     if (new_cap < self->stream_cap) {
-        TRACE(("parser_trim_buffers: new_cap < self->stream_cap, calling safe_realloc\n"));
-        newptr = safe_realloc((void*) self->stream, new_cap);
+        TRACE(
+            ("parser_trim_buffers: new_cap < self->stream_cap, calling "
+             "safe_realloc\n"));
+        newptr = safe_realloc((void *)self->stream, new_cap);
         if (newptr == NULL) {
             return PARSER_OUT_OF_MEMORY;
         } else {
-            // Update the pointers in the self->words array (char **) if `safe_realloc`
-            //  moved the `self->stream` buffer. This block mirrors a similar block in
+            // Update the pointers in the self->words array (char **) if
+            // `safe_realloc` moved the `self->stream` buffer, since each
+            // entry points into that buffer. This block mirrors a similar
+            // block in
             //  `make_stream_space`.
             if (self->stream != newptr) {
-                /* TRACE(("Moving word pointers\n")) */
-                self->pword_start = (char*) newptr + self->word_start;
+                self->pword_start = (char *)newptr + self->word_start;
 
-                for (i = 0; i < self->words_len; ++i)
-                {
-                    self->words[i] = (char*) newptr + self->word_starts[i];
+                for (i = 0; i < self->words_len; ++i) {
+                    self->words[i] = (char *)newptr + self->word_starts[i];
                 }
             }
 
             self->stream = newptr;
             self->stream_cap = new_cap;
-
         }
     }
 
@@ -1304,17 +1253,17 @@ int parser_trim_buffers(parser_t *self) {
     new_cap = _next_pow2(self->lines) + 1;
     if (new_cap < self->lines_cap) {
         TRACE(("parser_trim_buffers: new_cap < self->lines_cap\n"));
-        newptr = safe_realloc((void*) self->line_start, new_cap * sizeof(int));
+        newptr = safe_realloc((void *)self->line_start, new_cap * sizeof(int));
         if (newptr == NULL) {
             return PARSER_OUT_OF_MEMORY;
         } else {
-            self->line_start = (int*) newptr;
+            self->line_start = (int *)newptr;
         }
-        newptr = safe_realloc((void*) self->line_fields, new_cap * sizeof(int));
+        newptr = safe_realloc((void *)self->line_fields, new_cap * sizeof(int));
         if (newptr == NULL) {
             return PARSER_OUT_OF_MEMORY;
         } else {
-            self->line_fields = (int*) newptr;
+            self->line_fields = (int *)newptr;
             self->lines_cap = new_cap;
         }
     }
@@ -1326,12 +1275,10 @@ void debug_print_parser(parser_t *self) {
     int j, line;
     char *token;
 
-    for (line = 0; line < self->lines; ++line)
-    {
+    for (line = 0; line < self->lines; ++line) {
         printf("(Parsed) Line %d: ", line);
 
-        for (j = 0; j < self->line_fields[j]; ++j)
-        {
+        for (j = 0; j < self->line_fields[line]; ++j) {  // fields on this line
             token = self->words[j + self->line_start[line]];
             printf("%s ", token);
         }
@@ -1339,13 +1286,6 @@ void debug_print_parser(parser_t *self) {
     }
 }
 
-/*int clear_parsed_lines(parser_t *self, size_t nlines) {
-    // TODO. move data up in stream, shift relevant word pointers
-
-    return 0;
-}*/
-
-
 /*
   nrows : number of rows to tokenize (or until reach EOF)
   all : tokenize all the data vs. certain number of rows
@@ -1359,12 +1299,12 @@ int _tokenize_helper(parser_t *self, size_t nrows, int all) {
         return 0;
     }
 
-    TRACE(("_tokenize_helper: Asked to tokenize %d rows, datapos=%d, datalen=%d\n", \
-           (int) nrows, self->datapos, self->datalen));
+    TRACE((
+        "_tokenize_helper: Asked to tokenize %d rows, datapos=%d, datalen=%d\n",
+        (int)nrows, self->datapos, self->datalen));
 
     while (1) {
-        if (!all && self->lines - start_lines >= nrows)
-            break;
+        if (!all && self->lines - start_lines >= nrows) break;
 
         if (self->datapos == self->datalen) {
             status = parser_buffer_bytes(self, self->chunksize);
@@ -1379,15 +1319,19 @@ int _tokenize_helper(parser_t *self, size_t nrows, int all) {
             }
         }
 
-        TRACE(("_tokenize_helper: Trying to process %d bytes, datalen=%d, datapos= %d\n",
-               self->datalen - self->datapos, self->datalen, self->datapos));
+        TRACE(
+            ("_tokenize_helper: Trying to process %d bytes, datalen=%d, "
+             "datapos= %d\n",
+             self->datalen - self->datapos, self->datalen, self->datapos));
 
         status = tokenize_bytes(self, nrows, start_lines);
 
         if (status < 0) {
             // XXX
-            TRACE(("_tokenize_helper: Status %d returned from tokenize_bytes, breaking\n",
-                   status));
+            TRACE(
+                ("_tokenize_helper: Status %d returned from tokenize_bytes, "
+                 "breaking\n",
+                 status));
             status = -1;
             break;
         }
@@ -1406,86 +1350,11 @@ int tokenize_all_rows(parser_t *self) {
     return status;
 }
 
-/* SEL - does not look like this routine is used anywhere
-void test_count_lines(char *fname) {
-    clock_t start = clock();
-
-    char *buffer, *tmp;
-    size_t bytes, lines = 0;
-    int i;
-    FILE *fp = fopen(fname, "rb");
-
-    buffer = (char*) malloc(CHUNKSIZE * sizeof(char));
-
-    while(1) {
-        tmp = buffer;
-        bytes = fread((void *) buffer, sizeof(char), CHUNKSIZE, fp);
-        // printf("Read %d bytes\n", bytes);
-
-        if (bytes == 0) {
-            break;
-        }
-
-        for (i = 0; i < bytes; ++i)
-        {
-            if (*tmp++ == '\n') {
-                lines++;
-            }
-        }
-    }
-
-
-    printf("Saw %d lines\n", (int) lines);
-
-    free(buffer);
-    fclose(fp);
-
-    printf("Time elapsed: %f\n", ((double)clock() - start) / CLOCKS_PER_SEC);
-}*/
-
-
 P_INLINE void uppercase(char *p) {
-    for ( ; *p; ++p) *p = toupper(*p);
-}
-
-/* SEL - does not look like these routines are used anywhere
-P_INLINE void lowercase(char *p) {
-    for ( ; *p; ++p) *p = tolower(*p);
+    for (; *p; ++p) *p = toupper((unsigned char)*p);  // avoid negative-char UB
 }
 
-int P_INLINE to_complex(char *item, double *p_real, double *p_imag, char sci, char decimal)
-{
-    char *p_end;
-
-    *p_real = xstrtod(item, &p_end, decimal, sci, '\0', FALSE);
-    if (*p_end == '\0') {
-        *p_imag = 0.0;
-        return errno == 0;
-    }
-    if (*p_end == 'i' || *p_end == 'j') {
-        *p_imag = *p_real;
-        *p_real = 0.0;
-        ++p_end;
-    }
-    else {
-        if (*p_end == '+') {
-            ++p_end;
-        }
-        *p_imag = xstrtod(p_end, &p_end, decimal, sci, '\0', FALSE);
-        if (errno || ((*p_end != 'i') && (*p_end != 'j'))) {
-            return FALSE;
-        }
-        ++p_end;
-    }
-    while(*p_end == ' ') {
-        ++p_end;
-    }
-    return *p_end == '\0';
-}*/
-
-
-int P_INLINE to_longlong(char *item, long long *p_value)
-{
+int P_INLINE to_longlong(char *item, long long *p_value) {
     char *p_end;
 
     // Try integer conversion.  We explicitly give the base to be 10. If
@@ -1500,65 +1369,26 @@ int P_INLINE to_longlong(char *item, long long *p_value)
     return (errno == 0) && (!*p_end);
 }
 
-/* does not look like this routine is used anywhere
-int P_INLINE to_longlong_thousands(char *item, long long *p_value, char tsep)
-{
-    int i, pos, status, n = strlen(item), count = 0;
-    char *tmp;
-    char *p_end;
-
-    for (i = 0; i < n; ++i)
-    {
-        if (*(item + i) == tsep) {
-            count++;
-        }
-    }
-
-    if (count == 0) {
-        return to_longlong(item, p_value);
-    }
-
-    tmp = (char*) malloc((n - count + 1) * sizeof(char));
-    if (tmp == NULL) {
-        return 0;
-    }
-
-    pos = 0;
-    for (i = 0; i < n; ++i)
-    {
-        if (item[i] != tsep)
-            tmp[pos++] = item[i];
-    }
-
-    tmp[pos] = '\0';
-
-    status = to_longlong(tmp, p_value);
-    free(tmp);
-
-    return status;
-}*/
-
 int to_boolean(const char *item, uint8_t *val) {
     char *tmp;
     int i, status = 0;
+    int bufsize = sizeof(char) * (strlen(item) + 1);
 
     static const char *tstrs[1] = {"TRUE"};
     static const char *fstrs[1] = {"FALSE"};
 
-    tmp = malloc(sizeof(char) * (strlen(item) + 1));
-    strcpy(tmp, item);
+    tmp = malloc(bufsize);
+    strncpy(tmp, item, bufsize);
     uppercase(tmp);
 
-    for (i = 0; i < 1; ++i)
-    {
+    for (i = 0; i < 1; ++i) {
         if (strcmp(tmp, tstrs[i]) == 0) {
             *val = 1;
             goto done;
         }
     }
 
-    for (i = 0; i < 1; ++i)
-    {
+    for (i = 0; i < 1; ++i) {
         if (strcmp(tmp, fstrs[i]) == 0) {
             *val = 0;
             goto done;
@@ -1572,27 +1402,19 @@ int to_boolean(const char *item, uint8_t *val) {
     return status;
 }
 
-// #define TEST
-
 #ifdef TEST
 
-int main(int argc, char *argv[])
-{
+int main(int argc, char *argv[]) {
     double x, y;
     long long xi;
     int status;
     char *s;
 
-    //s = "0.10e-3-+5.5e2i";
-    // s = "1-0j";
-    // status = to_complex(s, &x, &y, 'e', '.');
-    s = "123,789";
-    status = to_longlong_thousands(s, &xi, ',');
+    s = "123789";
+    status = to_longlong(s, &xi);
     printf("s = '%s'\n", s);
     printf("status = %d\n", status);
-    printf("x = %d\n", (int) xi);
-
-    // printf("x = %lg,  y = %lg\n", x, y);
+    printf("x = %d\n", (int)xi);
 
     return 0;
 }
@@ -1621,10 +1443,12 @@ int main(int argc, char *argv[])
 //    may be used to endorse or promote products derived from this software
 //    without specific prior written permission.
 //
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND
 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+// ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE
 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 // OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
@@ -1643,197 +1467,185 @@ int main(int argc, char *argv[])
 // * Add tsep argument for thousands separator
 //
 
-double xstrtod(const char *str, char **endptr, char decimal,
-                      char sci, char tsep, int skip_trailing)
-{
-  double number;
-  int exponent;
-  int negative;
-  char *p = (char *) str;
-  double p10;
-  int n;
-  int num_digits;
-  int num_decimals;
-
-  errno = 0;
-
-  // Skip leading whitespace
-  while (isspace(*p)) p++;
-
-  // Handle optional sign
-  negative = 0;
-  switch (*p)
-  {
-    case '-': negative = 1; // Fall through to increment position
-    case '+': p++;
-  }
-
-  number = 0.;
-  exponent = 0;
-  num_digits = 0;
-  num_decimals = 0;
-
-  // Process string of digits
-  while (isdigit(*p))
-  {
-    number = number * 10. + (*p - '0');
-    p++;
-    num_digits++;
-
-    p += (tsep != '\0' && *p == tsep);
-  }
-
-  // Process decimal part
-  if (*p == decimal)
-  {
-    p++;
-
-    while (isdigit(*p))
-    {
-      number = number * 10. + (*p - '0');
-      p++;
-      num_digits++;
-      num_decimals++;
-    }
-
-    exponent -= num_decimals;
-  }
-
-  if (num_digits == 0)
-  {
-    errno = ERANGE;
-    return 0.0;
-  }
-
-  // Correct for sign
-  if (negative) number = -number;
-
-  // Process an exponent string
-  if (toupper(*p) == toupper(sci))
-  {
-    // Handle optional sign
+double xstrtod(const char *str, char **endptr, char decimal, char sci,
+               char tsep, int skip_trailing) {
+    double number;
+    int exponent;
+    int negative;
+    char *p = (char *)str;
+    double p10;
+    int n;
+    int num_digits;
+    int num_decimals;
+
+    errno = 0;
+
+    // Skip leading whitespace.
+    while (isspace(*p)) p++;
+
+    // Handle optional sign.
     negative = 0;
-    switch (*++p)
-    {
-      case '-': negative = 1;   // Fall through to increment pos
-      case '+': p++;
+    switch (*p) {
+        case '-':
+            negative = 1;  // Fall through to increment position.
+        case '+':
+            p++;
     }
 
-    // Process string of digits
+    number = 0.;
+    exponent = 0;
     num_digits = 0;
-    n = 0;
-    while (isdigit(*p))
-    {
-      n = n * 10 + (*p - '0');
-      num_digits++;
-      p++;
+    num_decimals = 0;
+
+    // Process string of digits.
+    while (isdigit(*p)) {
+        number = number * 10. + (*p - '0');
+        p++;
+        num_digits++;
+
+        p += (tsep != '\0' && *p == tsep);
     }
 
-    if (negative)
-      exponent -= n;
-    else
-      exponent += n;
+    // Process decimal part.
+    if (*p == decimal) {
+        p++;
+
+        while (isdigit(*p)) {
+            number = number * 10. + (*p - '0');
+            p++;
+            num_digits++;
+            num_decimals++;
+        }
 
-    // If no digits, after the 'e'/'E', un-consume it
-    if (num_digits == 0)
-        p--;
-  }
+        exponent -= num_decimals;
+    }
 
+    if (num_digits == 0) {
+        errno = ERANGE;
+        return 0.0;
+    }
 
-  if (exponent < DBL_MIN_EXP  || exponent > DBL_MAX_EXP)
-  {
+    // Correct for sign.
+    if (negative) number = -number;
 
-    errno = ERANGE;
-    return HUGE_VAL;
-  }
+    // Process an exponent string.
+    if (toupper(*p) == toupper(sci)) {
+        // Handle optional sign.
+        negative = 0;
+        switch (*++p) {
+            case '-':
+                negative = 1;  // Fall through to increment pos.
+            case '+':
+                p++;
+        }
 
-  // Scale the result
-  p10 = 10.;
-  n = exponent;
-  if (n < 0) n = -n;
-  while (n)
-  {
-    if (n & 1)
-    {
-      if (exponent < 0)
-        number /= p10;
-      else
-        number *= p10;
+        // Process string of digits.
+        num_digits = 0;
+        n = 0;
+        while (isdigit(*p)) {
+            n = n * 10 + (*p - '0');
+            num_digits++;
+            p++;
+        }
+
+        if (negative)
+            exponent -= n;
+        else
+            exponent += n;
+
+        // If no digits, after the 'e'/'E', un-consume it
+        if (num_digits == 0) p--;
     }
-    n >>= 1;
-    p10 *= p10;
-  }
 
+    if (exponent < DBL_MIN_EXP || exponent > DBL_MAX_EXP) {
+        errno = ERANGE;
+        return HUGE_VAL;
+    }
 
-  if (number == HUGE_VAL) {
-      errno = ERANGE;
-  }
+    // Scale the result.
+    p10 = 10.;
+    n = exponent;
+    if (n < 0) n = -n;
+    while (n) {
+        if (n & 1) {
+            if (exponent < 0)
+                number /= p10;
+            else
+                number *= p10;
+        }
+        n >>= 1;
+        p10 *= p10;
+    }
 
-  if (skip_trailing) {
-      // Skip trailing whitespace
-      while (isspace(*p)) p++;
-  }
+    if (number == HUGE_VAL) {
+        errno = ERANGE;
+    }
 
-  if (endptr) *endptr = p;
+    if (skip_trailing) {
+        // Skip trailing whitespace.
+        while (isspace(*p)) p++;
+    }
 
+    if (endptr) *endptr = p;
 
-  return number;
+    return number;
 }
 
-double precise_xstrtod(const char *str, char **endptr, char decimal,
-               char sci, char tsep, int skip_trailing)
-{
+double precise_xstrtod(const char *str, char **endptr, char decimal, char sci,
+                       char tsep, int skip_trailing) {
     double number;
     int exponent;
     int negative;
-    char *p = (char *) str;
+    char *p = (char *)str;
     int num_digits;
     int num_decimals;
     int max_digits = 17;
     int n;
-    // Cache powers of 10 in memory
-    static double e[] = {1., 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10,
-                         1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20,
-                         1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30,
-                         1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40,
-                         1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50,
-                         1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, 1e60,
-                         1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, 1e70,
-                         1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79, 1e80,
-                         1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88, 1e89, 1e90,
-                         1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97, 1e98, 1e99, 1e100,
-                         1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, 1e110,
-                         1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, 1e120,
-                         1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, 1e130,
-                         1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140,
-                         1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150,
-                         1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, 1e160,
-                         1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169, 1e170,
-                         1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179, 1e180,
-                         1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189, 1e190,
-                         1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198, 1e199, 1e200,
-                         1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 1e209, 1e210,
-                         1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 1e220,
-                         1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, 1e230,
-                         1e231, 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240,
-                         1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, 1e250,
-                         1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259, 1e260,
-                         1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269, 1e270,
-                         1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279, 1e280,
-                         1e281, 1e282, 1e283, 1e284, 1e285, 1e286, 1e287, 1e288, 1e289, 1e290,
-                         1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299, 1e300,
-                         1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308};
+    // Cache powers of 10 in memory.
+    static double e[] = {
+        1.,    1e1,   1e2,   1e3,   1e4,   1e5,   1e6,   1e7,   1e8,   1e9,
+        1e10,  1e11,  1e12,  1e13,  1e14,  1e15,  1e16,  1e17,  1e18,  1e19,
+        1e20,  1e21,  1e22,  1e23,  1e24,  1e25,  1e26,  1e27,  1e28,  1e29,
+        1e30,  1e31,  1e32,  1e33,  1e34,  1e35,  1e36,  1e37,  1e38,  1e39,
+        1e40,  1e41,  1e42,  1e43,  1e44,  1e45,  1e46,  1e47,  1e48,  1e49,
+        1e50,  1e51,  1e52,  1e53,  1e54,  1e55,  1e56,  1e57,  1e58,  1e59,
+        1e60,  1e61,  1e62,  1e63,  1e64,  1e65,  1e66,  1e67,  1e68,  1e69,
+        1e70,  1e71,  1e72,  1e73,  1e74,  1e75,  1e76,  1e77,  1e78,  1e79,
+        1e80,  1e81,  1e82,  1e83,  1e84,  1e85,  1e86,  1e87,  1e88,  1e89,
+        1e90,  1e91,  1e92,  1e93,  1e94,  1e95,  1e96,  1e97,  1e98,  1e99,
+        1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109,
+        1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119,
+        1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129,
+        1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139,
+        1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149,
+        1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159,
+        1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169,
+        1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179,
+        1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189,
+        1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198, 1e199,
+        1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 1e209,
+        1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219,
+        1e220, 1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229,
+        1e230, 1e231, 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239,
+        1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249,
+        1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259,
+        1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269,
+        1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279,
+        1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286, 1e287, 1e288, 1e289,
+        1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299,
+        1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308};
     errno = 0;
 
-    // Skip leading whitespace
+    // Skip leading whitespace.
     while (isspace(*p)) p++;
 
-    // Handle optional sign
+    // Handle optional sign.
     negative = 0;
-    switch (*p)
-    {
-    case '-': negative = 1; // Fall through to increment position
-    case '+': p++;
+    switch (*p) {
+        case '-':
+            negative = 1;  // Fall through to increment position.
+        case '+':
+            p++;
     }
 
     number = 0.;
@@ -1841,66 +1653,59 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
     num_digits = 0;
     num_decimals = 0;
 
-    // Process string of digits
-    while (isdigit(*p))
-    {
-        if (num_digits < max_digits)
-        {
+    // Process string of digits.
+    while (isdigit(*p)) {
+        if (num_digits < max_digits) {
             number = number * 10. + (*p - '0');
             num_digits++;
-        }
-        else
+        } else {
             ++exponent;
+        }
 
         p++;
         p += (tsep != '\0' && *p == tsep);
     }
 
     // Process decimal part
-    if (*p == decimal)
-    {
+    if (*p == decimal) {
         p++;
 
-        while (num_digits < max_digits && isdigit(*p))
-        {
+        while (num_digits < max_digits && isdigit(*p)) {
             number = number * 10. + (*p - '0');
             p++;
             num_digits++;
             num_decimals++;
         }
 
-        if (num_digits >= max_digits) // consume extra decimal digits
-            while (isdigit(*p))
-                ++p;
+        if (num_digits >= max_digits)  // Consume extra decimal digits.
+            while (isdigit(*p)) ++p;
 
         exponent -= num_decimals;
     }
 
-    if (num_digits == 0)
-    {
+    if (num_digits == 0) {
         errno = ERANGE;
         return 0.0;
     }
 
-    // Correct for sign
+    // Correct for sign.
     if (negative) number = -number;
 
-    // Process an exponent string
-    if (toupper(*p) == toupper(sci))
-    {
+    // Process an exponent string.
+    if (toupper(*p) == toupper(sci)) {
         // Handle optional sign
         negative = 0;
-        switch (*++p)
-        {
-        case '-': negative = 1;   // Fall through to increment pos
-        case '+': p++;
+        switch (*++p) {
+            case '-':
+                negative = 1;  // Fall through to increment pos.
+            case '+':
+                p++;
         }
 
-        // Process string of digits
+        // Process string of digits.
         num_digits = 0;
         n = 0;
-        while (isdigit(*p))
-        {
+        while (isdigit(*p)) {
             n = n * 10 + (*p - '0');
             num_digits++;
             p++;
@@ -1911,33 +1716,28 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
         else
             exponent += n;
 
-        // If no digits, after the 'e'/'E', un-consume it
-        if (num_digits == 0)
-            p--;
+        // If no digits after the 'e'/'E', un-consume it.
+        if (num_digits == 0) p--;
     }
 
-    if (exponent > 308)
-    {
+    if (exponent > 308) {
         errno = ERANGE;
         return HUGE_VAL;
-    }
-    else if (exponent > 0)
+    } else if (exponent > 0) {
         number *= e[exponent];
-    else if (exponent < -308) // subnormal
-    {
-        if (exponent < -616) // prevent invalid array access
+    } else if (exponent < -308) {  // Subnormal
+        if (exponent < -616)       // FIXME(review): e[] index below looks OOB.
             number = 0.;
         number /= e[-308 - exponent];
         number /= e[308];
-    }
-    else
+    } else {
         number /= e[-exponent];
+    }
 
-    if (number == HUGE_VAL || number == -HUGE_VAL)
-        errno = ERANGE;
+    if (number == HUGE_VAL || number == -HUGE_VAL) errno = ERANGE;
 
     if (skip_trailing) {
-        // Skip trailing whitespace
+        // Skip trailing whitespace.
         while (isspace(*p)) p++;
     }
 
@@ -1945,9 +1745,8 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
     return number;
 }
 
-double round_trip(const char *p, char **q, char decimal, char sci,
-                  char tsep, int skip_trailing)
-{
+double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
+                  int skip_trailing) {
 #if PY_VERSION_HEX >= 0x02070000
     return PyOS_string_to_double(p, q, 0);
 #else
@@ -1955,31 +1754,12 @@ double round_trip(const char *p, char **q, char decimal, char sci,
 #endif
 }
 
-/*
-float strtof(const char *str, char **endptr)
-{
-  return (float) strtod(str, endptr);
-}
-
-
-long double strtold(const char *str, char **endptr)
-{
-  return strtod(str, endptr);
-}
-
-double atof(const char *str)
-{
-  return strtod(str, NULL);
-}
-*/
-
 // End of xstrtod code
 // ---------------------------------------------------------------------------
 
 int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
-                     int *error, char tsep)
-{
-    const char *p = (const char *) p_item;
+                     int *error, char tsep) {
+    const char *p = (const char *)p_item;
     int isneg = 0;
     int64_t number = 0;
     int d;
@@ -1993,8 +1773,7 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
     if (*p == '-') {
         isneg = 1;
         ++p;
-    }
-    else if (*p == '+') {
+    } else if (*p == '+') {
         p++;
     }
 
@@ -2023,11 +1802,9 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
                 }
                 if ((number > pre_min) ||
                     ((number == pre_min) && (d - '0' <= dig_pre_min))) {
-
                     number = number * 10 - (d - '0');
                     d = *++p;
-                }
-                else {
+                } else {
                     *error = ERROR_OVERFLOW;
                     return 0;
                 }
@@ -2036,25 +1813,20 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
             while (isdigit(d)) {
                 if ((number > pre_min) ||
                     ((number == pre_min) && (d - '0' <= dig_pre_min))) {
-
                     number = number * 10 - (d - '0');
                     d = *++p;
-                }
-                else {
+                } else {
                     *error = ERROR_OVERFLOW;
                     return 0;
                 }
             }
         }
-    }
-    else {
+    } else {
         // If number is less than pre_max, at least one more digit
         // can be processed without overflowing.
         int64_t pre_max = int_max / 10;
         int dig_pre_max = int_max % 10;
 
-        //printf("pre_max = %lld  dig_pre_max = %d\n", pre_max, dig_pre_max);
-
         // Process the digits.
         d = *p;
         if (tsep != '\0') {
@@ -2067,12 +1839,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
                 }
                 if ((number < pre_max) ||
                     ((number == pre_max) && (d - '0' <= dig_pre_max))) {
-
                     number = number * 10 + (d - '0');
                     d = *++p;
 
-                }
-                else {
+                } else {
                     *error = ERROR_OVERFLOW;
                     return 0;
                 }
@@ -2081,12 +1851,10 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
             while (isdigit(d)) {
                 if ((number < pre_max) ||
                     ((number == pre_max) && (d - '0' <= dig_pre_max))) {
-
                     number = number * 10 + (d - '0');
                     d = *++p;
 
-                }
-                else {
+                } else {
                     *error = ERROR_OVERFLOW;
                     return 0;
                 }
@@ -2108,66 +1876,3 @@ int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
     *error = 0;
     return number;
 }
-
-/* does not look like this routine is used anywhere
-uint64_t str_to_uint64(const char *p_item, uint64_t uint_max, int *error)
-{
-    int d, dig_pre_max;
-    uint64_t pre_max;
-    const char *p = (const char *) p_item;
-    uint64_t number = 0;
-
-    // Skip leading spaces.
-    while (isspace(*p)) {
-        ++p;
-    }
-
-    // Handle sign.
-    if (*p == '-') {
-        *error = ERROR_MINUS_SIGN;
-        return 0;
-    }
-    if (*p == '+') {
-        p++;
-    }
-
-    // Check that there is a first digit.
-    if (!isdigit(*p)) {
-        // Error...
-        *error = ERROR_NO_DIGITS;
-        return 0;
-    }
-
-    // If number is less than pre_max, at least one more digit
-    // can be processed without overflowing.
-    pre_max = uint_max / 10;
-    dig_pre_max = uint_max % 10;
-
-    // Process the digits.
-    d = *p;
-    while (isdigit(d)) {
-        if ((number < pre_max) || ((number == pre_max) && (d - '0' <= dig_pre_max))) {
-            number = number * 10 + (d - '0');
-            d = *++p;
-        }
-        else {
-            *error = ERROR_OVERFLOW;
-            return 0;
-        }
-    }
-
-    // Skip trailing spaces.
-    while (isspace(*p)) {
-        ++p;
-    }
-
-    // Did we use up all the characters?
-    if (*p) {
-        *error = ERROR_INVALID_CHARS;
-        return 0;
-    }
-
-    *error = 0;
-    return number;
-}
-*/
diff --git a/pandas/src/parser/tokenizer.h b/pandas/src/parser/tokenizer.h
index 487c1265d9358..e01812f1c5520 100644
--- a/pandas/src/parser/tokenizer.h
+++ b/pandas/src/parser/tokenizer.h
@@ -9,29 +9,29 @@ See LICENSE for the license
 
 */
 
-#ifndef _PARSER_COMMON_H_
-#define _PARSER_COMMON_H_
+#ifndef PANDAS_SRC_PARSER_TOKENIZER_H_
+#define PANDAS_SRC_PARSER_TOKENIZER_H_
 
-#include "Python.h"
+#include <errno.h>
 #include <stdio.h>
-#include <string.h>
 #include <stdlib.h>
+#include <string.h>
 #include <time.h>
-#include <errno.h>
+#include "Python.h"
 
 #include <ctype.h>
 
-#define ERROR_OK             0
-#define ERROR_NO_DIGITS      1
-#define ERROR_OVERFLOW       2
-#define ERROR_INVALID_CHARS  3
-#define ERROR_MINUS_SIGN     4
+#define ERROR_OK 0
+#define ERROR_NO_DIGITS 1
+#define ERROR_OVERFLOW 2
+#define ERROR_INVALID_CHARS 3
+#define ERROR_MINUS_SIGN 4
 
 #include "../headers/stdint.h"
 
 #include "khash.h"
 
-#define CHUNKSIZE 1024*256
+#define CHUNKSIZE (1024 * 256)  // Parenthesized so it expands as one value.
 #define KB 1024
 #define MB 1024 * KB
 #define STREAM_INIT_SIZE 32
@@ -40,15 +40,15 @@ See LICENSE for the license
 #define CALLING_READ_FAILED 2
 
 #ifndef P_INLINE
-  #if defined(__GNUC__)
-    #define P_INLINE static __inline__
-  #elif defined(_MSC_VER)
-    #define P_INLINE
-  #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
-    #define P_INLINE static inline
-  #else
-    #define P_INLINE
-  #endif
+#if defined(__GNUC__)
+#define P_INLINE static __inline__
+#elif defined(_MSC_VER)
+#define P_INLINE
+#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define P_INLINE static inline
+#else
+#define P_INLINE
+#endif
 #endif
 
 #if defined(_MSC_VER)
@@ -62,41 +62,34 @@ See LICENSE for the license
  */
 
 #define FALSE 0
-#define TRUE  1
-
-/* Maximum number of columns in a file. */
-#define MAX_NUM_COLUMNS    2000
+#define TRUE 1
 
-/* Maximum number of characters in single field. */
-
-#define FIELD_BUFFER_SIZE  2000
+// Maximum number of columns in a file.
+#define MAX_NUM_COLUMNS 2000
 
+// Maximum number of characters in single field.
+#define FIELD_BUFFER_SIZE 2000
 
 /*
  *  Common set of error types for the read_rows() and tokenize()
  *  functions.
  */
-
-#define ERROR_OUT_OF_MEMORY             1
-#define ERROR_INVALID_COLUMN_INDEX     10
+#define ERROR_OUT_OF_MEMORY 1
+#define ERROR_INVALID_COLUMN_INDEX 10
 #define ERROR_CHANGED_NUMBER_OF_FIELDS 12
-#define ERROR_TOO_MANY_CHARS           21
-#define ERROR_TOO_MANY_FIELDS          22
-#define ERROR_NO_DATA                  23
-
-
-/* #define VERBOSE */
+#define ERROR_TOO_MANY_CHARS 21
+#define ERROR_TOO_MANY_FIELDS 22
+#define ERROR_NO_DATA 23
 
+// #define VERBOSE
 #if defined(VERBOSE)
 #define TRACE(X) printf X;
 #else
 #define TRACE(X)
 #endif
 
-
 #define PARSER_OUT_OF_MEMORY -1
 
-
 /*
  *  XXX Might want to couple count_rows() with read_rows() to avoid duplication
  *      of some file I/O.
@@ -108,7 +101,6 @@ See LICENSE for the license
  */
 #define WORD_BUFFER_SIZE 4000
 
-
 typedef enum {
     START_RECORD,
     START_FIELD,
@@ -131,12 +123,14 @@ typedef enum {
 } ParserState;
 
 typedef enum {
-    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
+    QUOTE_MINIMAL,
+    QUOTE_ALL,
+    QUOTE_NONNUMERIC,
+    QUOTE_NONE
 } QuoteStyle;
 
-
-typedef void* (*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
-                            int *status);
+typedef void *(*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
+                             int *status);
 typedef int (*io_cleanup)(void *src);
 
 typedef struct parser_t {
@@ -156,38 +150,38 @@ typedef struct parser_t {
 
     // Store words in (potentially ragged) matrix for now, hmm
     char **words;
-    int *word_starts; // where we are in the stream
+    int *word_starts;  // where we are in the stream
     int words_len;
     int words_cap;
 
-    char *pword_start;    // pointer to stream start of current field
-    int word_start;       // position start of current field
+    char *pword_start;  // pointer to stream start of current field
+    int word_start;     // position start of current field
 
-    int *line_start;      // position in words for start of line
-    int *line_fields;     // Number of fields in each line
-    int lines;            // Number of (good) lines observed
-    int file_lines;       // Number of file lines observed (including bad or skipped)
-    int lines_cap;        // Vector capacity
+    int *line_start;   // position in words for start of line
+    int *line_fields;  // Number of fields in each line
+    int lines;         // Number of (good) lines observed
+    int file_lines;  // Number of file lines observed (including bad or skipped)
+    int lines_cap;   // Vector capacity
 
     // Tokenizing stuff
     ParserState state;
-    int doublequote;            /* is " represented by ""? */
-    char delimiter;             /* field separator */
-    int delim_whitespace;       /* delimit by consuming space/tabs instead */
-    char quotechar;             /* quote character */
-    char escapechar;            /* escape character */
+    int doublequote;      /* is " represented by ""? */
+    char delimiter;       /* field separator */
+    int delim_whitespace; /* delimit by consuming space/tabs instead */
+    char quotechar;       /* quote character */
+    char escapechar;      /* escape character */
     char lineterminator;
-    int skipinitialspace;       /* ignore spaces following delimiter? */
-    int quoting;                /* style of quoting to write */
+    int skipinitialspace; /* ignore spaces following delimiter? */
+    int quoting;          /* style of quoting to write */
 
     // krufty, hmm =/
     int numeric_field;
 
     char commentchar;
     int allow_embedded_newline;
-    int strict;                 /* raise exception on bad CSV */
+    int strict; /* raise exception on bad CSV */
 
-    int usecols; // Boolean: 1: usecols provided, 0: none provided
+    int usecols;  // Boolean: 1: usecols provided, 0: none provided
 
     int expected_fields;
     int error_bad_lines;
@@ -200,9 +194,9 @@ typedef struct parser_t {
     // thousands separator (comma, period)
     char thousands;
 
-    int header; // Boolean: 1: has header, 0: no header
-    int header_start; // header row start
-    int header_end;   // header row end
+    int header;        // Boolean: 1: has header, 0: no header
+    int header_start;  // header row start
+    int header_end;    // header row end
 
     void *skipset;
     int64_t skip_first_N_rows;
@@ -216,7 +210,6 @@ typedef struct parser_t {
     int skip_empty_lines;
 } parser_t;
 
-
 typedef struct coliter_t {
     char **words;
     int *line_start;
@@ -226,15 +219,13 @@ typedef struct coliter_t {
 void coliter_setup(coliter_t *self, parser_t *parser, int i, int start);
 coliter_t *coliter_new(parser_t *self, int i);
 
-/* #define COLITER_NEXT(iter) iter->words[iter->line_start[iter->line++] + iter->col] */
-// #define COLITER_NEXT(iter) iter.words[iter.line_start[iter.line++] + iter.col]
+#define COLITER_NEXT(iter, word)                          \
+    do {                                                  \
+        const int i = *iter.line_start++ + iter.col;      \
+        word = i < *iter.line_start ? iter.words[i] : ""; \
+    } while (0)
 
-#define COLITER_NEXT(iter, word) do { \
-    const int i = *iter.line_start++ + iter.col; \
-    word = i < *iter.line_start ? iter.words[i]: ""; \
-    } while(0)
-
-parser_t* parser_new(void);
+parser_t *parser_new(void);
 
 int parser_init(parser_t *self);
 
@@ -256,24 +247,17 @@ int tokenize_nrows(parser_t *self, size_t nrows);
 
 int tokenize_all_rows(parser_t *self);
 
-/*
-
-  Have parsed / type-converted a chunk of data and want to free memory from the
-  token stream
-
- */
-//int clear_parsed_lines(parser_t *self, size_t nlines);
-
-int64_t str_to_int64(const char *p_item, int64_t int_min,
-                     int64_t int_max, int *error, char tsep);
-//uint64_t str_to_uint64(const char *p_item, uint64_t uint_max, int *error);
-
-double xstrtod(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing);
-double precise_xstrtod(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing);
-double round_trip(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing);
-//int P_INLINE to_complex(char *item, double *p_real, double *p_imag, char sci, char decimal);
-//int P_INLINE to_longlong(char *item, long long *p_value);
-//int P_INLINE to_longlong_thousands(char *item, long long *p_value, char tsep);
+// String conversion helpers (int64, double, boolean) used when
+// type-converting parsed fields.
+
+int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
+                     int *error, char tsep);
+double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
+               int skip_trailing);
+double precise_xstrtod(const char *p, char **q, char decimal, char sci,
+                       char tsep, int skip_trailing);
+double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
+                  int skip_trailing);
 int to_boolean(const char *item, uint8_t *val);
 
-#endif // _PARSER_COMMON_H_
+#endif  // PANDAS_SRC_PARSER_TOKENIZER_H_