Monday, March 22, 2010

Reading files with node.js

I tried to find an example of using node.js to read a file line by line. I found none, so I had to write one myself.


// Module: FileLineReader
// Constructor: FileLineReader(filename, bufferSize = 8192)
// Methods: hasNextLine() -> boolean
//          nextLine() -> String
//
//
var fs = require("fs");
var sys = require("sys");

exports.FileLineReader = function(filename, bufferSize) {

    if(!bufferSize) {
        bufferSize = 8192;
    }

    //private:
    var currentPositionInFile = 0;
    var buffer = "";
    var fd = fs.openSync(filename, "r");


    // return -1
    // when EOF reached
    // fills buffer with next 8192 or less bytes
    var fillBuffer = function(position) {

        var res = fs.readSync(fd, bufferSize, position, "ascii");

        buffer += res[0];
        if (res[1] == 0) {
            return -1;
        }
        return position + res[1];

    };

    currentPositionInFile = fillBuffer(0);

    //public:
    this.hasNextLine = function() {
        while (buffer.indexOf("\n") == -1) {
            currentPositionInFile = fillBuffer(currentPositionInFile);
            if (currentPositionInFile == -1) {
                return false;
            }
        }

        if (buffer.indexOf("\n") > -1) {

            return true;
        }
        return false;
    };

    //public:
    this.nextLine = function() {
        var lineEnd = buffer.indexOf("\n");
        var result = buffer.substring(0, lineEnd);

        buffer = buffer.substring(result.length + 1, buffer.length);
        return result;
    };

    return this;
};




And some test code:

// Test script for FileLineReader: reads top-5.txt with a deliberately small
// buffer (10 bytes) so that lines span several chunk reads, and checks every
// line in order.
var assert = require("assert"),
        flr = require("./FileLineReader");

// Expected content of top-5.txt, one entry per line.
var expectedLines = [
    "yahoo.com",
    "youtube.com",
    "facebook.com",
    "live.com",
    "msn.com"
];

try {
    var reader = new flr.FileLineReader("top-5.txt", 10);

    expectedLines.forEach(function(expected, index) {
        // console.error writes to stderr, like the sys.debug call it replaces.
        console.error(__filename + (index === 0 ? " First line..." : " Next line..."));
        assert.strictEqual(reader.hasNextLine(), true);
        assert.strictEqual(reader.nextLine(), expected);
    });

    assert.strictEqual(reader.hasNextLine(), false);

} catch (err) {
    console.error(__filename + ": ERROR: " + err);
    // Report the failure through the exit status instead of only logging it.
    process.exitCode = 1;
}



The testdata file top-5.txt looked like this:

yahoo.com
youtube.com
facebook.com
live.com
msn.com

7 comments:

  1. This code works perfectly for what I needed. I am using it within some tools that I would like to release to GitHub eventually. Would you mind releasing the code under MIT license, or another?

    - Josh Du Lac
    joshdulac.com

    ReplyDelete
  2. I like to close my files early to keep the number of open file descriptors to a minimum so I added the following 'public' method:

    // Closes the reader's file descriptor.
    // With a callback the close is asynchronous and cb receives any error;
    // without one the descriptor is closed synchronously, because fs.close
    // rejects a missing callback on some Node.js versions.
    this.close = function(cb) {
        if (typeof cb === "function") {
            fs.close(fd, cb);
        } else {
            fs.closeSync(fd);
        }
    };

    ReplyDelete
  3. what about CRLF? i mean support files with windows EOL

    ReplyDelete
  4. Thanks for this code. It is very helpful for me and my research.

    ReplyDelete
  5. Works well for loading in enormous files - added an encoding parameter to handle other file types (obviously, no big deal).

    ReplyDelete
  6. Hey Dirk,

    Couldn't find your email but I made this into an NPM package (https://github.com/davidbenhaim/FileLineReader). You're listed as one of the authors.

    - David

    ReplyDelete
  7. your readSync usage appears to be currently deprecated. i changed things to use buffers & made it a class 'cuz i'm a .js noob and don't really know how to deal with exports:


    const fs = require('fs')

    /**
     * Synchronous line reader that pulls a file through a fixed-size Buffer.
     *
     * Lines end at "\n"; a "\r" directly before the line end is stripped.
     * A last line without a trailing newline is returned as well, while a file
     * that ends with a newline does not produce an extra empty line.
     */
    class LineReader {

        /**
         * @param {string} filename - path of the file to read
         * @param {number} [bufSize=8192] - size of the read buffer in bytes
         * @throws {Error} if the file cannot be opened or read
         */
        constructor (filename, bufSize) {
            this.bufferSize = bufSize ? bufSize : 8192;
            // Allocate first so an invalid size cannot leak an open descriptor.
            this.buffer = Buffer.alloc(this.bufferSize);
            this.fd = fs.openSync(filename, "r");
            this.filePos = 0;
            this.bufferPos = 0;
            this.bufferBytes = 0;
            try {
                this.fillBuffer();
            } catch (err) {
                fs.closeSync(this.fd);
                throw err;
            }
        }

        /**
         * Overwrites the buffer with the next this.bufferSize (or fewer) bytes.
         * @returns {number} bytes read; 0 at end of file
         */
        fillBuffer () {
            const bytesRead = fs.readSync(this.fd, this.buffer, 0, this.bufferSize, this.filePos);
            this.bufferBytes = bytesRead;
            this.filePos += bytesRead;
            this.bufferPos = 0;
            return bytesRead;
        }

        /**
         * @returns {string|null} the next line without its terminator, or null
         *          once the file is exhausted
         */
        nextLine() {
            if (this.bufferBytes <= 0) {
                // last fillBuffer() was @ or past end of file
                return null;
            }

            const LINE_FEED = 0x0a;
            const CARRIAGE_RETURN = 0x0d;

            // Decode only once the whole line is assembled, so a multi-byte
            // UTF-8 character split across two reads stays intact.
            const decode = (parts) => {
                const bytes = Buffer.concat(parts);
                const hasCr = bytes.length > 0 && bytes[bytes.length - 1] === CARRIAGE_RETURN;
                return bytes.toString("utf8", 0, hasCr ? bytes.length - 1 : bytes.length);
            };

            const parts = [];
            for (;;) {
                if (this.bufferPos >= this.bufferBytes && this.fillBuffer() <= 0) {
                    // End of file: whatever was collected is the last line.
                    return parts.length > 0 ? decode(parts) : null;
                }

                // Only the first bufferBytes bytes are valid; the rest of the
                // buffer may still hold data from an earlier, longer read.
                const valid = this.buffer.subarray(this.bufferPos, this.bufferBytes);
                const newlineAt = valid.indexOf(LINE_FEED);
                if (newlineAt >= 0) {
                    parts.push(valid.subarray(0, newlineAt));
                    this.bufferPos += newlineAt + 1;
                    return decode(parts);
                }

                // Copy, because the next fillBuffer() overwrites this.buffer.
                parts.push(Buffer.from(valid));
                this.bufferPos = this.bufferBytes;
            }
        }

        /**
         * Closes the file descriptor.
         * @param {function(Error=)} [cb] - if given, close asynchronously and
         *        report the outcome to cb; otherwise close synchronously
         */
        close(cb) {
            if (typeof cb === "function") {
                fs.close(this.fd, cb);
            } else {
                fs.closeSync(this.fd);
            }
        }

    }

    // make a 3 line test file with no trailing newline:
    const testPath = "bozo.txt";
    const testData = "one line\nanother line\nand another";
    fs.writeFileSync(testPath, testData);

    // test using small buffer size to make sure we cross boundaries properly:
    // (reader is declared explicitly; an undeclared assignment is a
    // ReferenceError in strict mode and ES modules)
    const reader = new LineReader(testPath, 4);
    let line;
    while ((line = reader.nextLine()) !== null) {
        console.log(line);
    }
    // fs.close needs a callback on some Node.js versions, so always pass one.
    reader.close((err) => {
        if (err) {
            throw err;
        }
    });

    console.log("Done !");

    ReplyDelete