Node.js and Async Calls to a PostgreSQL Database

While working on an application for which I had to parse large amounts of data, I came across an interesting asynchronous issue when using Node's PostgreSQL client, node-postgres. Due to node-postgres's asynchronous nature and the large amount of data being sent to the database, my connection was closing before the queries had a chance to complete. Only part of the data made it into the database, and I would get the following error: "Error: Connection was ended during query". Let's take a look at the code to see what I was doing wrong.

var pg = require('pg');

pg.connect(conString, function(err, client, done) {
  var query = "INSERT INTO tree (name, qspeciesid, siteorder, qsiteinfo, qcaretaker, plantdate, dbh, plotsize, permitnotes, locationid) VALUES ('tree', (select qspeciesid from qspecies where qspecies = $1), $2, $3, $4, $5, $6, $7, $8, (select locationid from location where xcoord = $9));";

  for (var x = 0, count = trees.length; x < count; x++) {
    var xcoord = trees[x].xcoord || 999,
      qspecies = trees[x].qspecies,
      siteorder = trees[x].siteorder || 9999,
      qsiteinfo = trees[x].qsiteinfo || 'unknown',
      qcaretaker = trees[x].qcaretaker || 'unknown',
      plantdate = trees[x].plantdate || new Date(0),
      dbh = trees[x].dbh || 999,
      plotsize = trees[x].plotsize || 'unknown',
      permitnotes = trees[x].permitnotes || 'unknown';

    client.query(query, [qspecies, siteorder, qsiteinfo, qcaretaker, plantdate, dbh, plotsize, permitnotes, xcoord],
      function(err, result) {
        console.log(err);
      });
  }

  // done() runs right away, while the queries above are still pending
  done();
});

The issue here was that the for loop fired off a new client.query for each record without waiting for any of them to finish, and then called done() immediately, handing the client back to the pool while queries were still pending. node-postgres's client pooling kept the application from crashing, but it could not keep the connection from closing before all the queries were completed. For a small number of queries (under 10,000 or so) this would usually not be an issue, but since I was trying to run about 50,000 queries at once, the queries were failing. The solution was to throttle the queries, share a single client across all of them, and only call done() once every query had finished. Here's the working version of the code:

var pg = require('pg');
var async = require('async');

pg.connect(conString, function(err, client, done) {
  var query = "INSERT INTO tree (name, qspeciesid, siteorder, qsiteinfo, qcaretaker, plantdate, dbh, plotsize, permitnotes, locationid) VALUES ('tree', (select distinct qspeciesid from qspecies where qspecies = $1 limit 1), $2, $3, $4, $5, $6, $7, $8, (select distinct locationid from location where xcoord = $9 limit 1));";

  async.map(trees, function(tree, cb) {
    var xcoord = tree.xcoord || 999,
      qspecies = tree.qspecies,
      siteorder = tree.siteorder || 9999,
      qsiteinfo = tree.qsiteinfo || 'unknown',
      qcaretaker = tree.qcaretaker || 'unknown',
      plantdate = tree.plantdate || new Date(0),
      dbh = tree.dbh || 999,
      plotsize = tree.plotsize || 'unknown',
      permitnotes = tree.permitnotes || 'unknown';

    // cb fires when this query completes, so the final callback below
    // only runs once every insert has finished
    client.query(query, [qspecies, siteorder, qsiteinfo, qcaretaker, plantdate, dbh, plotsize, permitnotes, xcoord], cb);
  }, function(error, results) {
    console.log("Finished inserts!", error, results);
    done();
  });
});

Notice that in this version I am using async's map function and passing its callback straight to each client.query call. The queries still share a single client, which queues them internally rather than flooding the connection, and async.map waits for every query's callback to fire before running the final callback that calls done(). This solved the issue, and now my database and my node server are working in sync.
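If you want to go a step further and explicitly cap how many queries are in flight at once, rather than relying on the client's internal queue, the async library also offers eachLimit (and mapLimit). Below is a minimal sketch of that approach; it assumes the same conString and trees variables as above, and the limit of 10 is an arbitrary choice:

var pg = require('pg');
var async = require('async');

pg.connect(conString, function(err, client, done) {
  var query = "INSERT INTO tree (name, qspeciesid, siteorder, qsiteinfo, qcaretaker, plantdate, dbh, plotsize, permitnotes, locationid) VALUES ('tree', (select distinct qspeciesid from qspecies where qspecies = $1 limit 1), $2, $3, $4, $5, $6, $7, $8, (select distinct locationid from location where xcoord = $9 limit 1));";

  // Run at most 10 queries at a time; eachLimit only starts another
  // once a previous query's callback has fired.
  async.eachLimit(trees, 10, function(tree, cb) {
    var params = [
      tree.qspecies,
      tree.siteorder || 9999,
      tree.qsiteinfo || 'unknown',
      tree.qcaretaker || 'unknown',
      tree.plantdate || new Date(0),
      tree.dbh || 999,
      tree.plotsize || 'unknown',
      tree.permitnotes || 'unknown',
      tree.xcoord || 999
    ];
    client.query(query, params, cb);
  }, function(error) {
    console.log("Finished inserts!", error);
    done(); // release the client only after every insert has completed
  });
});

Setting the limit to 1 would make the inserts run strictly one after another, which can be handy when tracking down a single misbehaving row.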

Paulo Diniz