Read array of files using async/await? [duplicate]

Keywords: javascript asynchronous ecmascript-6 promise async-await

Question: 

Are there any issues with using async/await in a forEach loop? I'm trying to loop through an array of files and await on the contents of each file.

import fs from 'fs-promise'

async function printFiles () {
  const files = await getFilePaths() // Assume this works fine

  files.forEach(async (file) => {
    const contents = await fs.readFile(file, 'utf8')
    console.log(contents)
  })
}

printFiles()

This code does work, but could something go wrong with this? I had someone tell me that you're not supposed to use async/await in a higher order function like this so I just wanted to ask if there was any issue with this.

Answers: 

Sure the code does work, but I'm pretty sure it doesn't do what you expect it to do. It just fires off multiple asynchronous calls, but the printFiles function does immediately return after that.

If you want to read the files in sequence, you cannot use forEach indeed. Just use a modern for … of loop instead, in which await will work as expected:

async function printFiles () {
  const files = await getFilePaths();

  for (const file of files) {
    const contents = await fs.readFile(file, 'utf8');
    console.log(contents);
  }
}

If you want to read the files in parallel, you cannot use forEach indeed. Each of the async callback function calls does return a promise, but you're throwing them away instead of awaiting them. Just use map instead, and you can await the array of promises that you'll get with Promise.all:

async function printFiles () {
  const files = await getFilePaths();

  await Promise.all(files.map(async (file) => {
    const contents = await fs.readFile(file, 'utf8')
    console.log(contents)
  }));
}

The p-iteration module on npm implements the Array iteration methods so they can be used in a very straightforward way with async/await.

An example with your case:

const { forEach } = require('p-iteration');
const fs = require('fs-promise');

(async function printFiles () {
  const files = await getFilePaths();

  await forEach(files, async (file) => {
    const contents = await fs.readFile(file, 'utf8');
    console.log(contents);
  });
})();

Both the solutions above work, however, Antonio's does the job with less code, here is how it helped me resolve data from my database, from several different child refs and then pushing them all into an array and resolving it in a promise after all is done:

Promise.all(PacksList.map((pack)=>{
    return fireBaseRef.child(pack.folderPath).once('value',(snap)=>{
        snap.forEach( childSnap => {
            const file = childSnap.val()
            file.id = childSnap.key;
            allItems.push( file )
        })
    })
})).then(()=>store.dispatch( actions.allMockupItems(allItems)))

it's pretty painless to pop a couple methods in a file that will handle asynchronous data in a serialized order and give a more conventional flavour to your code. For example:

module.exports = function () {
  var self = this;

  this.each = async (items, fn) => {
    if (items && items.length) {
      await Promise.all(
        items.map(async (item) => {
          await fn(item);
        }));
    }
  };

  this.reduce = async (items, fn, initialValue) => {
    await self.each(
      items, async (item) => {
        initialValue = await fn(initialValue, item);
      });
    return initialValue;
  };
};

now, assuming that's saved at './myAsync.js' you can do something similar to the below in an adjacent file:

...
/* your server setup here */
...
var MyAsync = require('./myAsync');
var Cat = require('./models/Cat');
var Doje = require('./models/Doje');
var example = async () => {
  var myAsync = new MyAsync();
  var doje = await Doje.findOne({ name: 'Doje', noises: [] }).save();
  var cleanParams = [];

  // FOR EACH EXAMPLE
  await myAsync.each(['bork', 'concern', 'heck'], 
    async (elem) => {
      if (elem !== 'heck') {
        await doje.update({ $push: { 'noises': elem }});
      }
    });

  var cat = await Cat.findOne({ name: 'Nyan' });

  // REDUCE EXAMPLE
  var friendsOfNyanCat = await myAsync.reduce(cat.friends,
    async (catArray, friendId) => {
      var friend = await Friend.findById(friendId);
      if (friend.name !== 'Long cat') {
        catArray.push(friend.name);
      }
    }, []);
  // Assuming Long Cat was a friend of Nyan Cat...
  assert(friendsOfNyanCat.length === (cat.friends.length - 1));
}

One important caveat is: The await + for .. of method and the forEach + async way actually have different effect.

Having await inside a real for loop will make sure all async calls are executed one by one. And the forEach + async way will fire off all promises at the same time, which is faster but sometimes overwhelmed(if you do some DB query or visit some web services with volume restrictions and do not want to fire 100,000 calls at a time).

You can also use reduce + promise(less elegant) if you do not use async/await and want to make sure files are read one after another.

files.reduce((lastPromise, file) => 
 lastPromise.then(() => 
   fs.readFile(file, 'utf8')
 ), Promise.resolve()
)

Or you can create a forEachAsync to help but basically use the same for loop underlying.

Array.prototype.forEachAsync = async function(cb){
    for(let x of this){
        await cb(x);
    }
}

I would use the well-tested (millions of downloads per week) pify and async modules. If you are unfamiliar with the async module, I highly recommend you check out its docs. I've seen multiple devs waste time recreating its methods, or worse, making difficult-to-maintain async code when higher-order async methods would simplify code.

const async = require('async')
const fs = require('fs-promise')
const pify = require('pify')

async function getFilePaths() {
    return Promise.resolve([
        './package.json',
        './package-lock.json',
    ]);
}

async function printFiles () {
  const files = await getFilePaths()

  await pify(async.eachSeries)(files, async (file) => {  // <-- run in series
  // await pify(async.each)(files, async (file) => {  // <-- run in parallel
    const contents = await fs.readFile(file, 'utf8')
    console.log(contents)
  })
  console.log('HAMBONE')
}

printFiles().then(() => {
    console.log('HAMBUNNY')
})
// ORDER OF LOGS:
// package.json contents
// package-lock.json contents
// HAMBONE
// HAMBUNNY
```

In addition to @Bergi’s answer, I’d like to offer a third alternative. It's very similar to @Bergi’s 2nd example, but instead of awaiting each readFile individually, you create an array of promises, each which you await at the end.

import fs from 'fs-promise';
async function printFiles () {
  const files = await getFilePaths();

  const promises = files.map((file) => fs.readFile(file, 'utf8'))

  const contents = await Promise.all(promises)

  contents.forEach(console.log);
}

Note that the function passed to .map() does not need to be async, since fs.readFile returns a Promise object anyway. Therefore promises is an array of Promise objects, which can be sent to Promise.all().

In @Bergi’s answer, the console may log file contents out of order. For example if a really small file finishes reading before a really large file, it will be logged first, even if the small file comes after the large file in the files array. However, in my method above, you are guaranteed the console will log the files in the same order as they are read.

Using Task, futurize, and a traversable List, you can simply do

async function printFiles() {
  const files = await getFiles();

  List(files).traverse( Task.of, f => readFile( f, 'utf-8'))
    .fork( console.error, console.log)
}

Here is how you'd set this up

import fs from 'fs';
import { futurize } from 'futurize';
import Task from 'data.task';
import { List } from 'immutable-ext';

const future = futurizeP(Task)
const readFile = future(fs.readFile)

Another way to have structured the desired code would be

const printFiles = files => 
  List(files).traverse( Task.of, fn => readFile( fn, 'utf-8'))
    .fork( console.error, console.log)

Or perhaps even more functionally oriented

// 90% of encodings are utf-8, making that use case super easy is prudent

// handy-library.js
export const readFile = f =>
  future(fs.readFile)( f, 'utf-8' )

export const arrayToTaskList = list => taskFn => 
  List(files).traverse( Task.of, taskFn ) 

export const readFiles = files =>
  arrayToTaskList( files, readFile )

export const printFiles = files => 
  readFiles(files).fork( console.error, console.log)

Then from the parent function

async function main() {
  /* awesome code with side-effects before */
  printFiles( await getFiles() );
  /* awesome code with side-effects after */
}

If you really wanted more flexibility in encoding, you could just do this (for fun, I'm using the proposed Pipe Forward operator )

import { curry, flip } from 'ramda'

export const readFile = fs.readFile 
  |> future,
  |> curry,
  |> flip

export const readFileUtf8 = readFile('utf-8')

PS - I didn't try this code on the console, might have some typos... "straight freestyle, off the top of the dome!" as the 90s kids would say. :-p

Here are some forEachAsync prototypes. Note you'll need to await them:

Array.prototype.forEachAsync = async function (fn) {
    for (let t of this) { await fn(t) }
}

Array.prototype.forEachAsyncParallel = async function (fn) {
    await Promise.all(this.map(fn));
}

Note while you may include this in your own code, you should not include this in libraries you distribute to others (to avoid polluting their globals).

Instead of Promise.all in conjunction with Array.prototype.map (which does not guarantee the order in which the Promises are resolved), I use Array.prototype.reduce, starting with a resolved Promise:

async function printFiles () {
  const files = await getFilePaths();

  await files.reduce(async (promise, file) => {
    // This line will wait for the last async function to finish.
    // The first iteration uses an already resolved Promise
    // so, it will immediately continue.
    await promise;
    const contents = await fs.readFile(file, 'utf8');
    console.log(contents);
  }, Promise.resolve());
}

With ES2018, you are able to greatly simplify all of the above answers to:

async function printFiles () {
  const files = await getFilePaths()

  for await (const file of fs.readFile(file, 'utf8')) {
    console.log(contents)
  }
}

See spec: https://github.com/tc39/proposal-async-iteration


2018-09-10: This answer has been getting a lot attention recently, please see Axel Rauschmayer's blog post for further information about asynchronous iteration: http://2ality.com/2016/10/asynchronous-iteration.html

Similar to Antonio Val's p-iteration, an alternative npm module is async-af:

const AsyncAF = require('async-af');
const fs = require('fs-promise');

function printFiles() {
  // since AsyncAF accepts promises or non-promises, there's no need to await here
  const files = getFilePaths();

  AsyncAF(files).forEach(async file => {
    const contents = await fs.readFile(file, 'utf8');
    console.log(contents);
  });
}

printFiles();

Alternatively, async-af has a static method (log/logAF) that logs the results of promises:

const AsyncAF = require('async-af');
const fs = require('fs-promise');

function printFiles() {
  const files = getFilePaths();

  AsyncAF(files).forEach(file => {
    AsyncAF.log(fs.readFile(file, 'utf8'));
  });
}

printFiles();

However, the main advantage of the library is that you can chain asynchronous methods to do something like:

const aaf = require('async-af');
const fs = require('fs-promise');

const printFiles = () => aaf(getFilePaths())
  .map(file => fs.readFile(file, 'utf8'))
  .forEach(file => aaf.log(file));

printFiles();

async-af

Currently the Array.forEach prototype property doesn't support async operations, but we can create our own poly-fill to meet our needs.

// Example of asyncForEach Array poly-fill for NodeJs
// file: asyncForEach.js
// Define asynForEach function 
async function asyncForEach(iteratorFunction){
  let indexer = 0
  for(let data of this){
    await iteratorFunction(data, indexer)
    indexer++
  }
}
// Append it as an Array prototype property
Array.prototype.asyncForEach = asyncForEach
module.exports = {Array}

And that's it! You now have an async forEach method available on any arrays that are defined after these to operations.

Let's test it...

// Nodejs style
// file: someOtherFile.js

const readline = require('readline')
Array = require('./asyncForEach').Array
const log = console.log

// Create a stream interface
function createReader(options={prompt: '>'}){
  return readline.createInterface({
    input: process.stdin
    ,output: process.stdout
    ,prompt: options.prompt !== undefined ? options.prompt : '>'
  })
}
// Create a cli stream reader
async function getUserIn(question, options={prompt:'>'}){
  log(question)
  let reader = createReader(options)
  return new Promise((res)=>{
    reader.on('line', (answer)=>{
      process.stdout.cursorTo(0, 0)
      process.stdout.clearScreenDown()
      reader.close()
      res(answer)
    })
  })
}

let questions = [
  `What's your name`
  ,`What's your favorite programming language`
  ,`What's your favorite async function`
]
let responses = {}

async function getResponses(){
// Notice we have to prepend await before calling the async Array function
// in order for it to function as expected
  await questions.asyncForEach(async function(question, index){
    let answer = await getUserIn(question)
    responses[question] = answer
  })
}

async function main(){
  await getResponses()
  log(responses)
}
main()
// Should prompt user for an answer to each question and then 
// log each question and answer as an object to the terminal

We could do the same for some of the other array functions like map...

async function asyncMap(iteratorFunction){
  let newMap = []
  let indexer = 0
  for(let data of this){
    newMap[indexer] = await iteratorFunction(data, indexer, this)
    indexer++
  }
  return newMap
}

Array.prototype.asyncMap = asyncMap

... and so on :)

Some things to note:

  • Your iteratorFunction must be an async function or promise
  • Any arrays created before Array.prototype.<yourAsyncFunc> = <yourAsyncFunc> will not have this feature available

Bergi's solution works nicely when fs is promise based. You can use bluebird, fs-extra or fs-promise for this.

However, solution for node's native fs libary is as follows:

const result = await Promise.all(filePaths
    .map( async filePath => {
      const fileContents = await getAssetFromCache(filePath, async function() {

        // 1. Wrap with Promise    
        // 2. Return the result of the Promise
        return await new Promise((res, rej) => {
          fs.readFile(filePath, 'utf8', function(err, data) {
            if (data) {
              res(data);
            }
          });
        });
      });

      return fileContents;
    }));

Note: require('fs') compulsorily takes function as 3rd arguments, otherwise throws error:

TypeError [ERR_INVALID_CALLBACK]: Callback must be a function