File

src/agent/io/scanner.ts

Index

Properties

Properties

hash
hash: string
Type : string
Optional
lines
lines: number
Type : number
import * as crypto from 'crypto';
import * as events from 'events';
import * as fs from 'fs';
import * as path from 'path';

// TODO: Make this more precise.
// `findit2` is loaded through `require` and typed by hand. In this file it is
// called with a directory path and the returned emitter is observed for the
// `error`, `directory`, `file` and `end` events.
// eslint-disable-next-line @typescript-eslint/no-var-requires
const findit: (dir: string) => events.EventEmitter = require('findit2');

// TODO: Make this more precise.
// `split` is loaded through `require` and typed by hand. In this file a file
// read stream is piped into the stream it returns, and every `data` event is
// counted as one line.
// NOTE(review): presumably it splits on newlines by default -- confirm against
// the `split` package documentation.
// eslint-disable-next-line @typescript-eslint/no-var-requires
const split: () => fs.WriteStream = require('split');

/**
 * Statistics recorded for a single scanned file.
 */
export interface FileStats {
  // TODO: Verify that this member should actually be optional.
  /**
   * Hex-encoded SHA-1 digest computed from the file's line chunks. It is
   * `undefined` when the file was scanned with hashing disabled.
   */
  hash?: string;
  /**
   * Number of chunks emitted by the line splitter while reading the file.
   */
  lines: number;
}

// TODO: Update the code so that `undefined` is not a possible property value
/**
 * Maps each scanned filename to the statistics gathered for that file.
 */
export interface ScanStats {
  [filename: string]: FileStats | undefined;
}

/**
 * The outcome of a filesystem scan: per-file statistics, per-file errors and
 * a single hash describing the scan as a whole.
 */
export interface ScanResults {
  /** Returns the errors hit while scanning, keyed by filename. */
  errors(): Map<string, Error>;
  /** Returns the statistics of every successfully scanned file. */
  all(): ScanStats;
  /** Returns the statistics of only those files whose name matches `regex`. */
  selectStats(regex: RegExp): ScanStats;
  /**
   * Returns the names of the files matching `regex`, expressed relative to
   * `baseDir`.
   */
  selectFiles(regex: RegExp, baseDir: string): string[];
  /** Hash identifying the scanned file set (computed or supplied by the caller). */
  hash: string;
}

class ScanResultsImpl implements ScanResults {
  /**
   * Encapsulates the results of a filesystem scan with methods
   * to easily select scan information or filenames for a
   * specific subset of the files listed in the scan results.
   *
   * @param stats An object that contains filenames
   *  as keys where each key maps to an object containing the
   *  hash and number of lines for the specified file.  This
   *  information is accessed via the `hash` and `lines`
   *  attributes respectively
   * @param errorMap The errors encountered during the scan, keyed by the
   *  name of the file that could not be processed.
   * @param hash A hashcode computed from the contents of all the files.
   */
  constructor(
    private readonly stats: ScanStats,
    readonly errorMap: Map<string, Error>,
    readonly hash: string
  ) {}

  /**
   * Used to get the errors recorded during the scan, keyed by filename.
   */
  errors(): Map<string, Error> {
    return this.errorMap;
  }

  /**
   * Used to get all of the file scan results.
   */
  all(): ScanStats {
    return this.stats;
  }

  /**
   * Used to get the file scan results for only the files
   * whose filenames match the specified regex.
   *
   * @param regex The regex that tests a filename
   *  to determine if the scan results for that filename
   *  should be included in the returned results.
   * @returns A new object holding only the matching entries.
   */
  selectStats(regex: RegExp): ScanStats {
    const selected: ScanStats = {};
    for (const key of Object.keys(this.stats)) {
      if (ScanResultsImpl.matches(regex, key)) {
        selected[key] = this.stats[key];
      }
    }
    return selected;
  }

  /**
   * Used to get only the file paths in the scan results
   * where the filenames match the specified regex. Each path that
   * lies under the base directory is returned relative to it; any
   * other path is returned normalized but otherwise unchanged.
   *
   * @param regex The regex that tests a filename to
   *  determine if the scan results for that filename should
   *  be included in the returned results.
   * @param baseDir The absolute path to the directory
   *  from which all of the returned paths should be relative
   *  to.
   */
  selectFiles(regex: RegExp, baseDir: string): string[] {
    // ensure the base directory has only a single trailing path separator
    const prefix = path.normalize(baseDir + path.sep);
    return Object.keys(this.stats)
      .filter(file => file.length > 0 && ScanResultsImpl.matches(regex, file))
      .map(file => {
        const normalized = path.normalize(file);
        // Strip the base directory only when it is a true leading prefix;
        // removing it from the middle of a path would corrupt the path.
        return normalized.startsWith(prefix)
          ? normalized.slice(prefix.length)
          : normalized;
      });
  }

  /**
   * Tests `candidate` against `regex` from the start of the string.
   *
   * Regexes with the global or sticky flag remember `lastIndex` between
   * calls to `test`, which would otherwise make the result for one filename
   * depend on the previously tested filename.
   */
  private static matches(regex: RegExp, candidate: string): boolean {
    regex.lastIndex = 0;
    return regex.test(candidate);
  }
}

/**
 * Scans the directory tree rooted at `baseDir` and gathers statistics for
 * every file whose path matches `regex`.
 *
 * @param baseDir top-level directory to scan
 * @param regex selects which of the discovered files are included
 * @param precomputedHash if given, it is forwarded to the statistics step so
 *  that hashing can be skipped
 * @returns the results of the scan
 */
export async function scan(
  baseDir: string,
  regex: RegExp,
  precomputedHash?: string
): Promise<ScanResults> {
  return computeStats(await findFiles(baseDir, regex), precomputedHash);
}

/**
 * Gathers statistics for each of the given files and produces a hash that
 * identifies the scanned file set.
 *
 * Files are processed one at a time. A file that cannot be read does not
 * abort the scan; its error is recorded under its filename in the result's
 * error map and the file is left out of the statistics.
 *
 * @param fileList array of filenames
 * @param precomputedHash if available, hashing operations will be omitted
 *  during scan and this value is used as the hash of the result
 * @returns the scan results. When `fileList` is empty the result has no
 *  statistics, no errors and the placeholder hash `EMPTY-no-js-files`.
 */
async function computeStats(
  fileList: string[],
  precomputedHash?: string
): Promise<ScanResults> {
  // return a valid, if fake, result when there are no js files to hash.
  if (fileList.length === 0) {
    return new ScanResultsImpl({}, new Map(), 'EMPTY-no-js-files');
  }

  // TODO: Address the case where the array contains `undefined`.
  const hashes: Array<string | undefined> = [];
  const statistics: ScanStats = {};
  const errors = new Map<string, Error>();

  for (const filename of fileList) {
    try {
      const fileStats = await statsForFile(filename, !precomputedHash);
      if (!precomputedHash) {
        hashes.push(fileStats.hash);
      }
      statistics[filename] = fileStats;
    } catch (err) {
      // A caught value is not guaranteed to be an Error; wrap anything else
      // so the error map always holds real Error instances.
      errors.set(filename, err instanceof Error ? err : new Error(String(err)));
    }
  }

  if (precomputedHash) {
    return new ScanResultsImpl(statistics, errors, precomputedHash);
  }

  // Sort the hashes to get a deterministic order as the files may
  // not be in the same order each time we scan the disk.
  const buffer = hashes.sort().join();
  const sha1 = crypto.createHash('sha1').update(buffer).digest('hex');
  return new ScanResultsImpl(statistics, errors, 'SHA1-' + sha1);
}

/**
 * Given a base-directory, this function scans the subtree and finds all the
 * files whose path matches the given regular expression. Directories named
 * `.git` or `node_modules` are not descended into.
 *
 * @param baseDir top-level directory to scan
 * @param regex the regular expression that specifies the types of
 *  files to find based on their filename
 * @returns a promise for the list of matching file paths. It is rejected
 *  when `baseDir` is empty, or, once the walk has ended, with the most
 *  recent error reported during the walk.
 */
function findFiles(baseDir: string, regex: RegExp): Promise<string[]> {
  return new Promise<string[]>((resolve, reject) => {
    let error: Error | undefined;

    if (!baseDir) {
      reject(new Error('hasher.findJSFiles requires a baseDir argument'));
      return;
    }

    const find = findit(baseDir);
    const fileList: string[] = [];

    find.on('error', (err: Error) => {
      // Only remember the error here; the promise is settled on `end`.
      error = err;
    });

    find.on('directory', (dir: string, ignore: fs.Stats, stop: () => void) => {
      const base = path.basename(dir);
      if (base === '.git' || base === 'node_modules') {
        stop(); // do not descend
      }
    });

    find.on('file', (file: string) => {
      // Global and sticky regexes carry `lastIndex` over between calls to
      // `test`; reset it so every file is matched from the start of its path.
      regex.lastIndex = 0;
      if (regex.test(file)) {
        fileList.push(file);
      }
    });

    find.on('end', () => {
      // Note: the `end` event fires even after an error
      if (error) {
        reject(error);
      } else {
        resolve(fileList);
      }
    });
  });
}

/**
 * Reads the given file through the line splitter, counting the chunks it
 * emits and, when requested, feeding each chunk into a SHA-1 hash.
 *
 * @param filename path of the file to read
 * @param shouldHash whether a hash should be computed
 * @returns a promise for the line count and, when `shouldHash` is true, the
 *  hex-encoded digest. It is rejected when the file stream or the splitter
 *  reports an error.
 * @private
 */
function statsForFile(
  filename: string,
  shouldHash: boolean
): Promise<FileStats> {
  return new Promise<FileStats>((resolve, reject) => {
    const source = fs.createReadStream(filename);

    source.on('error', err => reject(err));

    // Processing is wired up only once the file has actually been opened.
    source.on('open', () => {
      const hasher = shouldHash ? crypto.createHash('sha1') : undefined;
      let lineCount = 0;
      let splitterError: Error | undefined;

      const lineStream = source.pipe(split());

      lineStream.on('error', (e: Error) => {
        // Remembered here and reported when the splitter ends.
        splitterError = e;
      });

      lineStream.on('data', (chunk: string) => {
        if (hasher) {
          hasher.update(chunk);
        }
        lineCount++;
      });

      lineStream.on('end', () => {
        if (splitterError) {
          reject(splitterError);
          return;
        }
        resolve({
          hash: hasher ? hasher.digest('hex') : undefined,
          lines: lineCount,
        });
      });
    });
  });
}

result-matching ""

    No results matching ""