JSDoc: Source: AppRecom.js

// CONSTANTS
const RULES_ENCODING = "utf-8";

/**
 * <p>AppRecom is a tool for getting app recommendations based on a user's location.</p>
 *
 * <p>
 * The two primary methods in this class are:
 * <ol>
 * <li>train()</li>
 * <li>getApps()</li>
 * </ol>
 *
 * For specific information about each method, check the method documentation.
 */
class AppRecom{

  /**
   * Instantiate an AppRecom object for training and fetching recommendations.
   * @param {Boolean} debug - option for console log debugging
   */
  constructor(debug = false){
    this.itemsets = [];
    this.rules = [];
    this.DEBUG = debug;
  }

  /**
   * <p>Train the recommender against a data set. Entries in the data array look like:</p>
   * <p>
   * {pname: "Place Name", pcat: "Place Category", aname: "App Name", acat: "App Category"}
   * </p>
   *
   * @param {Array<Object>} data - data to find association rules on.
   * @param {Decimal} min_support - the minimum support percentage for an itemset (0.0 - 1.0)
   * @param {Decimal} min_conf - the minimum confidence percentage for a rule (0.0 - 1.0)
   * @param {Number} test_ratio - ratio of training data to test data (0.0 - 1.0)
   * @returns rules
   */
  train(data, min_support = 0.02, min_conf = 0.8, test_ratio = 0.8){
    // TRAIN AND TEST
    this._testData(data, min_support, min_conf, test_ratio, 5);
    // DONE TESTING

    // Get final rules using all data
    const optimalItemset = this._getOptimalItemset(data, min_support);
    const rules = this._getRules(data, optimalItemset, min_conf); // get the rules
    this.rules = rules;
  }

  /**
   * <p>Retrieves app category recommendations that best fit this location as an array.</p>
   *
   * @param {String} locationCategory - the category of the location (e.g. 'cafe')
   */
  getApps(location){
    const appRecommendations = this.rules[location] ? this.rules[location] : [];
    return appRecommendations;
  }

  /**
   * Tests the learning by splitting into training and testing data and verifying results.
   * @private
   */
  _testData(data, min_support, min_conf, test_ratio, rounds){
    let averageError = 0;

    // Determines an integer # for training instances
    const numTraining = Math.round((data.length) * test_ratio);

    for (let count = 0; count < rounds; count++) {
      let shuffleData = data.slice();
      shuffleData = shuffle(shuffleData);

      // Training and Testing data
      let trainingSet = [];
      for (let i = 0; i <= numTraining; i++){
        const item = shuffleData.pop();
        trainingSet.push(item);
      }
      let testingItemset = shuffleData;
      // Training rules
      const trainingItemset = this._getOptimalItemset(trainingSet, min_support);
      const trainingRules = this._getRules(trainingSet, trainingItemset, min_conf);

      // Logs
      if(this.DEBUG) print(`==============\nUNKNOWN TEST\ndata length: ${data.length} - training length: ${trainingSet.length} - testing length: ${testingItemset.length}\n`);

      // Get the error rate for this data.
      const error = this._testTrainingSet(trainingRules, testingItemset);
      averageError += error;
      if(this.DEBUG) print(`Round ${count+1} unknown rate: ${error}`);
    }
    averageError = Math.round(averageError / rounds * 100) / 100;
    if(this.DEBUG) print(`\nAverage unknown rate: ${averageError}`);
  }

  /**
   * Gets the optimal itemsets with the specified minimum support.
   * @private
   * @param {Array<Object>} data - the data to find itemsets on
   * @param {Decimal} min_support - the minimum support percentage to include this item set
   * @returns {Set} optimal itemset
   */
  _getOptimalItemset(data, min_support){
    return this._itemsetPrune(this._countItemsets(data), data.length, min_support);
  }

  /**
   * Prunes the itemsets for those who match the min_support
   * @private
   * @param {Map<String, Number>} itemsets - item entry as JSON Array, item frequency
   * @param {Number} length - length of the original data
   * @param {Number} min_support - the minimum support accepted for an itemset
   * @returns {Map<String, Number>} keepers - a map of rules and support
   */
  _itemsetPrune(itemsetSupport, length, min_support){
    const keepers = new Map(itemsetSupport);
    for ([instance, support] of itemsetSupport){
      if ((support / length) < min_support) keepers.delete(instance); // keep this value because it satisfies the min support.
    }
    if (this.DEBUG) print(`==============\nITEMSET DEBUG\n${jstr([...itemsetSupport]).replace(/],\[\"\[/g, "],\n\[\"\[")}\n==============`); // debug log
    return keepers; // return the keepers with their supports
  }

  /**
   * Counts the number of itemsets in the data.
   * @private
   * @param {Array<Object>} data - the data to count the itemsets on
   * @returns {Map<String, Number>} itemsetSupport - the itemset numbers
   */
  _countItemsets(data){
    const itemsetSupport = new Map();
    data.forEach((instance)=>{
      const mapKey = jstr([instance.pcat, instance.acat]);
      if (!itemsetSupport.has(mapKey)) itemsetSupport.set(mapKey, 0); // fill in the map value
      itemsetSupport.set(mapKey, itemsetSupport.get(mapKey) + 1); // increment this instance by one in the map
    });
    return itemsetSupport;
  }

  /**
   * Determines the quality of the classifier by testing the trainingSet for the error rate.
   * @private
   * @returns ratio of incorrectly classified instances
   */
  _testTrainingSet(rules, testingSet){
    let total = 0;
    let numIncorrect = 0;
    for (const instance of testingSet) {
      if (rules[instance.pcat]) { // if we have a rule for it, lets count it
        total++;
        let correct = false;
        if (this.DEBUG) print(`${total}. place cat: ${instance.pcat}`);
        correct = rules[instance.pcat].indexOf(instance.acat) != -1; // check the equality of the app part of the itemset
        if (this.DEBUG) print(`rule apps: ${rules[instance.pcat]}\ninstance app: ${instance.acat}`);
        if (!correct) numIncorrect++;
        if (this.DEBUG) print(`${correct ? "FOUND" : "UNKNOWN"}\n`);
      }
    }
    if (this.DEBUG) print(`\nTotal: ${total}\nUnknown: ${numIncorrect}`);
    const errorrate = numIncorrect / total;
    return errorrate != 0 ? Math.round(errorrate * 100) / 100 : 0;
  }

  /**
   * Gets the rules from the itemsets according to the minimum confidence.
   * @private
   * @param {Array<Object>} ogData - the original data to count value frequencies on
   * @param {Map<String, Number>} itemsets - the itemsets to fetch rules from.
   * @param {Decimal} min_conf - the minimum confidence for a rule to be accepted
   * @returns {Object} rules
   */
  _getRules(ogData, itemsets, min_conf){
    const rules = {};
    [...itemsets.keys()].forEach((itemset)=>{
      const arr = parse(itemset);
      let hyp = arr[0];
      let con = arr[1];
      const hypFreq = this._valueFreq(ogData, hyp);
      const conFreq = this._valueFreq(ogData, con);
      const count = itemsets.get(itemset);
      // print(`hypFreq: ${hypFreq} conFreq: ${conFreq}`);
      if ((conFreq / hypFreq) >= min_conf) this._addRules(rules, hyp, con, count);
    });

    if (this.DEBUG) print("==============\nRULES DEBUG\n");
    for (let key of Object.keys(rules)){
      rules[key].sort((a, b)=> b.count - a.count); // sort the rules on count
      if (this.DEBUG) print(`${key}:${jstr(rules[key])
          .replace(/{/g, "{\n   ")
          .replace(/}/g, "\n   }")
          .replace(/],/g, "],\n")}\n`);
      for (const app in rules[key]){
        rules[key][app] = rules[key][app].app; // strip object and count value
      }
    }
    if (this.DEBUG) print("\n==============");
    return rules;
  }

  /**
   * Returns the frequency of a certain value in the original data
   * @private
   */
  _valueFreq(ogData, value){
    let count = 0;
    ogData.forEach((instance)=>{
      if (instance.pcat == value || instance.acat == value) count++;
    });
    return count;
  }

  /**
   * Adds the rules to the rules array by their hypothesis conclusion format.
   * @private
   */
  _addRules(rules, hyp, con, count){
    const value = {app: con, count: count};
    if (rules[hyp]) rules[hyp].push(value); // if a value already exists, append to the rules array
    else rules[hyp] = [value]; // otherwise set
    return rules;
  }
}

// HELPER FUNCTIONS
function print(str){
  console.log(str);
}

function jstr(obj){
  return JSON.stringify(obj);
}

function parse(string){
  return JSON.parse(string);
}

function shuffle(array) {
  /*
    Stole from Christoph on Stack Overflow:
    https://stackoverflow.com/a/962890
   */
  var tmp, current, top = array.length;

  if(top) while(--top) {
      current = Math.floor(Math.random() * (top + 1));
      tmp = array[current];
      array[current] = array[top];
      array[top] = tmp;
  }

  return array;
}

// Module export
export default AppRecom;