loadsh xor 实现分析

论如何高效的计算数组/集合的对称差

header

最近碰到这么一个业务需求:两个 ip 列表,需要从中找出交集以外的项,也就是找对称差。举例来说:

有两个数组:

const arr1 = [1, 2, 3, 4, 5];
const arr2 = [1, 3, 5, 7, 9];

首先在 arr1 中找 arr1 有而 arr2 没有的元素,即 [2,4]
紧接着在 arr2 中找 arr2 有而 arr1 没有的元素,即 [7,9]

合并起来就是结果就是[2,4,7,9]

根据题意很快啊,我们就能写出以下代码:

const arr1 = [1, 2, 3, 4, 5];
const arr2 = [1, 3, 5, 7, 9];

const diffArr1InArr2 = arr1.filter((item) => !arr2.includes(item));
const diffArr2InArr1 = arr2.filter((item) => !arr1.includes(item));
const result = diffArr1InArr2.concat(diffArr2InArr1);

时间复杂度为 O(n^2),当数组很大时,这就不行了,需要改进一下,很快啊,优化版就出来了

const arr1 = [1, 2, 3, 4, 5];
const arr2 = [1, 3, 5, 7, 9];

const arr1Map = new Map();
let arr1Index = arr1.length;
while (arr1Index--) {
  arr1Map.set(arr1[arr1Index], "undefined");
}

const arr2Map = new Map();
let arr2Index = arr2.length;
while (arr2Index--) {
  arr2Map.set(arr2[arr2Index], "undefined");
}

const diffArr1InArr2 = arr1.filter((item) => !arr2Map.has(item));
const diffArr2InArr1 = arr2.filter((item) => !arr1Map.has(item));
const result = diffArr1InArr2.concat(diffArr2InArr1);

时间复杂度为 O(n),这样速度就基本无敌了,这也是 lodash xor 采用的方法。

最后,针对以下几点升级一下就是 loadsh 的原函数了(部分使用了数组的语法糖,loadsh 为了极致的性能全部用了 while 循环实现):

  • 支持输入多个数组
  • 已计算过的数组,下一次参数计算时要用上一次计算的结果即 result[index] || array
  • 数量较少时还是使用原始版的计算方法,避免 map 创建和销毁的开销
function xorBy(arrays, iteratee) {
  var length = arrays.length;
  var index = -1,
    result = Array(length);
  while (++index < length) {
    var array = arrays[index],
      othIndex = -1;

    while (++othIndex < length) {
      if (othIndex != index) {
        result[index] = baseDifference(
          result[index] || array,
          arrays[othIndex],
          iteratee
        );
      }
    }
  }
  return baseFlatten(result);
}

function baseFlatten(array) {
  var index = -1,
    length = array.length;

  var result = [];

  while (++index < length) {
    result = result.concat(array[index]);
  }
  return result;
}

function baseDifference(array, values, iteratee) {
  var index = -1,
    length = array.length,
    result = [],
    valuesLength = values.length,
    isCommon = true;

  if (!length) {
    return result;
  }

  if (iteratee) {
    values = values.map((value) => value[iteratee]);
  }

  if (valuesLength >= 200) {
    isCommon = false;
    var valuesMap = new Map();
    var valuesIndex = valuesLength;
    while (valuesIndex--) {
      valuesMap.set(values[valuesIndex], "undefined");
    }
    values = valuesMap;
  }

  outer: while (++index < length) {
    var value = array[index],
      computed = iteratee == null ? value : value[iteratee];

    value = value !== 0 ? value : 0;
    if (isCommon && computed === computed) {
      var valuesIndex = valuesLength;
      while (valuesIndex--) {
        if (values[valuesIndex] === computed) {
          continue outer;
        }
      }
      result.push(value);
    } else if (!values.has(computed)) {
      result.push(value);
    }
  }
  return result;
}