前言
前段时间在做项目加载优化时用到了splitChunks自动拆包,后来了解了一下原理,写下了此文。
Modules和Chunks
Modules简单来理解就是我们写的功能模块,不管是CommonJS还是ESM都算是一个Module,而Chunks则是webpack根据我们的规则/默认规则打包处理之后生成的产物,比如下图:
SplitChunks如何使用
关于拆包的具体使用,这里贴一个官网的例子简单说说,关于其他配置可参考webpack官网
splitChunks: {
chunks: "async", //chunks有三个值,initial表示直接引入的模块,async表示按需引入的模块,all则表示all in
minSize: 30000, //最小包体积,这里的单位是byte,超过这个大小的包会被splitChunks优化
minChunks: 1, //模块的最小引用次数,如果引用次数低于这个值,将不会被优化
maxAsyncRequests: 5, //设置async chunks的最大并行请求数
maxInitialRequests: 3, //设置initial chunks的最大并行请求数
automaticNameDelimiter: '~', //产出chunks的文件名分割符
name: true, //true:根据提取chunk的名字自动生成,false:根据缓存组IdHint生成,string:生成文件命即为这个string
cacheGroups: { //缓存组,自定义拆包规则在此定义
vendors: { //默认配置,node_modules的chunk
test: /[\/]node_modules[\/]/,
priority: -10
},
default: { //业务代码的chunk
minChunks: 2,
priority: -20,
reuseExistingChunk: true //复用已存在的chunks
}
}
}
而这一套默认的配置,则是在webpack里面默认定义的,我们可以在WebpackOptionsDefaulter.js里面找到它(当然还有别的配置项):
// Default values for optimization.splitChunks as registered in
// WebpackOptionsDefaulter.js. Values registered with "make" are computed
// from the final options object.
this.set("optimization.splitChunks", {});
this.set("optimization.splitChunks.hidePathInfo", "make", options => {
  return isProductionLikeMode(options);
});
this.set("optimization.splitChunks.chunks", "async");
this.set("optimization.splitChunks.minSize", "make", options => {
  // The default minSize depends on the mode: 30000 bytes for production-like
  // modes, 10000 bytes otherwise.
  // NOTE(review): the reason for the difference is not stated here; the
  // article's author guesses build speed matters more in development.
  return isProductionLikeMode(options) ? 30000 : 10000;
});
this.set("optimization.splitChunks.minChunks", 1);
this.set("optimization.splitChunks.maxAsyncRequests", "make", options => {
  return isProductionLikeMode(options) ? 5 : Infinity;
});
this.set("optimization.splitChunks.automaticNameDelimiter", "~");
this.set("optimization.splitChunks.automaticNameMaxLength", 109);
this.set("optimization.splitChunks.maxInitialRequests", "make", options => {
  return isProductionLikeMode(options) ? 3 : Infinity;
});
this.set("optimization.splitChunks.name", true);
this.set("optimization.splitChunks.cacheGroups", {});
// These two groups are what splits application code and node_modules code
// into separate chunks by default.
this.set("optimization.splitChunks.cacheGroups.default", {
  automaticNamePrefix: "",
  reuseExistingChunk: true,
  minChunks: 2,
  priority: -20
});
this.set("optimization.splitChunks.cacheGroups.vendors", {
  automaticNamePrefix: "vendors",
  // [\\/] matches both "/" and "\" so the rule also works with Windows paths.
  test: /[\\/]node_modules[\\/]/,
  priority: -10
});
接下来举个实际使用例子吧:
splitChunks: {
chunks: 'all',
automaticNameDelimiter: '.',
name: true,
minSize: 30000,
maxSize: 0,
minChunks: 1,
maxAsyncRequests: 10,
maxInitialRequests: 6,
cacheGroups: {
antdesigns: {
name: 'antdesigns',
test: /[\/]node_modules[\/](@antd|antd|@ant-design)[\/]/,
priority: 10
},
reactfileviewer: {
name: 'reactfileviewer',
test: /[\/]node_modules[\/](react-file-viewer)[\/]/,
priority: 10
},
pdfmake: {
name: 'pdfmake',
test: /[\/]node_modules[\/](pdfmake)[\/]/,
priority: 10
},
bizcharts: {
name: 'bizcharts',
test: /[\/]node_modules[\/](bizcharts)[\/]/,
priority: 10
},
commons: {
name: 'commons',
test: /[\/]src[\/]/,
priority: 1
},
vendors: {
name: 'vendors',
test: /[\/]node_modules[\/]/,
priority: -1
},
}
这里值得注意的是,在我们的配置里面,任何一项不满足都不会进入该项的拆包逻辑
SplitChunks原理
接下来我们结合源码看看SplitChunks的运行原理,SplitChunks位置在webpack/lib/optimize/SplitChunksPlugin.js
进入源码我们首先看到SplitChunks的class定义:
module.exports = class SplitChunksPlugin {constructor(options = {}) {const defaultSizeTypes = options.defaultSizeTypes || ["javascript","unknown"];const fallbackCacheGroup = options.fallbackCacheGroup || {};const minSize = normalizeSizes(options.minSize, defaultSizeTypes);const minSizeReduction = normalizeSizes(options.minSizeReduction,defaultSizeTypes);const maxSize = normalizeSizes(options.maxSize, defaultSizeTypes);
/** @type {SplitChunksOptions} *///options的初始化逻辑,概括为我们的配置优先,如果没有则采用默认规则里的相关项this.options = {chunksFilter: normalizeChunksFilter(options.chunks || "all"),defaultSizeTypes,minSize,minSizeReduction,minRemainingSize: mergeSizes(normalizeSizes(options.minRemainingSize, defaultSizeTypes),minSize),enforceSizeThreshold: normalizeSizes(options.enforceSizeThreshold,defaultSizeTypes),maxAsyncSize: mergeSizes(normalizeSizes(options.maxAsyncSize, defaultSizeTypes),maxSize),maxInitialSize: mergeSizes(normalizeSizes(options.maxInitialSize, defaultSizeTypes),maxSize),minChunks: options.minChunks || 1,maxAsyncRequests: options.maxAsyncRequests || 1,maxInitialRequests: options.maxInitialRequests || 1,hidePathInfo: options.hidePathInfo || false,filename: options.filename || undefined,getCacheGroups: normalizeCacheGroups(options.cacheGroups,defaultSizeTypes),getName: options.name ? normalizeName(options.name) : defaultGetName,//这里就是我们提到的如果name为true则使用chunks配置的nameautomaticNameDelimiter: options.automaticNameDelimiter,usedExports: options.usedExports,fallbackCacheGroup: {chunksFilter: normalizeChunksFilter(fallbackCacheGroup.chunks || options.chunks || "all"),minSize: mergeSizes(normalizeSizes(fallbackCacheGroup.minSize, defaultSizeTypes),minSize),maxAsyncSize: mergeSizes(normalizeSizes(fallbackCacheGroup.maxAsyncSize, defaultSizeTypes),normalizeSizes(fallbackCacheGroup.maxSize, defaultSizeTypes),normalizeSizes(options.maxAsyncSize, defaultSizeTypes),normalizeSizes(options.maxSize, defaultSizeTypes)),maxInitialSize: mergeSizes(normalizeSizes(fallbackCacheGroup.maxInitialSize, defaultSizeTypes),normalizeSizes(fallbackCacheGroup.maxSize, defaultSizeTypes),normalizeSizes(options.maxInitialSize, defaultSizeTypes),normalizeSizes(options.maxSize, defaultSizeTypes)),automaticNameDelimiter:fallbackCacheGroup.automaticNameDelimiter ||options.automaticNameDelimiter ||"~"}};
/** @type {WeakMap<CacheGroupSource, CacheGroup>} */this._cacheGroupCache = new WeakMap();//cacheGroup的数据结构是WeakMap,这样做的目的是缓存每一个CacheGroup后续使用}
准备阶段
先说结论:准备阶段最重要的是chunksInfoMap(存储代码分割信息,可以理解为处理我们配置的一些规则,比如chunk的size/count,这是为后续处理minSize/minChunks做准备的数据,chunksInfoMap会一直更新,最终加入results返回,而results就是我们拆包之后能看到的chunks)和addModuleToChunksInfoMap(将代码分割信息加入chunksInfoMap),当然,在这两个方法中还调用了一些关键的方法,比如处理chunksSet、处理chunksKey等
接下来就到了SplitChunks最核心的一个方法了
/**
* Apply the plugin
* @param {Compiler} compiler the compiler instance
* @returns {void}
*/
apply(compiler) {//compiler是Compiler的一个实例,里面包含了webpack的所有配置,loaders、plugins等,在webpack启动的时候创建这个对象,全局唯一const cachedMakePathsRelative = makePathsRelative.bindContextCache(compiler.context,compiler.root);
...
}
接下来我们将这个方法拆开来看
chunksKey设置
在这里主要对chunks在映射中的key进行处理,用于后续优化
compiler.hooks.thisCompilation.tap("SplitChunksPlugin", compilation => {const logger = compilation.getLogger("webpack.SplitChunksPlugin");let alreadyOptimized = false;//这个标志表示收到新的模块compilation.hooks.unseal.tap("SplitChunksPlugin", () => {alreadyOptimized = false;});
compilation.hooks.optimizeChunks.tap({name: "SplitChunksPlugin",stage: STAGE_ADVANCED},chunks => {//所有的拆包过程都在这个回调中执行if (alreadyOptimized) return;alreadyOptimized = true;logger.time("prepare");const chunkGraph = compilation.chunkGraph;const moduleGraph = compilation.moduleGraph;// Give each selected chunk an index (to create strings from chunks)/** @type {Map<Chunk, bigint>} */const chunkIndexMap = new Map();//创建一个map存chunks的indexconst ZERO = BigInt("0");const ONE = BigInt("1");const START = ONE << BigInt("31");let index = START;for (const chunk of chunks) {//给每一个chunk一个初始indexchunkIndexMap.set(chunk,index | BigInt((Math.random() * 0x7fffffff) | 0));index = index << ONE;}
//关于这个index,4.x版本的方式是下面这样,没有看出现在这样方式的奥义,个人理解只是为了生成一个随机index
/*
* for (const chunk of chunks) {
*indexMap.set(chunk, index++); * }
*/
/**
 * Build the lookup key for a collection of chunks.
 * @param {Iterable<Chunk>} chunks list of chunks
 * @returns {bigint | Chunk} key of the chunks
 */
const getKey = chunks => {
  const iterator = chunks[Symbol.iterator]();
  let result = iterator.next();
  // No chunk at all: use the shared ZERO key.
  if (result.done) return ZERO;
  const first = result.value;
  result = iterator.next();
  // Exactly one chunk: the chunk object itself is the key.
  if (result.done) return first;
  // Two or more chunks: combine the per-chunk bigint indices into one key.
  let key = chunkIndexMap.get(first) | chunkIndexMap.get(result.value);
  while (!(result = iterator.next()).done) {
    const raw = chunkIndexMap.get(result.value);
    key = key ^ raw;
  }
  return key;
};

/**
 * Render a key produced by getKey as a hexadecimal string.
 * @param {bigint | Chunk} key key of the chunks
 * @returns {string} hexadecimal form of the key (or of the chunk's index)
 */
const keyToString = key => {
  if (typeof key === "bigint") return key.toString(16);
  return chunkIndexMap.get(key).toString(16);
};

// Lazily computes, for every module of the compilation, the set of chunks
// that contain it. Modules contained in the same combination of chunks share
// one key, so each distinct combination is stored only once.
const getChunkSetsInGraph = memoize(() => {
  /** @type {Map<bigint, Set<Chunk>>} */
  const chunkSetsInGraph = new Map();
  /** @type {Set<Chunk>} */
  const singleChunkSets = new Set();
  for (const module of compilation.modules) {
    const chunks = chunkGraph.getModuleChunksIterable(module);
    const chunksKey = getKey(chunks);
    if (typeof chunksKey === "bigint") {
      // Keep the first chunk set seen for this key so it can be fetched by
      // key later on.
      if (!chunkSetsInGraph.has(chunksKey)) {
        chunkSetsInGraph.set(chunksKey, new Set(chunks));
      }
    } else {
      // The key is a Chunk object: the module lives in exactly one chunk.
      singleChunkSets.add(chunksKey);
    }
  }
  return { chunkSetsInGraph, singleChunkSets };
});
处理引用次数
这里需要注意的是,module和chunksSet是一对一的关系,而这里的count和chunksSet是一对多的关系,这里的处理是为了后续对minChunks过滤做准备
/**
 * Group chunk sets by how many chunks each set contains.
 * @param {Iterable<Set<Chunk>>} chunkSets the chunk sets to group
 * @returns {Map<number, Array<Set<Chunk>>>} chunk sets indexed by their size,
 * in first-seen order of the sizes
 */
const groupChunkSetsByCount = chunkSets => {
  /** @type {Map<number, Array<Set<Chunk>>>} */
  const chunkSetsByCount = new Map();
  for (const chunksSet of chunkSets) {
    // The set's size is the number of chunks sharing the same module(s).
    const count = chunksSet.size;
    let array = chunkSetsByCount.get(count);
    // First set of this size: create its bucket.
    if (array === undefined) {
      array = [];
      chunkSetsByCount.set(count, array);
    }
    array.push(chunksSet);
  }
  return chunkSetsByCount;
};
下面这段代码就是具体处理合并同key的chunk并处理子集的逻辑:
/**
 * Create a cached lookup that lists the possible chunk combinations for a key.
 * @param {Map<bigint, Set<Chunk>>} chunkSets chunk sets indexed by their key
 * @param {Set<Chunk>} singleChunkSets chunks that form a combination on their own
 * @param {Map<number, Array<Set<Chunk>>>} chunkSetsByCount chunk sets indexed by size
 * @returns {function(bigint | Chunk): (Set<Chunk> | Chunk)[]} function returning
 * the combinations for a key; repeated calls with the same key return the same array
 */
const createGetCombinations = (
  chunkSets,
  singleChunkSets,
  chunkSetsByCount
) => {
  /** @type {Map<bigint | Chunk, (Set<Chunk> | Chunk)[]>} */
  const combinationsCache = new Map();

  return key => {
    const cacheEntry = combinationsCache.get(key);
    if (cacheEntry !== undefined) return cacheEntry;
    if (key instanceof Chunk) {
      // A single chunk is its own (only) combination.
      const result = [key];
      combinationsCache.set(key, result);
      return result;
    }
    const chunksSet = chunkSets.get(key);
    /** @type {(Set<Chunk> | Chunk)[]} */
    const array = [chunksSet];
    for (const [count, setArray] of chunkSetsByCount) {
      // Only strictly smaller sets can be proper subsets of chunksSet; every
      // subset found is collected next to the set fetched by key.
      if (count < chunksSet.size) {
        for (const set of setArray) {
          if (isSubset(chunksSet, set)) {
            array.push(set);
          }
        }
      }
    }
    // Single chunks that belong to chunksSet are combinations as well.
    for (const chunk of singleChunkSets) {
      if (chunksSet.has(chunk)) {
        array.push(chunk);
      }
    }
    combinationsCache.set(key, array);
    return array;
  };
};
拆分缓存组
接下来就是最关键的准备工作:将每一项chunk分成独立的缓存组
/**
 * Record a module as a split candidate for one cache group and one selection
 * of chunks by adding it to the matching chunksInfoMap entry.
 * @param {CacheGroup} cacheGroup the current cache group
 * @param {number} cacheGroupIndex the index of the cache group of ordering
 * @param {Chunk[]} selectedChunks chunks selected for this module
 * @param {bigint | Chunk} selectedChunksKey a key of selectedChunks
 * @param {Module} module the current module
 * @returns {void}
 */
const addModuleToChunksInfoMap = (
  cacheGroup,
  cacheGroupIndex,
  selectedChunks,
  selectedChunksKey,
  module
) => {
  // Fewer selected chunks than minChunks: nothing to do.
  if (selectedChunks.length < cacheGroup.minChunks) return;
  const name = cacheGroup.getName(module, selectedChunks, cacheGroup.key);
  // Check if the name is ok
  const existingChunk = compilation.namedChunks.get(name);
  if (existingChunk) {
    const parentValidationKey = `${name}|${
      typeof selectedChunksKey === "bigint"
        ? selectedChunksKey
        : selectedChunksKey.debugId
    }`;
    const valid = alreadyValidatedParents.get(parentValidationKey);
    if (valid === false) return;
    if (valid === undefined) {
      // Module can only be moved into the existing chunk if the existing chunk
      // is a parent of all selected chunks
      let isInAllParents = true;
      /** @type {Set<ChunkGroup>} */
      // Seed the queue with every chunk group of the selected chunks.
      const queue = new Set();
      for (const chunk of selectedChunks) {
        for (const group of chunk.groupsIterable) {
          queue.add(group);
        }
      }
      // Walk up the parents; groups added to the Set while iterating are
      // visited by this same loop.
      for (const group of queue) {
        if (existingChunk.isInGroup(group)) continue;
        let hasParent = false;
        for (const parent of group.parentsIterable) {
          hasParent = true;
          queue.add(parent);
        }
        // Reached a group without parents that does not contain existingChunk.
        if (!hasParent) {
          isInAllParents = false;
        }
      }
      const valid = isInAllParents;
      alreadyValidatedParents.set(parentValidationKey, valid);
      if (!valid) {
        // Report the conflict only once per name.
        if (!alreadyReportedErrors.has(name)) {
          alreadyReportedErrors.add(name);
          compilation.errors.push(
            new WebpackError(
              "SplitChunksPlugin\n" +
                `Cache group "${cacheGroup.key}" conflicts with existing chunk.\n` +
                `Both have the same name "${name}" and existing chunk is not a parent of the selected modules.\n` +
                "Use a different name for the cache group or make sure that the existing chunk is a parent (e. g. via dependOn).\n" +
                'HINT: You can omit "name" to automatically create a name.\n' +
                "BREAKING CHANGE: webpack < 5 used to allow to use an entrypoint as splitChunk. " +
                "This is no longer allowed when the entrypoint is not a parent of the selected modules.\n" +
                "Remove this entrypoint and add modules to cache group's 'test' instead. " +
                "If you need modules to be evaluated on startup, add them to the existing entrypoints (make them arrays). " +
                "See migration guide of more info."
            )
          );
        }
        return;
      }
    }
  }
  // The map key uses the chunk name when there is one, otherwise the key of
  // the selected chunks.
  const key =
    cacheGroup.key +
    (name ? ` name:${name}` : ` chunks:${keyToString(selectedChunksKey)}`);
  // Add module to chunksInfoMap
  let info = chunksInfoMap.get(key);
  if (info === undefined) {
    chunksInfoMap.set(
      key,
      (info = {
        modules: new SortableSet(undefined, compareModulesByIdentifier),
        cacheGroup,
        cacheGroupIndex,
        name,
        sizes: {},
        chunks: new Set(),
        reuseableChunks: new Set(),
        chunksKeys: new Set()
      })
    );
  }
  // Only add the module's sizes when it was not already part of the entry
  // (the set size changes only for a new module).
  const oldSize = info.modules.size;
  info.modules.add(module);
  if (info.modules.size !== oldSize) {
    for (const type of module.getSourceTypes()) {
      info.sizes[type] = (info.sizes[type] || 0) + module.size(type);
    }
  }
  // Likewise, only add the selected chunks the first time this chunks key is
  // seen for the entry.
  const oldChunksKeysSize = info.chunksKeys.size;
  info.chunksKeys.add(selectedChunksKey);
  if (oldChunksKeysSize !== info.chunksKeys.size) {
    for (const chunk of selectedChunks) {
      info.chunks.add(chunk);
    }
  }
};
分组阶段
这一阶段的工作是处理我们配置的cacheGroups,分组阶段的重点是cacheGroup.chunksFilter过滤缓存组和chunksInfoMap的更新,核心代码如下
// Grouping phase: for every module, find the cache groups it belongs to and
// register it for each chunk combination that passes the group's filters.
for (const module of compilation.modules) {
  // Get the cache groups for this module; skip it when the result is not a
  // non-empty array.
  // NOTE(review): per the article, getCacheGroups calls a user-supplied
  // function directly and turns an object config into an array of results;
  // that normalization is not visible here, so confirm it.
  const cacheGroups = this.options.getCacheGroups(module, context);
  if (!Array.isArray(cacheGroups) || cacheGroups.length === 0) {
    continue;
  }

  // Lazily resolve the chunk combinations for the chunks containing this
  // module, looked up through the key those chunks share.
  const getCombs = memoize(() => {
    const chunks = chunkGraph.getModuleChunksIterable(module);
    const chunksKey = getKey(chunks);
    return getCombinations(chunksKey);
  });

  // The index records the position of each cache group in iteration order.
  let cacheGroupIndex = 0;
  for (const cacheGroupSource of cacheGroups) {
    const cacheGroup = this._getCacheGroup(cacheGroupSource);

    const combs = cacheGroup.usedExports
      ? getCombsByUsedExports()
      : getCombs();
    // For all combination of chunk selection
    for (const chunkCombination of combs) {
      // Break if minimum number of chunks is not reached
      const count =
        chunkCombination instanceof Chunk ? 1 : chunkCombination.size;
      if (count < cacheGroup.minChunks) continue;
      // Select chunks by configuration; cacheGroup.chunksFilter is the filter
      // derived from the "chunks" option.
      const { chunks: selectedChunks, key: selectedChunksKey } =
        getSelectedChunks(chunkCombination, cacheGroup.chunksFilter);

      // Registration always goes through addModuleToChunksInfoMap.
      addModuleToChunksInfoMap(
        cacheGroup,
        cacheGroupIndex,
        selectedChunks,
        selectedChunksKey,
        module
      );
    }
    cacheGroupIndex++;
  }
}
/**
 * Turn the "chunks" option into a chunk filter; this is what
 * cacheGroup.chunksFilter refers to.
 * @param {"initial" | "async" | "all" | Function} chunks the configured "chunks" value
 * @returns {Function | undefined} the matching predefined filter, the given
 * function itself, or undefined when the value is none of these
 */
const normalizeChunksFilter = chunks => {
  switch (chunks) {
    case "initial":
      return INITIAL_CHUNK_FILTER;
    case "async":
      return ASYNC_CHUNK_FILTER;
    case "all":
      return ALL_CHUNK_FILTER;
    default:
      // A custom filter function is passed through untouched; any other
      // value yields undefined.
      return typeof chunks === "function" ? chunks : undefined;
  }
};
分组阶段总结下来就是:这个阶段会整理出cacheGroups,并基于chunks类型做一个初步处理,后续类似maxInitialRequests的条件会在下一个阶段(检查阶段)进行处理
检查阶段
在最终的检查阶段,主要工作是根据配置的对应规则进行处理,处理逻辑是一个很大的while,这里列举几个,感兴趣的同学可以到源码中看全部的检查规则
while (chunksInfoMap.size > 0) {//整个阶段的大循环//寻找最匹配的cacheGroup,优先分割,产出打包结果let bestEntryKey;let bestEntry;for (const pair of chunksInfoMap) {const key = pair[0];const info = pair[1];if (bestEntry === undefined ||
// 比较谁更需要有限分割compareEntries(bestEntry, info) < 0) {bestEntry = info;bestEntryKey = key;}}
const item = bestEntry;chunksInfoMap.delete(bestEntryKey);
//通过缓存组生成新的chunk
let chunkName = item.name;/** @type {Chunk} */let newChunk;// 如果chunk没有name,会判断是否可以复用一个chunklet isExistingChunk = false;let isReusedWithAllModules = false;if (chunkName) {const chunkByName = compilation.namedChunks.get(chunkName);if (chunkByName !== undefined) {newChunk = chunkByName;const oldSize = item.chunks.size;item.chunks.delete(newChunk);isExistingChunk = item.chunks.size !== oldSize;}} else if (item.cacheGroup.reuseExistingChunk) {outer: for (const chunk of item.chunks) {if (chunkGraph.getNumberOfChunkModules(chunk) !==item.modules.size) {continue;}if (item.chunks.size > 1 &&chunkGraph.getNumberOfEntryModules(chunk) > 0) {continue;}for (const module of item.modules) {if (!chunkGraph.isModuleInChunk(module, chunk)) {continue outer;}}if (!newChunk || !newChunk.name) {newChunk = chunk;} else if (chunk.name &&chunk.name.length < newChunk.name.length) {newChunk = chunk;} else if (chunk.name &&chunk.name.length === newChunk.name.length &&chunk.name < newChunk.name) {newChunk = chunk;}}if (newChunk) {item.chunks.delete(newChunk);chunkName = undefined;isExistingChunk = true;isReusedWithAllModules = true;}}
//如果剩下的chunk大小依然不符合标准,则尝试进一步分割
if (usedChunks.size < item.chunks.size) {if (isExistingChunk) usedChunks.add(newChunk);if (usedChunks.size >= item.cacheGroup.minChunks) {const chunksArr = Array.from(usedChunks);for (const module of item.modules) {addModuleToChunksInfoMap(item.cacheGroup,item.cacheGroupIndex,chunksArr,getKey(usedChunks),module);}}continue;}
以上就是列举的某一些配置项对应的规则的处理逻辑,在检查阶段的最后会生成results,也就是我们能看到的chunks,至此splitChunks就结束了
总结
纵观splitChunks的全部过程,可以看到,其实准备阶段的工作量是非常大的,进而到分组,再到最后的检查,将代码包一步步拆分,最终生成chunks。
以上就是本文的所有内容,如有错误,欢迎指正,感谢阅读。
最后
整理了75个JS高频面试题,并给出了答案和解析,基本上可以保证你能应付面试官关于JS的提问。
有需要的小伙伴,可以点击下方卡片领取,无偿分享