Преглед на файлове

Refactoring cache

* Update README
* Add includeStateValues option for debugging
* Add better detail on cache misses (i.e. what caused the miss)
* Reorganised runner structure
* Improved some function names
Craig Fletcher преди 5 месеца
родител
ревизия
a212e95446
променени са 6 файла, в които са добавени 99 реда и са изтрити 51 реда
  1. 24 1
      README.md
  2. 34 18
      src/cache.js
  3. 2 2
      src/defaults.js
  4. 4 15
      src/index.js
  5. 22 6
      src/lib.js
  6. 13 9
      src/util.js

+ 24 - 1
README.md

@@ -49,6 +49,9 @@ Cache files are stored in `.cache/` by default and can be disabled by setting `c
 
 If output file checks are skipped with `ignoreExisting`, only files that have changed inputs will be output.
 
+You can get additional detail on state cache misses by setting `opts.includeStateValues` to `true`, at the cost of
+considerably larger cache files.
+
 ## Configuration
 
 A working example of how to configure can be found in `src/defaults.js`, with the processors separated out to
@@ -76,6 +79,7 @@ opts: {
   cacheDir: '.cache',
   clean: true,
   ignoreExisting: false,
+  includeStateValues: false,
   logLevel: 'debug',
   include: { 
     styles: [{ pattern: '~/.rhedyn/styles/*.scss' }] 
@@ -132,6 +136,7 @@ expanded for every file found
 - `outputDir`: Directory within `outDir` where processed files should be placed
 - `outputFileExtension`: File extension for processed files
 - `processor`: Function that processes the files (required)
+- `logLevel`: Optionally override the log level for a specific task
 - Additional processor-specific properties (e.g., `imageSizes`, `quality` for image processing)
 
 **Input File Patterns:**
@@ -147,6 +152,24 @@ inputFiles: [
 
 A processor is a function that receives an object with `config` and `meta` properties and returns an object describing what was processed.
 
+A processor that returns a `ref` will have its `detail`, `paths`, `ref` and `fromCache` properties made available in
+`meta.resources`, with the `ref` as the key under the task name:
+```javascript
+{
+  ...meta,
+  resources: {
+    [task.name]: {
+      [jobResult.ref]: {
+        detail,
+        paths,
+        ref,
+        fromCache
+      }
+    }
+  }
+}
+```
+
 The processor function signature:
 ```javascript
 async function myProcessor({ config, meta }) {
@@ -157,7 +180,7 @@ async function myProcessor({ config, meta }) {
   
   // meta contains:
   // - opts: global configuration
-  // - resources: results from previously completed tasks
+  // - resources: results from previously completed tasks, structured as described above
   
   // Process the file...
   

+ 34 - 18
src/cache.js

@@ -5,7 +5,7 @@ import { createReadStream } from "fs"
 import stableStringify from "safe-stable-stringify"
 import {
   slugifyString,
-  checkPathExists,
+  checkFilesExist,
   getValueAtPath,
   removeCwd,
   getDeepestPropertiesForKey,
@@ -69,37 +69,47 @@ function getStatePropsHash(state, props) {
 export async function checkCache(cacheKey, currentState, opts) {
   const name = slugifyString(cacheKey)
   const existingCacheObject = await readCache(opts.cacheDir, name)
-  if (existingCacheObject) {
-    if (opts.ignoreExisting || (await checkPathExists(existingCacheObject.taskResult.paths, opts.outDir))) {
+  if (existingCacheObject.exists) {
+    const fileChecks = opts.ignoreExisting ? {} : await checkFilesExist(existingCacheObject.content.taskResult.paths, opts.outDir)
+    if (!fileChecks.absent?.length) {
       const stateHash = getStatePropsHash(
         currentState,
-        existingCacheObject.deps.state.props,
+        existingCacheObject.content.deps.state.props,
       )
-      if (stateHash === existingCacheObject.deps.state.hash) {
+      if (stateHash === existingCacheObject.content.deps.state.hash) {
         try {
-          await getFileHashes(existingCacheObject.deps.paths)
-          return { hit: true, taskResult: existingCacheObject.taskResult }
+          await getFileHashes(existingCacheObject.content.deps.paths)
+          return { hit: true, taskResult: existingCacheObject.content.taskResult, filePath: existingCacheObject.filePath }
         } catch (e) {
           const updates = {
             deps: {
               paths: [e],
             },
           }
-          return { hit: false, reason: "File hash mismatch", updates }
+          return { hit: false, reason: `File hash mismatch: ${e.filePath}`, updates, filePath: existingCacheObject.filePath }
         }
       }
       const updates = {
         deps: {
           state: {
-            ...existingCacheObject.deps.state,
+            ...existingCacheObject.content.deps.state,
             hash: stateHash,
           },
         },
       }
-      return { hit: false, reason: "State hash mismatch", updates }
+      const stateValuesList = existingCacheObject.content.deps.state.values
+      if (stateValuesList) {
+        const mismatchedStateProps = existingCacheObject.content.deps.state.props.filter((stateProp, index) => {
+          const currentValue = getValueAtPath(currentState, stateProp)
+          const cachedValue = stateValuesList[index]
+          return currentValue !== cachedValue
+        })
+        return { hit: false, reason: `State hash mismatch: ${mismatchedStateProps.join(", ")}`, updates, filePath: existingCacheObject.filePath }
+      }
+      return { hit: false, reason: "State hash mismatch (no values were found in cache)", updates, filePath: existingCacheObject.filePath }
     }
     if (opts.clean) {
-      const outFiles = existingCacheObject.taskResult.paths
+      const outFiles = existingCacheObject.content.taskResult.paths
       await Promise.all(
         outFiles.map(
           async outFile =>
@@ -107,9 +117,9 @@ export async function checkCache(cacheKey, currentState, opts) {
         ),
       )
     }
-    return { hit: false, reason: "Missing output file(s)" }
+    return { hit: false, reason: `Missing output file(s): ${fileChecks.absent.join(", ")}`, filePath: existingCacheObject.filePath }
   }
-  return { hit: false, reason: "Missing cache file" }
+  return { hit: false, reason: `Missing cache file: ${existingCacheObject.filePath}`, filePath: existingCacheObject.filePath }
 }
 
 export async function updateCache(
@@ -119,6 +129,7 @@ export async function updateCache(
   stateDeps,
   taskResult,
   updates,
+  includeStateValues,
 ) {
   await fs.mkdir(cacheDir, { recursive: true })
   const name = slugifyString(cacheKey)
@@ -131,6 +142,7 @@ export async function updateCache(
     ),
   }
   const statePropsList = Object.keys(deps.state)
+  const stateValuesList = Object.values(deps.state)
   const updatesStateHash = updates?.deps?.state?.props || []
   const stateDepsHash =
     JSON.stringify(statePropsList) === JSON.stringify(updatesStateHash)
@@ -166,6 +178,9 @@ export async function updateCache(
     },
     taskResult,
   }
+  if (includeStateValues) {
+    cacheObject.deps.state.values = stateValuesList
+  }
   return await writeCache(cacheDir, name, cacheObject)
 }
 
@@ -181,16 +196,17 @@ async function writeCache(cacheDir, name, cache) {
 }
 
 async function readCache(cacheDir, name) {
+  const filePath = path.join(cacheDir, `${name}.json`)
   if (!cacheDir) {
-    return false
+    return { exists: false, filePath }
   }
   try {
     const content = await fs.readFile(
-      path.join(cacheDir, `${name}.json`),
+      filePath,
       "utf8",
     )
-    return JSON.parse(content)
-  } catch {
-    return false
+    return { exists: true, content: JSON.parse(content), filePath }
+  } catch (e) {
+    return { exists: false, filePath, reason: e }
   }
 }

+ 2 - 2
src/defaults.js

@@ -57,7 +57,6 @@ export const tasks = [
     stripPaths: ["markdown/"],
     outputFileExtension: ".html",
     processor: renderMarkdownWithTemplate,
-    logLevel: "debug",
     defaultTemplate: "default",
     templateDirs: ["templates/", "~/.rhedyn/templates/"],
   },
@@ -72,7 +71,8 @@ export const opts = {
   },
   clean: true,
   ignoreExisting: false,
-  logLevel: "debug",
+  logLevel: "info",
+  includeStateValues: true,
   site: {
     name: "Website generated by Rhedyn",
     shortName: "Rhedyn test site",

+ 4 - 15
src/index.js

@@ -15,10 +15,6 @@ if (opts.cacheDir) {
 } else {
   log.warn("Cache disabled")
 }
-async function runTask(meta, task) {
-  const allResults = await processTask(task, meta)
-  return allResults
-}
 
 const taskRunner = tasks.reduce(
   async (metaPromise, step) => {
@@ -28,17 +24,9 @@ const taskRunner = tasks.reduce(
     log.info(`Starting tasks: ${stepTasks.join(", ")}`)
     const stepResults = await Promise.all(tasks.map(async task => {
       const log = getLogger(opts.logLevel, task.name)
-      const startTime = performance.now()
-      const taskResult = await runTask(meta, task)
-      const cached = taskResult.filter(taskResult => taskResult.fromCache)
-      const processed = taskResult.filter(taskResult => !taskResult.fromCache)
-      const resources = taskResult.reduce((obj, tResult) => tResult.ref ? ({ ...obj, [tResult.ref]: tResult }) : obj, {})
-      const endTime = performance.now()
-      const timeTaken = (endTime - startTime)
-      const hrTime = timeTaken > 1000 ? `${Number.parseFloat(timeTaken / 1000).toFixed(2)}s` : `${Number.parseFloat(timeTaken).toFixed(2)}ms`
-      const filesWritten = processed.reduce((acc, cur) => acc + cur.paths.length, 0)
-      log.info(`written: ${filesWritten} | processed: ${processed.length} | from cache: ${cached.length} | ${hrTime}`)
-      return { name: task.name, taskResult, cached, processed, resources, filesWritten }
+      const taskResult = await processTask(meta, task)
+      log.trace(`taskResult: ${JSON.stringify(taskResult)}`)
+      return taskResult
     }))
     const newState = stepResults.reduce((newState, taskResult) => {
       const resources = Object.keys(taskResult.resources).length > 0 ? {
@@ -58,6 +46,7 @@ const taskRunner = tasks.reduce(
   Promise.resolve({ meta: { opts }, filesWritten: 0 }),
 )
 const finalState = await taskRunner
+log.trace(`Final state: ${JSON.stringify(finalState, null, 2)}`)
 const endTime = performance.now()
 const timeTaken = (endTime - startTime)
 const hrTime = timeTaken > 1000 ? `${Number.parseFloat(timeTaken / 1000).toFixed(2)}s` : `${Number.parseFloat(timeTaken).toFixed(2)}ms`

+ 22 - 6
src/lib.js

@@ -30,7 +30,7 @@ export async function getConfig() {
   }
 }
 async function runTask({ meta, config, jobId }) {
-  const log = getLogger(meta.opts.logLevel, jobId)
+  const log = getLogger(config.logLevel ? config.logLevel : meta.opts.logLevel, jobId)
   log.trace(`meta: ${JSON.stringify(meta, null, 2)}`)
   log.trace(`config: ${JSON.stringify(config, null, 2)}`)
 
@@ -48,7 +48,7 @@ async function runTask({ meta, config, jobId }) {
     { disabled: true, reason: "Cache disabled" }
 
   if (cache && cache.hit) {
-    log.debug(`Loaded cache for ${jobId}`)
+    log.debug(`Loaded cache for ${jobId}: ${cache.filePath}`)
     return { ...cache.taskResult, fromCache: true }
   }
   log.debug(`Cache miss for ${jobId} (${cache.reason})`)
@@ -71,7 +71,7 @@ async function runTask({ meta, config, jobId }) {
   }
   log.debug(`Wrote ${taskResult.paths.length} files for ${jobId}`)
   if (cache && !cache.disabled) {
-    log.debug(`Updating cache for ${jobId}`)
+    log.debug(`Updating cache for ${jobId}: ${cache.filePath}`)
     const processorPathDeps = processorDeps?.paths || []
     const processorStateDeps = processorDeps?.state || []
     const configPathDeps = config.deps?.paths || []
@@ -87,13 +87,14 @@ async function runTask({ meta, config, jobId }) {
       ].filter(item => !!item),
       taskResult,
       cache.updates,
+      meta.opts.includeStateValues,
     )
   }
 
   return taskResult
 }
 
-async function processFileTask(patternsToInclude, config, meta) {
+async function expandFileTask(patternsToInclude, config, meta) {
   const filesToProcess = await readFilesByGlob(patternsToInclude)
   const pathsToStrip = config.stripPaths || []
   const outputDir = config.outputDir || ""
@@ -117,15 +118,30 @@ async function processFileTask(patternsToInclude, config, meta) {
     }),
   )}
 
-export async function processTask(config, meta) {
+export async function expandAndRunTask(meta, config) {
   const includes = meta.opts?.include?.[config.name] || []
   const patternsToInclude = [...(config?.inputFiles || []), ...includes]
 
   if (patternsToInclude.length) {
-    return processFileTask(patternsToInclude, config, meta)
+    return expandFileTask(patternsToInclude, config, meta)
   }
 
   const jobId = config.jobId || config.name
   const taskResult = await runTask({ meta, config, jobId })
   return [taskResult]
 }
+
+export async function processTask(meta, task) {
+  const log = getLogger(meta.opts.logLevel, task.name)
+  const startTime = performance.now()
+  const taskResult = await expandAndRunTask(meta, task)
+  const cached = taskResult.filter(taskResult => taskResult.fromCache)
+  const processed = taskResult.filter(taskResult => !taskResult.fromCache)
+  const resources = taskResult.reduce((obj, tResult) => tResult.ref ? ({ ...obj, [tResult.ref]: tResult }) : obj, {})
+  const endTime = performance.now()
+  const timeTaken = (endTime - startTime)
+  const hrTime = timeTaken > 1000 ? `${Number.parseFloat(timeTaken / 1000).toFixed(2)}s` : `${Number.parseFloat(timeTaken).toFixed(2)}ms`
+  const filesWritten = processed.reduce((acc, cur) => acc + cur.paths.length, 0)
+  log.info(`written: ${filesWritten} | processed: ${processed.length} | from cache: ${cached.length} | ${hrTime}`)
+  return { name: task.name, taskResult, cached, processed, resources, filesWritten }
+}

+ 13 - 9
src/util.js

@@ -187,15 +187,19 @@ export function getValueAtPath(obj, path) {
   return val
 }
 
-export async function checkPathExists(files, baseDir) {
-  if (Array.isArray(files)) {
-    return (await Promise.all(
-      files.map(file => {
-        return fileExists(path.join(baseDir, file))
-      }),
-    )).every(item => !!item)
-  }
-  return fileExists(path.join(baseDir, files))
+export async function checkFilesExist(files, baseDir) {
+  const filesToCheck = Array.isArray(files) ? files : [files]
+  const fileCheckResults = await Promise.all(
+    filesToCheck.map(async file => {
+      const filePath = path.join(baseDir, file)
+      const exists = await fileExists(filePath)
+      return { filePath, exists }
+    }),
+  )
+  return fileCheckResults.reduce((sorted, { filePath, exists }) => {
+    return exists ? { ...sorted, present: [...sorted.present, filePath] } : { ...sorted, absent: [...sorted.absent, filePath] }
+  }, { present: [], absent: [] })
+
 }
 
 export function generateRandomId(length = 8) {