Anton Bulakh 2e299c67cc
feat: untangle quartz from local configs in least amount of changes
For the current setup where people have to fork or at least clone quartz
this changes nothing - but it allows you to install quartz as a
devDependency via npm and have it actually work.

One real change is switch from `.quartz-cache` to
`node_modules/.cache/quartz` for transpilation results, this is an
artifact from my previous attempts, I guess with this one I can change
it back - but `node_modules/.cache` feels better to me.

edit: OTOH if you want to have quartz be a _completely_ separate binary
(which this also enables I think), having it create a node_modules
folder is weird, so I made a quick hack for that for now.

Example:
```bash
$ mkdir my-repo && cd my-repo
$ npm i quartz@necauqua/quartz#untangled # quartz@ prefix is important
$ cp node_modules/quartz/quartz.*.ts .   # copy the default configs
$ mkdir content && echo "# Hello World!" > content/index.md
$ npx quartz build --serve # this just works!
$ echo 'body { background: red !important; }' > styles.scss
```
Notice how I used my branch in the `npm i` line; ideally it'd be
`npm i quartz@jackyzha0/quartz`, or maybe we can somehow get the quartz
package on npm and it'll just be `npm i quartz`.
In the latter case `npx quartz build` will literally just work without
a local npm package at all.

Having some support for components and plugins being in separate npm
packages instead of people copying code around is not out of the picture
with this too btw.

Closes #502

MOVE ME
2025-01-23 20:53:58 +02:00

213 lines
6.9 KiB
TypeScript

import esbuild from "esbuild"
import remarkParse from "remark-parse"
import remarkRehype from "remark-rehype"
import { Processor, unified } from "unified"
import { Root as MDRoot } from "remark-parse/lib"
import { Root as HTMLRoot } from "hast"
import { MarkdownContent, ProcessedContent } from "../plugins/vfile"
import { PerfTimer } from "../util/perf"
import { read } from "to-vfile"
import { FilePath, FullSlug, slugifyFilePath } from "../util/path"
import path from "path"
import workerpool, { Promise as WorkerPromise } from "workerpool"
import { QuartzLogger } from "../util/log"
import { trace } from "../util/trace"
import { BuildCtx } from "../util/ctx"
// Processor that parses raw Markdown into an MD AST and applies the
// configured MD AST -> MD AST transformer plugins.
export type QuartzMdProcessor = Processor<MDRoot, MDRoot, MDRoot>
// Processor that compiles an MD AST into an HTML AST (no parse phase,
// hence the `undefined` input type) and applies HTML-level plugins.
export type QuartzHtmlProcessor = Processor<undefined, MDRoot, HTMLRoot>
/**
 * Builds the unified processor that turns raw Markdown text into a Markdown
 * AST, applying every transformer plugin's `markdownPlugins` in order.
 */
export function createMdProcessor(ctx: BuildCtx): QuartzMdProcessor {
  const mdPlugins = ctx.cfg.plugins.transformers.flatMap(
    (plugin) => plugin.markdownPlugins?.(ctx) ?? [],
  )

  const processor = unified()
    // base Markdown -> MD AST
    .use(remarkParse)
    // MD AST -> MD AST transforms
    .use(mdPlugins)

  // sadly the typing of `use` is not smart enough to infer the correct type
  // from our plugin list, so cast to the known processor shape
  return processor as unknown as QuartzMdProcessor
}
/**
 * Builds the unified processor that compiles a Markdown AST into an HTML AST,
 * then applies every transformer plugin's `htmlPlugins` in order.
 */
export function createHtmlProcessor(ctx: BuildCtx): QuartzHtmlProcessor {
  const htmlPlugins = ctx.cfg.plugins.transformers.flatMap(
    (plugin) => plugin.htmlPlugins?.(ctx) ?? [],
  )

  return (
    unified()
      // MD AST -> HTML AST
      .use(remarkRehype, { allowDangerousHtml: true })
      // HTML AST -> HTML AST transforms
      .use(htmlPlugins)
  )
}
/** Yields consecutive slices of `arr`, each at most `n` elements long. */
function* chunks<T>(arr: T[], n: number) {
  let start = 0
  while (start < arr.length) {
    yield arr.slice(start, start + n)
    start += n
  }
}
// Bundles `worker.ts` into a single ESM file placed next to the other
// transpiled build artifacts and returns the resulting file path. The
// `$config`/`$layout`/`$styles` aliases point the bundle at the user's local
// config files in the current working directory, untangling quartz's own
// sources from the user's repo.
async function transpileWorkerScript(ctx: BuildCtx): Promise<string> {
// import.meta.dirname is the cache folder, because we're in transpiled-build.mjs atm technically
const cacheFile = path.join(import.meta.dirname, "transpiled-worker.mjs")
const fp = path.join(ctx.quartzRoot, "worker.ts")
await esbuild.build({
entryPoints: [fp],
outfile: cacheFile,
bundle: true,
keepNames: true,
minifyWhitespace: true,
minifySyntax: true,
platform: "node",
format: "esm",
// keep node_modules packages external; workers resolve them at runtime
packages: "external",
sourcemap: true,
sourcesContent: false,
alias: {
$config: path.join(process.cwd(), "quartz.config.ts"),
$layout: path.join(process.cwd(), "quartz.layout.ts"),
$styles: path.join(process.cwd(), "styles.scss"),
quartz: path.resolve(ctx.quartzRoot, ".."),
},
plugins: [
{
// styles and inline scripts are irrelevant to the worker's markdown
// processing, so stub them out as empty text to keep the bundle lean
name: "css-and-scripts-as-text",
setup(build) {
build.onLoad({ filter: /\.scss$/ }, (_) => ({
contents: "",
loader: "text",
}))
build.onLoad({ filter: /\.inline\.(ts|js)$/ }, (_) => ({
contents: "",
loader: "text",
}))
},
},
],
})
return cacheFile
}
/**
 * Returns an async function that reads each file in `fps`, applies text-level
 * transforms, attaches base vfile data (filePath, relativePath, slug), and
 * parses + runs the given Markdown processor over it. Files that fail to
 * process are traced and skipped rather than aborting the whole batch.
 */
export function createFileParser(ctx: BuildCtx, fps: FilePath[]) {
  const { argv, cfg } = ctx
  return async (processor: QuartzMdProcessor) => {
    const parsed: MarkdownContent[] = []
    for (const fp of fps) {
      try {
        const perf = new PerfTimer()
        const file = await read(fp)

        // strip leading and trailing whitespace
        file.value = file.value.toString().trim()

        // Text -> Text transforms
        for (const plugin of cfg.plugins.transformers) {
          if (plugin.textTransform) {
            file.value = plugin.textTransform(ctx, file.value.toString())
          }
        }

        // base data properties that plugins may use
        file.data.filePath = file.path as FilePath
        file.data.relativePath = path.posix.relative(argv.directory, file.path) as FilePath
        file.data.slug = slugifyFilePath(file.data.relativePath)

        const ast = processor.parse(file)
        parsed.push([await processor.run(ast, file), file])

        if (argv.verbose) {
          console.log(`[markdown] ${fp} -> ${file.data.slug} (${perf.timeSince()})`)
        }
      } catch (err) {
        trace(`\nFailed to process markdown \`${fp}\``, err as Error)
      }
    }
    return parsed
  }
}
/**
 * Returns an async function that runs the given HTML processor over each
 * already-parsed Markdown AST in `mdContent`. Failures are traced and the
 * offending file is skipped.
 */
export function createMarkdownParser(ctx: BuildCtx, mdContent: MarkdownContent[]) {
  return async (processor: QuartzHtmlProcessor) => {
    const processed: ProcessedContent[] = []
    for (const [ast, file] of mdContent) {
      try {
        const perf = new PerfTimer()
        processed.push([await processor.run(ast as MDRoot, file), file])
        if (ctx.argv.verbose) {
          console.log(`[html] ${file.data.slug} (${perf.timeSince()})`)
        }
      } catch (err) {
        trace(`\nFailed to process html \`${file.data.filePath}\``, err as Error)
      }
    }
    return processed
  }
}
/** Rounds `num`, then constrains the result to be at least `min` and at most `max`. */
const clamp = (num: number, min: number, max: number) => {
  const atLeastMin = Math.max(Math.round(num), min)
  return Math.min(atLeastMin, max)
}
/**
 * Parses the given Markdown files into ProcessedContent (HTML AST + vfile).
 * Small inputs are processed single-threaded in this process; larger inputs
 * are transpiled worker-side and fanned out to a thread pool in CHUNK_SIZE
 * batches. Per-file failures are traced and skipped; worker-level failures
 * terminate the process via `errorHandler`.
 */
export async function parseMarkdown(ctx: BuildCtx, fps: FilePath[]): Promise<ProcessedContent[]> {
const { argv } = ctx
const perf = new PerfTimer()
const log = new QuartzLogger(argv.verbose)
// rough heuristics: 128 gives enough time for v8 to JIT and optimize parsing code paths
const CHUNK_SIZE = 128
const concurrency = ctx.argv.concurrency ?? clamp(fps.length / CHUNK_SIZE, 1, 4)
let res: ProcessedContent[] = []
log.start(`Parsing input files using ${concurrency} threads`)
if (concurrency === 1) {
try {
const mdRes = await createFileParser(ctx, fps)(createMdProcessor(ctx))
res = await createMarkdownParser(ctx, mdRes)(createHtmlProcessor(ctx))
} catch (error) {
// stop the spinner before rethrowing so the error output isn't garbled
log.end()
throw error
}
} else {
// transpile worker.ts once; every thread loads the same artifact
const transpiledWorker = await transpileWorkerScript(ctx)
const pool = workerpool.pool(path.join(ctx.quartzRoot, "bootstrap-worker.mjs"), {
minWorkers: "max",
maxWorkers: concurrency,
workerType: "thread",
// bootstrap-worker.mjs receives the transpiled worker path via argv
workerThreadOpts: { argv: [transpiledWorker] },
})
const errorHandler = (err: any) => {
console.error(`${err}`.replace(/^error:\s*/i, ""))
process.exit(1)
}
const mdPromises: WorkerPromise<[MarkdownContent[], FullSlug[]]>[] = []
for (const chunk of chunks(fps, CHUNK_SIZE)) {
mdPromises.push(pool.exec("parseMarkdown", [ctx.buildId, ctx.quartzRoot, argv, chunk]))
}
const mdResults: [MarkdownContent[], FullSlug[]][] =
await WorkerPromise.all(mdPromises).catch(errorHandler)
const childPromises: WorkerPromise<ProcessedContent[]>[] = []
// collect all extra slugs from every worker first, so the complete slug
// list is available to each processHtml call dispatched below
for (const [_, extraSlugs] of mdResults) {
ctx.allSlugs.push(...extraSlugs)
}
for (const [mdChunk, _] of mdResults) {
childPromises.push(
pool.exec("processHtml", [ctx.buildId, ctx.quartzRoot, argv, mdChunk, ctx.allSlugs]),
)
}
const results: ProcessedContent[][] = await WorkerPromise.all(childPromises).catch(errorHandler)
res = results.flat()
await pool.terminate()
}
log.end(`Parsed ${res.length} Markdown files in ${perf.timeSince()}`)
return res
}