Compare commits

...

7 Commits

Author SHA1 Message Date
Aaron Pham
eb8a4cce18 revert: redundant changes
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2025-10-05 20:04:19 -04:00
Aaron Pham
68682a8fe3 chore: revert vault specific branch
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2025-10-05 19:58:20 -04:00
Aaron Pham
f533902c75 feat: semantic search (1/n)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2025-10-05 19:50:52 -04:00
martyone
f14260b2ba fix(oxhugo): Do not discard embedded HTML (#2151)
In 'processors/parse.ts' the 'remarkRehype' plugin is used with
'allowDangerousHtml' enabled, but that needs to be combined with (e.g.)
'rehypeRaw' to have any effect on the output.
2025-10-02 10:51:40 -07:00
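
For context, the pattern the message describes looks roughly like this (a minimal sketch of the usual unified pipeline, not the actual 'processors/parse.ts' source):

import { unified } from "unified"
import remarkParse from "remark-parse"
import remarkRehype from "remark-rehype"
import rehypeRaw from "rehype-raw"
import rehypeStringify from "rehype-stringify"

// allowDangerousHtml keeps raw HTML nodes through the mdast-to-hast conversion;
// rehypeRaw then re-parses those raw nodes into real hast elements so the
// embedded HTML actually reaches the serialized output.
const processor = unified()
  .use(remarkParse)
  .use(remarkRehype, { allowDangerousHtml: true })
  .use(rehypeRaw)
  .use(rehypeStringify)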
dependabot[bot]
9ad3481da6 chore(deps): bump the production-dependencies group with 4 updates (#2146)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-09-29 22:41:50 -04:00
dependabot[bot]
3ff7ca4155 chore(deps): bump sigstore/cosign-installer in the ci-dependencies group (#2135)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-09-28 16:45:18 -04:00
dependabot[bot]
eb6cc6ff8e chore(deps): bump the production-dependencies group with 9 updates (#2134)
Bumps the production-dependencies group with 9 updates:

| Package | From | To |
| --- | --- | --- |
| [is-absolute-url](https://github.com/sindresorhus/is-absolute-url) | `4.0.1` | `5.0.0` |
| [pixi.js](https://github.com/pixijs/pixijs) | `8.13.1` | `8.13.2` |
| [preact](https://github.com/preactjs/preact) | `10.27.1` | `10.27.2` |
| [pretty-bytes](https://github.com/sindresorhus/pretty-bytes) | `7.0.1` | `7.1.0` |
| [satori](https://github.com/vercel/satori) | `0.18.2` | `0.18.3` |
| [sharp](https://github.com/lovell/sharp) | `0.34.3` | `0.34.4` |
| [workerpool](https://github.com/josdejong/workerpool) | `9.3.3` | `9.3.4` |
| [@types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node) | `24.3.1` | `24.5.2` |
| [esbuild](https://github.com/evanw/esbuild) | `0.25.9` | `0.25.10` |


Updates `is-absolute-url` from 4.0.1 to 5.0.0
- [Release notes](https://github.com/sindresorhus/is-absolute-url/releases)
- [Commits](https://github.com/sindresorhus/is-absolute-url/compare/v4.0.1...v5.0.0)

Updates `pixi.js` from 8.13.1 to 8.13.2
- [Release notes](https://github.com/pixijs/pixijs/releases)
- [Commits](https://github.com/pixijs/pixijs/compare/v8.13.1...v8.13.2)

Updates `preact` from 10.27.1 to 10.27.2
- [Release notes](https://github.com/preactjs/preact/releases)
- [Commits](https://github.com/preactjs/preact/compare/10.27.1...10.27.2)

Updates `pretty-bytes` from 7.0.1 to 7.1.0
- [Release notes](https://github.com/sindresorhus/pretty-bytes/releases)
- [Commits](https://github.com/sindresorhus/pretty-bytes/compare/v7.0.1...v7.1.0)

Updates `satori` from 0.18.2 to 0.18.3
- [Release notes](https://github.com/vercel/satori/releases)
- [Commits](https://github.com/vercel/satori/compare/0.18.2...0.18.3)

Updates `sharp` from 0.34.3 to 0.34.4
- [Release notes](https://github.com/lovell/sharp/releases)
- [Commits](https://github.com/lovell/sharp/compare/v0.34.3...v0.34.4)

Updates `workerpool` from 9.3.3 to 9.3.4
- [Changelog](https://github.com/josdejong/workerpool/blob/master/HISTORY.md)
- [Commits](https://github.com/josdejong/workerpool/commits)

Updates `@types/node` from 24.3.1 to 24.5.2
- [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases)
- [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/node)

Updates `esbuild` from 0.25.9 to 0.25.10
- [Release notes](https://github.com/evanw/esbuild/releases)
- [Changelog](https://github.com/evanw/esbuild/blob/main/CHANGELOG.md)
- [Commits](https://github.com/evanw/esbuild/compare/v0.25.9...v0.25.10)

---
updated-dependencies:
- dependency-name: is-absolute-url
  dependency-version: 5.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: production-dependencies
- dependency-name: pixi.js
  dependency-version: 8.13.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production-dependencies
- dependency-name: preact
  dependency-version: 10.27.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production-dependencies
- dependency-name: pretty-bytes
  dependency-version: 7.1.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: production-dependencies
- dependency-name: satori
  dependency-version: 0.18.3
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production-dependencies
- dependency-name: sharp
  dependency-version: 0.34.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production-dependencies
- dependency-name: workerpool
  dependency-version: 9.3.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production-dependencies
- dependency-name: "@types/node"
  dependency-version: 24.5.2
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: production-dependencies
- dependency-name: esbuild
  dependency-version: 0.25.10
  dependency-type: direct:development
  update-type: version-update:semver-patch
  dependency-group: production-dependencies
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-09-22 13:11:44 -07:00
24 changed files with 3257 additions and 839 deletions


@@ -37,7 +37,7 @@ jobs:
network=host
- name: Install cosign
if: github.event_name != 'pull_request'
uses: sigstore/cosign-installer@v3.9.2
uses: sigstore/cosign-installer@v3.10.0
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
if: github.event_name != 'pull_request'

docs/embeddings/hnsw.bin Normal file (binary file; contents not shown)

File diff suppressed because one or more lines are too long

Binary file not shown.

index.d.ts vendored (1 line changed)

@@ -13,3 +13,4 @@ interface CustomEventMap {
type ContentIndex = Record<FullSlug, ContentDetails>
declare const fetchData: Promise<ContentIndex>
declare const semanticCfg: import("./quartz/cfg").GlobalConfiguration["semanticSearch"]

package-lock.json generated (1517 lines changed)

File diff suppressed because it is too large

package.json

@@ -37,6 +37,7 @@
"dependencies": {
"@clack/prompts": "^0.11.0",
"@floating-ui/dom": "^1.7.4",
"@huggingface/transformers": "^3.7.5",
"@myriaddreamin/rehype-typst": "^0.6.0",
"@napi-rs/simple-git": "0.1.22",
"@tweenjs/tween.js": "^25.0.0",
@@ -48,23 +49,24 @@
"esbuild-sass-plugin": "^3.3.1",
"flexsearch": "^0.8.205",
"github-slugger": "^2.0.0",
"globby": "^14.1.0",
"globby": "^15.0.0",
"gray-matter": "^4.0.3",
"hast-util-to-html": "^9.0.5",
"hast-util-to-jsx-runtime": "^2.3.6",
"hast-util-to-string": "^3.0.1",
"is-absolute-url": "^4.0.1",
"is-absolute-url": "^5.0.0",
"js-yaml": "^4.1.0",
"lightningcss": "^1.30.1",
"lightningcss": "^1.30.2",
"mdast-util-find-and-replace": "^3.0.2",
"mdast-util-to-hast": "^13.2.0",
"mdast-util-to-string": "^4.0.0",
"micromorph": "^0.4.5",
"minimatch": "^10.0.3",
"pixi.js": "^8.13.1",
"preact": "^10.27.1",
"onnxruntime-web": "^1.23.0",
"pixi.js": "^8.13.2",
"preact": "^10.27.2",
"preact-render-to-string": "^6.6.1",
"pretty-bytes": "^7.0.1",
"pretty-bytes": "^7.1.0",
"pretty-time": "^1.1.0",
"reading-time": "^1.5.0",
"rehype-autolink-headings": "^7.1.0",
@@ -83,9 +85,9 @@
"remark-rehype": "^11.1.2",
"remark-smartypants": "^3.0.2",
"rfdc": "^1.4.1",
"satori": "^0.18.2",
"satori": "^0.18.3",
"serve-handler": "^6.1.6",
"sharp": "^0.34.3",
"sharp": "^0.34.4",
"shiki": "^1.26.2",
"source-map-support": "^0.5.21",
"to-vfile": "^8.0.0",
@@ -93,7 +95,7 @@
"unified": "^11.0.5",
"unist-util-visit": "^5.0.0",
"vfile": "^6.0.3",
"workerpool": "^9.3.3",
"workerpool": "^9.3.4",
"ws": "^8.18.3",
"yargs": "^18.0.0"
},
@@ -101,14 +103,14 @@
"@types/d3": "^7.4.3",
"@types/hast": "^3.0.4",
"@types/js-yaml": "^4.0.9",
"@types/node": "^24.3.1",
"@types/node": "^24.6.0",
"@types/pretty-time": "^1.1.5",
"@types/source-map-support": "^0.5.10",
"@types/ws": "^8.18.1",
"@types/yargs": "^17.0.33",
"esbuild": "^0.25.9",
"esbuild": "^0.25.10",
"prettier": "^3.6.2",
"tsx": "^4.20.5",
"tsx": "^4.20.6",
"typescript": "^5.9.2"
}
}

quartz.config.ts

@@ -1,6 +1,18 @@
import { QuartzConfig } from "./quartz/cfg"
import { GlobalConfiguration, QuartzConfig } from "./quartz/cfg"
import * as Plugin from "./quartz/plugins"
const semanticSearch: GlobalConfiguration["semanticSearch"] = {
enable: true,
model: "onnx-community/embeddinggemma-300m-ONNX",
aot: true,
dims: 768,
dtype: "fp32",
shardSizeRows: 1024,
hnsw: { M: 16, efConstruction: 200 },
chunking: { chunkSize: 256, chunkOverlap: 64 },
vllm: { enable: true, concurrency: 16, batchSize: 128 },
}
/**
* Quartz 4 Configuration
*
@@ -52,6 +64,7 @@ const config: QuartzConfig = {
},
},
},
semanticSearch,
},
plugins: {
transformers: [
@@ -84,6 +97,7 @@ const config: QuartzConfig = {
enableSiteMap: true,
enableRSS: true,
}),
Plugin.SemanticIndex(semanticSearch),
Plugin.Assets(),
Plugin.Static(),
Plugin.Favicon(),

quartz/cfg.ts

@@ -78,6 +78,34 @@ export interface GlobalConfiguration {
* Region Codes: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
*/
locale: ValidLocale
/** Semantic search configuration */
semanticSearch?: {
enable: boolean
model: string
aot: boolean
dtype: "fp32" | "fp16"
dims: number
shardSizeRows: number
manifestUrl?: string
manifestBaseUrl?: string
disableCache?: boolean
hnsw: {
M: number
efConstruction: number
efSearch?: number
}
chunking: {
chunkSize: number
chunkOverlap: number
noChunking?: boolean
}
vllm?: {
enable: boolean
vllmUrl?: string
concurrency: number
batchSize: number
}
}
}
export interface QuartzConfig {

quartz/components/Search.tsx

@@ -7,10 +7,12 @@ import { i18n } from "../i18n"
export interface SearchOptions {
enablePreview: boolean
includeButton: boolean
}
const defaultOptions: SearchOptions = {
enablePreview: true,
includeButton: true,
}
export default ((userOpts?: Partial<SearchOptions>) => {
@@ -29,19 +31,54 @@ export default ((userOpts?: Partial<SearchOptions>) => {
</svg>
<p>{i18n(cfg.locale).components.search.title}</p>
</button>
<div class="search-container">
<div class="search-space">
<input
autocomplete="off"
class="search-bar"
name="search"
type="text"
aria-label={searchPlaceholder}
placeholder={searchPlaceholder}
/>
<div class="search-layout" data-preview={opts.enablePreview}></div>
</div>
</div>
<search class="search-container">
<form class="search-space">
<div class="input-container">
<input
autocomplete="off"
class="search-bar"
name="search"
type="text"
aria-label={searchPlaceholder}
placeholder={searchPlaceholder}
/>
<div class="search-mode-toggle" role="radiogroup" aria-label="Search mode">
<button
type="button"
class="mode-option"
data-mode="lexical"
aria-pressed="true"
aria-label="Full-text search"
>
<svg viewBox="0 0 20 20" role="img" aria-hidden="true">
<g fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round">
<path d="M4 6h12M4 10h8M4 14h6" />
</g>
</svg>
<span class="sr-only">Full-text</span>
</button>
<button
type="button"
class="mode-option"
data-mode="semantic"
aria-pressed="false"
aria-label="Semantic search"
>
<svg viewBox="0 0 20 20" role="img" aria-hidden="true">
<g fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round">
<circle cx="5.2" cy="10" r="2.4" />
<circle cx="14.8" cy="4.8" r="2.1" />
<circle cx="14.8" cy="15.2" r="2.1" />
<path d="M7.1 8.7l5.2-2.4M7.1 11.3l5.2 2.4M14.8 6.9v6.2" />
</g>
</svg>
<span class="sr-only">Semantic</span>
</button>
</div>
</div>
<output class="search-layout" data-preview={opts.enablePreview} />
</form>
</search>
</div>
)
}

quartz/components/renderPage.tsx

@@ -25,6 +25,7 @@ const headerRegex = new RegExp(/h[1-6]/)
export function pageResources(
baseDir: FullSlug | RelativeURL,
staticResources: StaticResources,
cfg?: GlobalConfiguration,
): StaticResources {
const contentIndexPath = joinSegments(baseDir, "static/contentIndex.json")
const contentIndexScript = `const fetchData = fetch("${contentIndexPath}").then(data => data.json())`
@@ -48,6 +49,12 @@ export function pageResources(
spaPreserve: true,
script: contentIndexScript,
},
{
loadTime: "beforeDOMReady",
contentType: "inline",
spaPreserve: true,
script: `const semanticCfg = ${JSON.stringify(cfg?.semanticSearch ?? {})};`,
},
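// note: this inline script is what backs the `declare const semanticCfg` line
// added to index.d.ts above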
...staticResources.js,
],
additionalHead: staticResources.additionalHead,

quartz/components/scripts/search.inline.ts

@@ -1,6 +1,7 @@
import FlexSearch, { DefaultDocumentSearchResults } from "flexsearch"
import FlexSearch, { DefaultDocumentSearchResults, Id } from "flexsearch"
import { ContentDetails } from "../../plugins/emitters/contentIndex"
import { registerEscapeHandler, removeAllChildren } from "./util"
import { SemanticClient, type SemanticResult } from "./semantic.inline"
import { registerEscapeHandler, removeAllChildren, fetchCanonical } from "./util"
import { FullSlug, normalizeRelativeURLs, resolveRelative } from "../../util/path"
interface Item {
@@ -14,43 +15,46 @@ interface Item {
// Can be expanded with things like "term" in the future
type SearchType = "basic" | "tags"
let searchType: SearchType = "basic"
let currentSearchTerm: string = ""
const encoder = (str: string) => {
return str
.toLowerCase()
.split(/\s+/)
.filter((token) => token.length > 0)
type SearchMode = "lexical" | "semantic"
const SEARCH_MODE_STORAGE_KEY = "quartz:search:mode"
const loadStoredSearchMode = (): SearchMode | null => {
if (typeof window === "undefined") {
return null
}
try {
const stored = window.localStorage.getItem(SEARCH_MODE_STORAGE_KEY)
return stored === "lexical" || stored === "semantic" ? stored : null
} catch (err) {
console.warn("[Search] failed to read stored search mode:", err)
return null
}
}
let index = new FlexSearch.Document<Item>({
encode: encoder,
document: {
id: "id",
tag: "tags",
index: [
{
field: "title",
tokenize: "forward",
},
{
field: "content",
tokenize: "forward",
},
{
field: "tags",
tokenize: "forward",
},
],
},
})
const persistSearchMode = (mode: SearchMode) => {
if (typeof window === "undefined") {
return
}
try {
window.localStorage.setItem(SEARCH_MODE_STORAGE_KEY, mode)
} catch (err) {
console.warn("[Search] failed to persist search mode:", err)
}
}
let searchMode: SearchMode = "lexical"
let currentSearchTerm: string = ""
let rawSearchTerm: string = ""
let semantic: SemanticClient | null = null
let semanticReady = false
let semanticInitFailed = false
type SimilarityResult = { item: Item; similarity: number }
let chunkMetadata: Record<string, { parentSlug: string; chunkId: number }> = {}
let manifestIds: string[] = []
const p = new DOMParser()
const fetchContentCache: Map<FullSlug, Element[]> = new Map()
const contextWindowWords = 30
const numSearchResults = 8
const numTagResults = 5
const tokenizeTerm = (term: string) => {
const tokens = term.split(/\s+/).filter((t) => t.trim() !== "")
const tokenLen = tokens.length
@@ -108,6 +112,102 @@ function highlight(searchTerm: string, text: string, trim?: boolean) {
}`
}
// Encoder shared by FlexSearch for both indexing and querying
const encoder = (str: string) =>
str
.toLowerCase()
.split(/\s+/)
.filter((token) => token.length > 0)
/**
* Get parent document slug for a chunk ID
*/
function getParentSlug(slug: string): string {
const meta = chunkMetadata[slug]
return meta ? meta.parentSlug : slug
}
/**
* Aggregate semantic search results from chunks to documents using RRF
* @param results Raw semantic results (chunk-level)
* @param slugToDocIndex Map from document slug to index in idDataMap
* @returns Object with rrfScores (for ranking) and maxScores (for display)
*/
function aggregateChunkResults(
results: SemanticResult[],
slugToDocIndex: Map<FullSlug, number>,
): { rrfScores: Map<number, number>; maxScores: Map<number, number> } {
// Group chunks by parent document
const docChunks = new Map<string, Array<{ score: number }>>()
results.forEach(({ id, score }) => {
// id is an index into manifestIds (the chunk IDs from embeddings)
const chunkSlug = manifestIds[id]
if (!chunkSlug) return
// Get parent document slug
const parentSlug = getParentSlug(chunkSlug)
if (!docChunks.has(parentSlug)) {
docChunks.set(parentSlug, [])
}
docChunks.get(parentSlug)!.push({ score })
})
// Apply RRF for ranking and track max similarity for display
const rrfScores = new Map<number, number>()
const maxScores = new Map<number, number>()
const RRF_K = 60
for (const [parentSlug, chunks] of docChunks) {
const docIdx = slugToDocIndex.get(parentSlug as FullSlug)
if (typeof docIdx !== "number") continue
// Sort chunks by score descending to assign per-document ranks
chunks.sort((a, b) => b.score - a.score)
// RRF formula: sum(1 / (k + rank)) across all chunks, using per-document ranks
const rrfScore = chunks.reduce((sum, _, rank) => sum + 1.0 / (RRF_K + rank), 0)
// Max similarity score for display (original 0-1 range)
const maxScore = chunks[0].score
rrfScores.set(docIdx, rrfScore)
maxScores.set(docIdx, maxScore)
}
return { rrfScores, maxScores }
}
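// Worked example (illustration): with RRF_K = 60, a document matched by two chunks
// (per-document ranks 0 and 1) scores 1/60 + 1/61 ≈ 0.0331, while a single-chunk
// match scores 1/60 ≈ 0.0167, so documents with several relevant chunks rank first
// regardless of their raw similarity values.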
// Initialize the FlexSearch Document instance with the appropriate configuration
const index = new FlexSearch.Document<Item>({
tokenize: "forward",
encode: encoder,
document: {
id: "id",
tag: "tags",
index: [
{
field: "title",
tokenize: "forward",
},
{
field: "content",
tokenize: "forward",
},
{
field: "tags",
tokenize: "forward",
},
],
},
})
const p = new DOMParser()
const fetchContentCache: Map<FullSlug, Element[]> = new Map()
const numSearchResults = 10
const numTagResults = 10
function highlightHTML(searchTerm: string, el: HTMLElement) {
const p = new DOMParser()
const tokenizedTerms = tokenizeTerm(searchTerm)
@@ -149,7 +249,11 @@ function highlightHTML(searchTerm: string, el: HTMLElement) {
return html.body
}
async function setupSearch(searchElement: Element, currentSlug: FullSlug, data: ContentIndex) {
async function setupSearch(
searchElement: HTMLDivElement,
currentSlug: FullSlug,
data: ContentIndex,
) {
const container = searchElement.querySelector(".search-container") as HTMLElement
if (!container) return
@@ -164,12 +268,183 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
const searchLayout = searchElement.querySelector(".search-layout") as HTMLElement
if (!searchLayout) return
const searchSpace = searchElement?.querySelector(".search-space") as HTMLFormElement
if (!searchSpace) return
// Create semantic search progress bar
const progressBar = document.createElement("div")
progressBar.className = "semantic-search-progress"
progressBar.style.cssText = `
position: absolute;
bottom: 0;
left: 0;
height: 2px;
width: 0;
background: var(--secondary);
transition: width 0.3s ease, opacity 0.3s ease;
opacity: 0;
z-index: 9999;
`
searchBar.parentElement?.appendChild(progressBar)
const startSemanticProgress = () => {
progressBar.style.opacity = "1"
progressBar.style.width = "0"
setTimeout(() => {
progressBar.style.width = "100%"
}, 10)
}
const completeSemanticProgress = () => {
progressBar.style.opacity = "0"
setTimeout(() => {
progressBar.style.width = "0"
}, 300)
}
const resetProgressBar = () => {
progressBar.style.opacity = "0"
progressBar.style.width = "0"
}
const idDataMap = Object.keys(data) as FullSlug[]
const slugToIndex = new Map<FullSlug, number>()
idDataMap.forEach((slug, idx) => slugToIndex.set(slug, idx))
const modeToggle = searchSpace.querySelector(".search-mode-toggle") as HTMLDivElement | null
const modeButtons = modeToggle
? Array.from(modeToggle.querySelectorAll<HTMLButtonElement>(".mode-option"))
: []
const appendLayout = (el: HTMLElement) => {
searchLayout.appendChild(el)
}
const enablePreview = searchLayout.dataset.preview === "true"
if (!semantic && !semanticInitFailed) {
const client = new SemanticClient(semanticCfg)
try {
await client.ensureReady()
semantic = client
semanticReady = true
// Load chunk metadata and IDs from manifest
try {
const manifestUrl = "/embeddings/manifest.json"
const res = await fetch(manifestUrl)
if (res.ok) {
const manifest = await res.json()
chunkMetadata = manifest.chunkMetadata || {}
manifestIds = manifest.ids || []
console.debug(
`[Search] Loaded manifest: ${manifestIds.length} chunks, ${Object.keys(chunkMetadata).length} chunked documents`,
)
}
} catch (err) {
console.warn("[Search] failed to load chunk metadata:", err)
chunkMetadata = {}
manifestIds = []
}
} catch (err) {
console.warn("[SemanticClient] initialization failed:", err)
client.dispose()
semantic = null
semanticReady = false
semanticInitFailed = true
}
} else if (semantic && !semanticReady) {
try {
await semantic.ensureReady()
semanticReady = true
} catch (err) {
console.warn("[SemanticClient] became unavailable:", err)
semantic.dispose()
semantic = null
semanticReady = false
semanticInitFailed = true
}
}
const storedMode = loadStoredSearchMode()
if (storedMode === "semantic") {
if (semanticReady) {
searchMode = storedMode
}
} else if (storedMode === "lexical") {
searchMode = storedMode
}
if (!semanticReady && searchMode === "semantic") {
searchMode = "lexical"
}
let searchSeq = 0
let runSearchTimer: number | null = null
let lastInputAt = 0
searchLayout.dataset.mode = searchMode
const updateModeUI = (mode: SearchMode) => {
modeButtons.forEach((button) => {
const btnMode = (button.dataset.mode as SearchMode) ?? "lexical"
const isActive = btnMode === mode
button.classList.toggle("active", isActive)
button.setAttribute("aria-pressed", String(isActive))
})
if (modeToggle) {
modeToggle.dataset.mode = mode
}
searchLayout.dataset.mode = mode
}
const computeDebounceDelay = (term: string): number => {
const trimmed = term.trim()
const lastTerm = currentSearchTerm
const isExtension =
lastTerm.length > 0 && trimmed.length > lastTerm.length && trimmed.startsWith(lastTerm)
const isRetraction = lastTerm.length > trimmed.length
const isReplacement =
lastTerm.length > 0 && !trimmed.startsWith(lastTerm) && !lastTerm.startsWith(trimmed)
const baseFullQueryDelay = 200
const semanticPenalty = searchMode === "semantic" ? 60 : 0
if (isExtension && trimmed.length > 2) {
return baseFullQueryDelay + semanticPenalty
}
if (isReplacement && trimmed.length > 3) {
return Math.max(90, baseFullQueryDelay - 80)
}
if (isRetraction) {
return 90
}
return baseFullQueryDelay + (searchMode === "semantic" ? 40 : 0)
}
const triggerSearchWithMode = (mode: SearchMode) => {
if (mode === "semantic" && !semanticReady) {
return
}
if (searchMode === mode) return
searchMode = mode
updateModeUI(mode)
persistSearchMode(searchMode)
if (rawSearchTerm.trim() !== "") {
searchLayout.classList.add("display-results")
const token = ++searchSeq
void runSearch(rawSearchTerm, token)
}
}
updateModeUI(searchMode)
modeButtons.forEach((button) => {
const btnMode = (button.dataset.mode as SearchMode) ?? "lexical"
if (btnMode === "semantic") {
button.disabled = !semanticReady
button.setAttribute("aria-disabled", String(!semanticReady))
}
const handler = () => triggerSearchWithMode(btnMode)
button.addEventListener("click", handler)
window.addCleanup(() => button.removeEventListener("click", handler))
})
let preview: HTMLDivElement | undefined = undefined
let previewInner: HTMLDivElement | undefined = undefined
const results = document.createElement("div")
@@ -191,20 +466,23 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
removeAllChildren(preview)
}
searchLayout.classList.remove("display-results")
searchType = "basic" // reset search type after closing
searchButton.focus()
resetProgressBar()
}
function showSearch(searchTypeNew: SearchType) {
searchType = searchTypeNew
if (sidebar) sidebar.style.zIndex = "1"
function showSearch(type: SearchType) {
container.classList.add("active")
if (type === "tags") {
searchBar.value = "#"
rawSearchTerm = "#"
}
searchBar.focus()
}
let currentHover: HTMLInputElement | null = null
async function shortcutHandler(e: HTMLElementEventMap["keydown"]) {
if (e.key === "k" && (e.ctrlKey || e.metaKey) && !e.shiftKey) {
if ((e.key === "/" || e.key === "k") && (e.ctrlKey || e.metaKey) && !e.shiftKey) {
e.preventDefault()
const searchBarOpen = container.classList.contains("active")
searchBarOpen ? hideSearch() : showSearch("basic")
@@ -214,9 +492,6 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
e.preventDefault()
const searchBarOpen = container.classList.contains("active")
searchBarOpen ? hideSearch() : showSearch("tags")
// add "#" prefix for tag search
searchBar.value = "#"
return
}
@@ -226,20 +501,29 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
// If search is active, then we will render the first result and display accordingly
if (!container.classList.contains("active")) return
if (e.key === "Enter" && !e.isComposing) {
if (e.key === "Enter") {
// If result has focus, navigate to that one, otherwise pick first result
let anchor: HTMLAnchorElement | undefined
if (results.contains(document.activeElement)) {
const active = document.activeElement as HTMLInputElement
if (active.classList.contains("no-match")) return
await displayPreview(active)
active.click()
anchor = document.activeElement as HTMLAnchorElement
if (anchor.classList.contains("no-match")) return
await displayPreview(anchor)
e.preventDefault()
anchor.click()
} else {
const anchor = document.getElementsByClassName("result-card")[0] as HTMLInputElement | null
anchor = document.getElementsByClassName("result-card")[0] as HTMLAnchorElement
if (!anchor || anchor.classList.contains("no-match")) return
await displayPreview(anchor)
e.preventDefault()
anchor.click()
}
} else if (e.key === "ArrowUp" || (e.shiftKey && e.key === "Tab")) {
if (anchor !== undefined)
window.spaNavigate(new URL(new URL(anchor.href).pathname, window.location.toString()))
} else if (
e.key === "ArrowUp" ||
(e.shiftKey && e.key === "Tab") ||
(e.ctrlKey && e.key === "p")
) {
e.preventDefault()
if (results.contains(document.activeElement)) {
// If an element in results-container already has focus, focus previous one
@@ -252,7 +536,7 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
if (prevResult) currentHover = prevResult
await displayPreview(prevResult)
}
} else if (e.key === "ArrowDown" || e.key === "Tab") {
} else if (e.key === "ArrowDown" || e.key === "Tab" || (e.ctrlKey && e.key === "n")) {
e.preventDefault()
// The results should already be focused, so we need to find the next one.
// The activeElement is the search bar, so we need to find the first result and focus it.
@@ -269,25 +553,33 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
}
}
const formatForDisplay = (term: string, id: number) => {
const formatForDisplay = (term: string, id: number, renderType: SearchType) => {
const slug = idDataMap[id]
// Check if query contains title words (for boosting exact matches)
const queryTokens = tokenizeTerm(term)
const titleTokens = tokenizeTerm(data[slug].title ?? "")
const titleMatch = titleTokens.some((t) => queryTokens.includes(t))
return {
id,
slug,
title: searchType === "tags" ? data[slug].title : highlight(term, data[slug].title ?? ""),
title: renderType === "tags" ? data[slug].title : highlight(term, data[slug].title ?? ""),
content: highlight(term, data[slug].content ?? "", true),
tags: highlightTags(term.substring(1), data[slug].tags),
tags: highlightTags(term, data[slug].tags, renderType),
titleMatch, // Add title match flag for boosting
}
}
function highlightTags(term: string, tags: string[]) {
if (!tags || searchType !== "tags") {
function highlightTags(term: string, tags: string[], renderType: SearchType) {
if (!tags || renderType !== "tags") {
return []
}
const tagTerm = term.toLowerCase()
return tags
.map((tag) => {
if (tag.toLowerCase().includes(term.toLowerCase())) {
if (tag.toLowerCase().includes(tagTerm)) {
return `<li><p class="match-tag">#${tag}</p></li>`
} else {
return `<li><p>#${tag}</p></li>`
@@ -300,24 +592,40 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
return new URL(resolveRelative(currentSlug, slug), location.toString())
}
const resultToHTML = ({ slug, title, content, tags }: Item) => {
const resultToHTML = ({ item, percent }: { item: Item; percent: number | null }) => {
const { slug, title, content, tags, target } = item
const htmlTags = tags.length > 0 ? `<ul class="tags">${tags.join("")}</ul>` : ``
const itemTile = document.createElement("a")
const titleContent = target ? highlight(currentSearchTerm, target) : title
const subscript = target ? `<b>${slug}</b>` : ``
let percentLabel = "—"
let percentAttr = ""
if (percent !== null && Number.isFinite(percent)) {
const bounded = Math.max(0, Math.min(100, percent))
percentLabel = `${bounded.toFixed(1)}%`
percentAttr = bounded.toFixed(3)
}
itemTile.classList.add("result-card")
itemTile.id = slug
itemTile.href = resolveUrl(slug).toString()
itemTile.innerHTML = `
<h3 class="card-title">${title}</h3>
${htmlTags}
<p class="card-description">${content}</p>
`
itemTile.addEventListener("click", (event) => {
if (event.altKey || event.ctrlKey || event.metaKey || event.shiftKey) return
hideSearch()
})
itemTile.innerHTML = `<hgroup>
<h3>${titleContent}</h3>
${subscript}${htmlTags}
${searchMode === "semantic" ? `<span class="result-likelihood" title="match likelihood">&nbsp;${percentLabel}</span>` : ""}
${enablePreview && window.innerWidth > 600 ? "" : `<p>${content}</p>`}
</hgroup>`
if (percentAttr) itemTile.dataset.scorePercent = percentAttr
else delete itemTile.dataset.scorePercent
const handler = (event: MouseEvent) => {
if (event.altKey || event.ctrlKey || event.metaKey || event.shiftKey) return
const handler = (evt: MouseEvent) => {
if (evt.altKey || evt.ctrlKey || evt.metaKey || evt.shiftKey) return
const anchor = evt.currentTarget as HTMLAnchorElement | null
if (!anchor) return
evt.preventDefault()
const href = anchor.getAttribute("href")
if (!href) return
const url = new URL(href, window.location.toString())
window.spaNavigate(url)
hideSearch()
}
@@ -335,15 +643,22 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
return itemTile
}
async function displayResults(finalResults: Item[]) {
async function displayResults(finalResults: SimilarityResult[]) {
removeAllChildren(results)
if (finalResults.length === 0) {
results.innerHTML = `<a class="result-card no-match">
<h3>No results.</h3>
<p>Try another search term?</p>
</a>`
currentHover = null
} else {
results.append(...finalResults.map(resultToHTML))
const decorated = finalResults.map(({ item, similarity }) => {
if (!Number.isFinite(similarity)) return { item, percent: null }
const bounded = Math.max(-1, Math.min(1, similarity))
const percent = ((bounded + 1) / 2) * 100
return { item, percent }
})
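// e.g. a cosine similarity of 0.62 displays as ((0.62 + 1) / 2) * 100 = 81.0%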
results.append(...decorated.map(resultToHTML))
}
if (finalResults.length === 0 && preview) {
@@ -363,8 +678,8 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
return fetchContentCache.get(slug) as Element[]
}
const targetUrl = resolveUrl(slug).toString()
const contents = await fetch(targetUrl)
const targetUrl = resolveUrl(slug)
const contents = await fetchCanonical(targetUrl)
.then((res) => res.text())
.then((contents) => {
if (contents === undefined) {
@@ -394,73 +709,296 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
const highlights = [...preview.getElementsByClassName("highlight")].sort(
(a, b) => b.innerHTML.length - a.innerHTML.length,
)
highlights[0]?.scrollIntoView({ block: "start" })
if (highlights.length > 0) {
const highlight = highlights[0]
const container = preview
if (container && highlight) {
// Get the relative positions
const containerRect = container.getBoundingClientRect()
const highlightRect = highlight.getBoundingClientRect()
// Calculate the scroll position relative to the container
const relativeTop = highlightRect.top - containerRect.top + container.scrollTop - 20 // 20px buffer
// Smoothly scroll the container
container.scrollTo({
top: relativeTop,
behavior: "smooth",
})
}
}
}
async function onType(e: HTMLElementEventMap["input"]) {
async function runSearch(rawTerm: string, token: number) {
if (!searchLayout || !index) return
currentSearchTerm = (e.target as HTMLInputElement).value
searchLayout.classList.toggle("display-results", currentSearchTerm !== "")
searchType = currentSearchTerm.startsWith("#") ? "tags" : "basic"
const trimmed = rawTerm.trim()
if (trimmed === "") {
removeAllChildren(results)
if (preview) {
removeAllChildren(preview)
}
currentHover = null
searchLayout.classList.remove("display-results")
resetProgressBar()
return
}
let searchResults: DefaultDocumentSearchResults<Item>
if (searchType === "tags") {
currentSearchTerm = currentSearchTerm.substring(1).trim()
const separatorIndex = currentSearchTerm.indexOf(" ")
if (separatorIndex != -1) {
// search by title and content index and then filter by tag (implemented in flexsearch)
const tag = currentSearchTerm.substring(0, separatorIndex)
const query = currentSearchTerm.substring(separatorIndex + 1).trim()
searchResults = await index.searchAsync({
query: query,
// return at least 10000 documents, so it is enough to filter them by tag (implemented in flexsearch)
const modeForRanking: SearchMode = searchMode
const initialType: SearchType = trimmed.startsWith("#") ? "tags" : "basic"
let workingType: SearchType = initialType
let highlightTerm = trimmed
let tagTerm = ""
let searchResults: DefaultDocumentSearchResults<Item> = []
if (initialType === "tags") {
tagTerm = trimmed.substring(1).trim()
const separatorIndex = tagTerm.indexOf(" ")
if (separatorIndex !== -1) {
const tag = tagTerm.substring(0, separatorIndex).trim()
const query = tagTerm.substring(separatorIndex + 1).trim()
const results = await index.searchAsync({
query,
limit: Math.max(numSearchResults, 10000),
index: ["title", "content"],
tag: { tags: tag },
})
for (let searchResult of searchResults) {
searchResult.result = searchResult.result.slice(0, numSearchResults)
}
// set search type to basic and remove tag from term for proper highlighting and scroll
searchType = "basic"
currentSearchTerm = query
if (token !== searchSeq) return
searchResults = Object.values(results)
workingType = "basic"
highlightTerm = query
} else {
// default search by tags index
searchResults = await index.searchAsync({
query: currentSearchTerm,
const results = await index.searchAsync({
query: tagTerm,
limit: numSearchResults,
index: ["tags"],
})
if (token !== searchSeq) return
searchResults = Object.values(results)
highlightTerm = tagTerm
}
} else if (searchType === "basic") {
searchResults = await index.searchAsync({
query: currentSearchTerm,
} else {
const results = await index.searchAsync({
query: highlightTerm,
limit: numSearchResults,
index: ["title", "content"],
})
if (token !== searchSeq) return
searchResults = Object.values(results)
}
const coerceIds = (hit?: DefaultDocumentSearchResults<Item>[number]): number[] => {
if (!hit) return []
return hit.result
.map((value: Id) => {
if (typeof value === "number") {
return value
}
const parsed = Number.parseInt(String(value), 10)
return Number.isNaN(parsed) ? null : parsed
})
.filter((value): value is number => value !== null)
}
const getByField = (field: string): number[] => {
const results = searchResults.filter((x) => x.field === field)
return results.length === 0 ? [] : ([...results[0].result] as number[])
const hit = searchResults.find((x) => x.field === field)
return coerceIds(hit)
}
// order titles ahead of content
const allIds: Set<number> = new Set([
...getByField("title"),
...getByField("content"),
...getByField("tags"),
])
const finalResults = [...allIds].map((id) => formatForDisplay(currentSearchTerm, id))
await displayResults(finalResults)
currentSearchTerm = highlightTerm
const candidateItems = new Map<string, Item>()
const ensureItem = (id: number): Item | null => {
const slug = idDataMap[id]
if (!slug) return null
const cached = candidateItems.get(slug)
if (cached) return cached
const item = formatForDisplay(highlightTerm, id, workingType)
if (item) {
candidateItems.set(slug, item)
return item
}
return null
}
const baseIndices: number[] = []
for (const id of allIds) {
const item = ensureItem(id)
if (!item) continue
const idx = slugToIndex.get(item.slug)
if (typeof idx === "number") {
baseIndices.push(idx)
}
}
let semanticIds: number[] = []
const semanticSimilarity = new Map<number, number>()
const integrateIds = (ids: number[]) => {
ids.forEach((docId) => {
ensureItem(docId)
})
}
const orchestrator = semanticReady && semantic ? semantic : null
const resolveSimilarity = (item: Item): number => {
const semanticHit = semanticSimilarity.get(item.id)
return semanticHit ?? Number.NaN
}
const render = async () => {
if (token !== searchSeq) return
const useSemantic = semanticReady && semanticIds.length > 0
const weights =
modeForRanking === "semantic" && useSemantic
? { base: 0.3, semantic: 1.0 }
: { base: 1.0, semantic: useSemantic ? 0.3 : 0 }
const rrf = new Map<string, number>()
const push = (ids: number[], weight: number, applyTitleBoost: boolean = false) => {
if (!ids.length || weight <= 0) return
ids.forEach((docId, rank) => {
const slug = idDataMap[docId]
if (!slug) return
const item = ensureItem(docId)
if (!item) return
// Apply title boost for FlexSearch results (1.5x boost for exact title matches)
let effectiveWeight = weight
if (applyTitleBoost && item.titleMatch) {
effectiveWeight *= 1.5
}
const prev = rrf.get(slug) ?? 0
rrf.set(slug, prev + effectiveWeight / (1 + rank))
})
}
push(baseIndices, weights.base, true) // FlexSearch with title boost
push(semanticIds, weights.semantic, false) // Semantic without boost
const rankedEntries = Array.from(candidateItems.values())
.map((item) => ({ item, score: rrf.get(item.slug) ?? 0 }))
.sort((a, b) => b.score - a.score)
.slice(0, numSearchResults)
const displayEntries: SimilarityResult[] = []
for (const entry of rankedEntries) {
const similarity = resolveSimilarity(entry.item)
displayEntries.push({ item: entry.item, similarity })
}
await displayResults(displayEntries)
}
await render()
if (workingType === "tags" || !orchestrator || !semanticReady || highlightTerm.length < 2) {
return
}
const showProgress = modeForRanking === "semantic"
if (showProgress) {
startSemanticProgress()
}
try {
const { semantic: semRes } = await orchestrator.search(
highlightTerm,
numSearchResults * 3, // Request more chunks to ensure good document coverage
)
if (token !== searchSeq) {
if (showProgress) completeSemanticProgress()
return
}
// Aggregate chunk results to document level using RRF
const { rrfScores: semRrfScores, maxScores: semMaxScores } = aggregateChunkResults(
semRes,
slugToIndex,
)
// Use RRF scores for ranking
semanticIds = Array.from(semRrfScores.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, numSearchResults)
.map(([docIdx]) => docIdx)
// Use max chunk similarity for display (0-1 range)
semanticSimilarity.clear()
semMaxScores.forEach((score, docIdx) => {
semanticSimilarity.set(docIdx, score)
})
integrateIds(semanticIds)
if (showProgress) completeSemanticProgress()
} catch (err) {
console.warn("[SemanticClient] search failed:", err)
if (showProgress) completeSemanticProgress()
orchestrator.dispose()
semantic = null
semanticReady = false
semanticInitFailed = true
if (searchMode === "semantic") {
searchMode = "lexical"
updateModeUI(searchMode)
}
modeButtons.forEach((button) => {
if ((button.dataset.mode as SearchMode) === "semantic") {
button.disabled = true
button.setAttribute("aria-disabled", "true")
}
})
}
await render()
}
function onType(e: HTMLElementEventMap["input"]) {
if (!searchLayout || !index) return
rawSearchTerm = (e.target as HTMLInputElement).value
const hasQuery = rawSearchTerm.trim() !== ""
searchLayout.classList.toggle("display-results", hasQuery)
const term = rawSearchTerm
const token = ++searchSeq
if (runSearchTimer !== null) {
window.clearTimeout(runSearchTimer)
runSearchTimer = null
}
if (!hasQuery) {
void runSearch("", token)
return
}
const now = performance.now()
lastInputAt = now
const delay = computeDebounceDelay(term)
const scheduledAt = lastInputAt
runSearchTimer = window.setTimeout(() => {
if (scheduledAt !== lastInputAt) {
return
}
runSearchTimer = null
void runSearch(term, token)
}, delay)
}
document.addEventListener("keydown", shortcutHandler)
window.addCleanup(() => document.removeEventListener("keydown", shortcutHandler))
searchButton.addEventListener("click", () => showSearch("basic"))
window.addCleanup(() => searchButton.removeEventListener("click", () => showSearch("basic")))
const openHandler = () => showSearch("basic")
searchButton.addEventListener("click", openHandler)
window.addCleanup(() => searchButton.removeEventListener("click", openHandler))
searchBar.addEventListener("input", onType)
window.addCleanup(() => searchBar.removeEventListener("input", onType))
window.addCleanup(() => {
if (runSearchTimer !== null) {
window.clearTimeout(runSearchTimer)
runSearchTimer = null
}
resetProgressBar()
})
registerEscapeHandler(container, hideSearch)
await fillDocument(data)
@@ -468,17 +1006,17 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
/**
* Fills flexsearch document with data
* @param index index to fill
* @param data data to fill index with
*/
let indexPopulated = false
async function fillDocument(data: ContentIndex) {
if (indexPopulated) return
let id = 0
const promises: Array<Promise<unknown>> = []
const promises = []
for (const [slug, fileData] of Object.entries<ContentDetails>(data)) {
promises.push(
index.addAsync(id++, {
//@ts-ignore
index.addAsync({
id,
slug: slug as FullSlug,
title: fileData.title,
@@ -486,6 +1024,7 @@ async function fillDocument(data: ContentIndex) {
tags: fileData.tags,
}),
)
id++
}
await Promise.all(promises)
@@ -495,7 +1034,9 @@ async function fillDocument(data: ContentIndex) {
document.addEventListener("nav", async (e: CustomEventMap["nav"]) => {
const currentSlug = e.detail.url
const data = await fetchData
const searchElement = document.getElementsByClassName("search")
const searchElement = document.getElementsByClassName(
"search",
) as HTMLCollectionOf<HTMLDivElement>
for (const element of searchElement) {
await setupSearch(element, currentSlug, data)
}

quartz/components/scripts/semantic.inline.ts

@@ -0,0 +1,182 @@
export type SemanticResult = { id: number; score: number }

type ProgressMessage = {
  type: "progress"
  loadedRows: number
  totalRows: number
}
type ReadyMessage = { type: "ready" }
type ResultMessage = {
  type: "search-result"
  seq: number
  semantic: SemanticResult[]
}
type ErrorMessage = { type: "error"; seq?: number; message: string }

type SearchPayload = {
  semantic: SemanticResult[]
}

type PendingResolver = {
  resolve: (payload: SearchPayload) => void
  reject: (err: Error) => void
}

export class SemanticClient {
  private ready: Promise<void>
  private resolveReady!: () => void
  private worker: Worker | null = null
  private pending = new Map<number, PendingResolver>()
  private seq = 0
  private disposed = false
  private readySettled = false
  private configured = false
  private lastError: Error | null = null

  constructor(private cfg?: any) {
    this.ready = new Promise((resolve) => {
      this.resolveReady = () => {
        if (this.readySettled) return
        this.readySettled = true
        resolve()
      }
    })
    if (this.cfg?.enable === false) {
      this.lastError = new Error("semantic search disabled by configuration")
      this.resolveReady()
      return
    }
    this.boot()
  }

  private boot() {
    try {
      this.worker = new Worker("/semantic.worker.js", { type: "module" })
    } catch (err) {
      this.handleFatal(err)
      return
    }
    this.setupWorker()
    this.startInit()
  }

  private setupWorker() {
    if (!this.worker) return
    this.worker.onmessage = (
      event: MessageEvent<ProgressMessage | ReadyMessage | ResultMessage | ErrorMessage>,
    ) => {
      const msg = event.data
      if (msg.type === "progress") {
        // Progress updates during initialization - can be logged if needed
        return
      }
      if (msg.type === "ready") {
        this.configured = true
        this.lastError = null
        this.resolveReady()
        return
      }
      if (msg.type === "search-result") {
        const pending = this.pending.get(msg.seq)
        if (pending) {
          this.pending.delete(msg.seq)
          pending.resolve({ semantic: msg.semantic ?? [] })
        }
        return
      }
      if (msg.type === "error") {
        if (typeof msg.seq === "number") {
          const pending = this.pending.get(msg.seq)
          if (pending) {
            this.pending.delete(msg.seq)
            pending.reject(new Error(msg.message))
          }
        } else {
          this.handleFatal(msg.message)
        }
      }
    }
  }

  private startInit() {
    if (!this.worker) return
    const manifestUrl =
      typeof this.cfg?.manifestUrl === "string" && this.cfg.manifestUrl.length > 0
        ? this.cfg.manifestUrl
        : "/embeddings/manifest.json"
    const disableCache = Boolean(this.cfg?.disableCache)
    const baseUrl =
      typeof this.cfg?.manifestBaseUrl === "string" ? this.cfg.manifestBaseUrl : undefined
    this.worker.postMessage({
      type: "init",
      cfg: this.cfg,
      manifestUrl,
      baseUrl,
      disableCache,
    })
  }

  private rejectAll(err: Error, fatal = false) {
    for (const [id, pending] of this.pending.entries()) {
      pending.reject(err)
      this.pending.delete(id)
    }
    if (fatal) {
      this.lastError = err
      this.configured = false
      if (!this.readySettled) {
        this.resolveReady()
      }
    }
  }

  private handleFatal(err: unknown) {
    const error = err instanceof Error ? err : new Error(String(err))
    console.error("[SemanticClient] initialization failure:", error)
    this.rejectAll(error, true)
    if (this.worker) {
      this.worker.postMessage({ type: "reset" })
      this.worker.terminate()
      this.worker = null
    }
  }

  async ensureReady() {
    await this.ready
    if (!this.configured) {
      throw this.lastError ?? new Error("semantic search unavailable")
    }
  }

  async search(text: string, k: number): Promise<SearchPayload> {
    if (this.disposed) {
      throw new Error("semantic client has been disposed")
    }
    await this.ensureReady()
    if (!this.worker || !this.configured) {
      throw this.lastError ?? new Error("worker unavailable")
    }
    return new Promise<SearchPayload>((resolve, reject) => {
      const seq = ++this.seq
      this.pending.set(seq, { resolve, reject })
      this.worker?.postMessage({ type: "search", text, k, seq })
    })
  }

  dispose() {
    if (this.disposed) return
    this.disposed = true
    this.rejectAll(new Error("semantic client disposed"))
    if (this.worker) {
      this.worker.postMessage({ type: "reset" })
      this.worker.terminate()
    }
    this.worker = null
    this.configured = false
  }
}
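
A usage sketch, mirroring how search.inline.ts above drives the client (the query string and result handling are illustrative):

const client = new SemanticClient(semanticCfg)
try {
  await client.ensureReady() // resolves once the worker posts "ready"; throws if init failed
  const { semantic } = await client.search("static site search", 24)
  // each result id indexes into manifest.ids; score is the raw chunk similarity
  console.log(semantic.slice(0, 3))
} catch (err) {
  console.warn("semantic search unavailable:", err)
  client.dispose()
}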

quartz/components/styles/search.scss

@@ -77,16 +77,97 @@
margin-bottom: 2em;
}
& > input {
& > .input-container {
align-items: center;
gap: 0.5rem;
display: flex;
flex-wrap: wrap;
position: relative;
box-sizing: border-box;
padding: 0.5em 1em;
font-family: var(--bodyFont);
color: var(--dark);
font-size: 1.1em;
border: 1px solid var(--lightgray);
&:focus {
outline: none;
.search-bar {
flex: 1 1 auto;
min-width: 0;
box-sizing: border-box;
padding: 0.5em 1em;
font-family: var(--bodyFont);
color: var(--dark);
font-size: 1.1em;
border: none;
background: transparent;
&:focus {
outline: none;
}
}
.semantic-search-progress {
position: absolute;
bottom: 0;
left: 0;
right: 0;
height: 2px;
background-color: var(--secondary);
width: 0;
opacity: 0;
transition:
width 0.3s ease,
opacity 0.2s ease;
pointer-events: none;
}
.search-mode-toggle {
display: inline-flex;
align-items: center;
border-radius: 9999px;
height: 1.4rem;
background-color: color-mix(in srgb, var(--darkgray) 12%, transparent);
margin-right: 1rem;
.mode-option {
border: none;
background: transparent;
font: inherit;
color: var(--gray);
border-radius: 9999px;
cursor: pointer;
transition:
background-color 0.2s ease,
color 0.2s ease;
display: inline-flex;
align-items: center;
justify-content: center;
width: 1.5rem;
height: 1.5rem;
position: relative;
&:focus-visible {
outline: 2px solid var(--tertiary);
outline-offset: 2px;
}
&.active {
background-color: var(--secondary);
color: var(--light);
}
svg {
width: 18px;
height: 18px;
}
.sr-only {
position: absolute;
width: 1px;
height: 1px;
padding: 0;
margin: -1px;
overflow: hidden;
clip: rect(0, 0, 0, 0);
white-space: nowrap;
border: 0;
}
}
}
}

quartz/embed_build.py Normal file (542 lines)

@@ -0,0 +1,542 @@
# /// script
# requires-python = ">=3.11"
# dependencies = [
#   "langchain-text-splitters",
#   "numpy",
#   "openai",
#   "sentence-transformers",
#   "tiktoken",
# ]
# ///
from __future__ import annotations

import os, json, argparse, hashlib, math, random, logging
from pathlib import Path
from functools import lru_cache
from collections.abc import Iterable
from concurrent.futures import ThreadPoolExecutor, as_completed

import tiktoken, numpy as np
from openai import OpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

logger = logging.getLogger(__name__)

DEFAULT_VLLM_URL = os.environ.get("VLLM_URL") or os.environ.get("VLLM_EMBED_URL") or "http://127.0.0.1:8000/v1"


def resolve_vllm_base_url(url: str) -> str:
    if not url:
        raise ValueError("vLLM URL must be non-empty")
    trimmed = url.rstrip("/")
    if trimmed.endswith("/v1/embeddings"):
        trimmed = trimmed[: -len("/embeddings")]
    elif trimmed.endswith("/embeddings"):
        trimmed = trimmed[: trimmed.rfind("/")]
    if not trimmed.endswith("/v1"):
        trimmed = f"{trimmed}/v1"
    return trimmed
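

# e.g. "http://127.0.0.1:8000", "http://127.0.0.1:8000/v1", and
# "http://127.0.0.1:8000/v1/embeddings" all resolve to "http://127.0.0.1:8000/v1"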
def load_jsonl(fp: str) -> Iterable[dict]:
    with open(fp, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            yield json.loads(line)


def l2_normalize_rows(x: np.ndarray) -> np.ndarray:
    # x: [N, D]
    norms = np.linalg.norm(x, ord=2, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    return x / norms


@lru_cache(maxsize=1)
def get_tiktoken_encoder():
    # Get the o200k_base tokenizer (GPT-4o) with caching;
    # change this if you want something else.
    return tiktoken.get_encoding("o200k_base")


def count_tokens(text: str) -> int:
    # Count tokens using the o200k_base encoding
    encoder = get_tiktoken_encoder()
    return len(encoder.encode(text))


def get_text_splitter(chunk_size: int, overlap: int):
    encoder = get_tiktoken_encoder()
    return RecursiveCharacterTextSplitter(
        chunk_size=chunk_size * 4,  # character approximation
        chunk_overlap=overlap * 4,
        separators=["\n\n", "\n", ". ", " ", ""],
        length_function=lambda t: len(encoder.encode(t)),
        is_separator_regex=False,
    )
def chunk_document(
    doc: dict, max_tokens: int = 512, overlap_tokens: int = 128, min_chunk_size: int = 100
) -> list[dict]:
    """
    Chunk a document if it exceeds max_tokens

    Args:
        doc: {'slug': str, 'title': str, 'text': str}
        max_tokens: Maximum tokens per chunk
        overlap_tokens: Overlap between chunks
        min_chunk_size: Minimum chunk size (avoid tiny chunks)

    Returns:
        List of chunk dicts with metadata
    """
    text = doc["text"]
    token_count = count_tokens(text)

    # No chunking needed
    if token_count <= max_tokens:
        return [
            {
                "slug": doc["slug"],
                "title": doc.get("title", doc["slug"]),
                "text": text,
                "chunk_id": 0,
                "parent_slug": doc["slug"],
                "is_chunked": False,
            }
        ]

    # Apply chunking
    splitter = get_text_splitter(max_tokens, overlap_tokens)
    raw_chunks = splitter.split_text(text)

    # Filter out tiny chunks
    valid_chunks = [c for c in raw_chunks if count_tokens(c) >= min_chunk_size]

    return [
        {
            "slug": f"{doc['slug']}#chunk{i}",
            "title": doc.get("title", doc["slug"]),
            "text": chunk,
            "chunk_id": i,
            "parent_slug": doc["slug"],
            "is_chunked": True,
        }
        for i, chunk in enumerate(valid_chunks)
    ]
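

# Illustration: a note under max_tokens passes through as a single record whose slug
# is unchanged; a longer note yields slugs like "notes/foo#chunk0", "notes/foo#chunk1",
# each carrying parent_slug="notes/foo" so chunks can be re-aggregated per document
# at query time (see aggregateChunkResults in search.inline.ts above).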
def write_shards(vectors: np.ndarray, shard_size: int, dtype: str, out_dir: Path) -> list[dict]:
    out_dir.mkdir(parents=True, exist_ok=True)
    rows, dims = vectors.shape
    shards_meta: list[dict] = []
    np_dtype = np.float16 if dtype == "fp16" else np.float32
    bytes_per_value = np.dtype(np_dtype).itemsize
    row_offset = 0
    for si, start in enumerate(range(0, rows, shard_size)):
        end = min(start + shard_size, rows)
        shard = vectors[start:end]  # [n, dims]
        bin_path = out_dir / f"vectors-{si:03d}.bin"
        payload = shard.astype(np_dtype, copy=False).tobytes(order="C")
        digest = hashlib.sha256(payload).hexdigest()
        with open(bin_path, "wb") as f:
            f.write(payload)
        shard_rows = int(shard.shape[0])
        shards_meta.append(
            {
                "path": f"/embeddings/{bin_path.name}",
                "rows": shard_rows,
                "rowOffset": row_offset,
                "byteLength": len(payload),
                "sha256": digest,
                "byteStride": dims * bytes_per_value,
            },
        )
        row_offset += shard_rows
    return shards_meta
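

# Illustration: with the site defaults above (shardSizeRows=1024, dims=768, fp32) a
# full shard is 1024 * 768 * 4 = 3,145,728 bytes with byteStride 768 * 4 = 3072, so a
# consumer can slice row r from bytes [r * 3072, (r + 1) * 3072).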
def write_hnsw_graph(levels: list[list[list[int]]], rows: int, out_path: Path) -> tuple[list[dict], str]:
    out_path.parent.mkdir(parents=True, exist_ok=True)
    offset = 0
    meta: list[dict] = []
    digest = hashlib.sha256()
    with open(out_path, "wb") as f:
        for lvl in levels:
            indptr = np.zeros(rows + 1, dtype=np.uint32)
            edge_accum: list[int] = []
            for idx in range(rows):
                neighbors = lvl[idx] if idx < len(lvl) else []
                indptr[idx + 1] = indptr[idx] + len(neighbors)
                edge_accum.extend(neighbors)
            indptr_bytes = indptr.tobytes(order="C")
            indptr_offset = offset
            f.write(indptr_bytes)
            digest.update(indptr_bytes)
            offset += len(indptr_bytes)
            if edge_accum:
                indices = np.asarray(edge_accum, dtype=np.uint32)
                indices_bytes = indices.tobytes(order="C")
            else:
                indices = np.zeros(0, dtype=np.uint32)
                indices_bytes = indices.tobytes(order="C")
            indices_offset = offset
            f.write(indices_bytes)
            digest.update(indices_bytes)
            offset += len(indices_bytes)
            meta.append(
                {
                    "level": len(meta),
                    "indptr": {
                        "offset": indptr_offset,
                        "elements": int(indptr.shape[0]),
                        "byteLength": len(indptr_bytes),
                    },
                    "indices": {
                        "offset": indices_offset,
                        "elements": int(indices.shape[0]),
                        "byteLength": len(indices_bytes),
                    },
                },
            )
    return meta, digest.hexdigest()
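

# Each level is CSR-encoded: the neighbors of node idx are
# indices[indptr[idx] : indptr[idx + 1]], which lets the browser-side worker slice a
# level directly from the fetched buffer (an assumption about the consumer, which is
# not shown in this diff).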
def embed_vllm(
    texts: list[str],
    model_id: str,
    vllm_url: str,
    batch_size: int = 64,
    concurrency: int = 8,
) -> np.ndarray:
    base_url = resolve_vllm_base_url(vllm_url)
    api_key = os.environ.get("VLLM_API_KEY") or os.environ.get("OPENAI_API_KEY") or "not-set"
    client = OpenAI(base_url=base_url, api_key=api_key, timeout=300)

    def list_available_models() -> list[str]:
        models: list[str] = []
        page = client.models.list()
        models.extend(model.id for model in page.data)
        while getattr(page, "has_more", False) and page.data:
            cursor = page.data[-1].id
            page = client.models.list(after=cursor)
            models.extend(model.id for model in page.data)
        return models

    try:
        available_models = list_available_models()
    except Exception as exc:
        raise RuntimeError(f"failed to query {base_url}/models: {exc}") from exc
    if model_id not in available_models:
        suggestions = ", ".join(sorted(available_models)) if available_models else "<none>"
        logger.warning(
            "model '%s' not served by vLLM at %s. Available models: %s. Falling back to the first available model; results may differ during semantic search (you can ignore this message if your weights are an ONNX checkpoint of the same model).",
            model_id,
            base_url,
            suggestions,
        )
        model_id = available_models[0]

    # Apply model-specific prefixes for documents (asymmetric search)
    model_lower = model_id.lower()
    if "e5" in model_lower:
        # E5 models: use "passage:" prefix for documents
        prefixed = [f"passage: {t}" for t in texts]
    elif "qwen" in model_lower and "embedding" in model_lower:
        # Qwen3-Embedding: documents use plain text (no prefix)
        prefixed = texts
    elif "embeddinggemma" in model_lower:
        # embeddinggemma: use "title: none | text:" prefix for documents
        prefixed = [f"title: none | text: {t}" for t in texts]
    else:
        # Default: no prefix for unknown models
        prefixed = texts

    print(
        "Embedding"
        f" {len(prefixed)} texts with vLLM"
        f" (model={model_id}, batch_size={batch_size}, concurrency={concurrency})",
    )

    # Create batches
    batches = []
    for i in range(0, len(prefixed), batch_size):
        batch = prefixed[i : i + batch_size]
        batches.append((i, batch))

    # Function to send a single batch request
    def send_batch(batch_info: tuple[int, list[str]]) -> tuple[int, list[np.ndarray]]:
        idx, batch = batch_info
        response = client.embeddings.create(model=model_id, input=batch)
        embeddings = [np.asarray(item.embedding, dtype=np.float32) for item in response.data]
        return (idx, embeddings)

    # Send batches concurrently (or sequentially if only 1 batch)
    results: dict[int, list[np.ndarray]] = {}
    if len(batches) == 1:
        # Single batch - no need for threading
        idx, embeddings = send_batch(batches[0])
        results[idx] = embeddings
    else:
        # Multiple batches - use concurrent requests
        with ThreadPoolExecutor(max_workers=concurrency) as executor:
            futures = {executor.submit(send_batch, batch_info): batch_info[0] for batch_info in batches}
            completed = 0
            for future in as_completed(futures):
                idx, embeddings = future.result()
                results[idx] = embeddings
                completed += 1
                if completed % max(1, len(batches) // 10) == 0 or completed == len(batches):
                    print(f"  Completed {completed}/{len(batches)} batches ({completed * 100 // len(batches)}%)")

    # Reconstruct in order
    out: list[np.ndarray] = []
    for i in sorted(results.keys()):
        out.extend(results[i])
    return np.stack(out, axis=0)
def embed_hf(texts: list[str], model_id: str, device: str) -> np.ndarray:
# Prefer sentence-transformers for E5 and similar embed models
from sentence_transformers import SentenceTransformer
model = SentenceTransformer(model_id, device=device)
# Apply model-specific prefixes for documents (asymmetric search)
model_lower = model_id.lower()
if "e5" in model_lower:
# E5 models: use "passage:" prefix for documents
prefixed = [f"passage: {t}" for t in texts]
elif "qwen" in model_lower and "embedding" in model_lower:
# Qwen3-Embedding: documents use plain text (no prefix)
prefixed = texts
elif "embeddinggemma" in model_lower:
# embeddinggemma: use "title: none | text:" prefix for documents
prefixed = [f"title: none | text: {t}" for t in texts]
else:
# Default: no prefix for unknown models
prefixed = texts
vecs = model.encode(
prefixed,
batch_size=64,
normalize_embeddings=True,
convert_to_numpy=True,
show_progress_bar=True,
)
return vecs.astype(np.float32, copy=False)
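

# Example usage (hypothetical inputs; model and device match the script defaults):
#   vecs = embed_hf(["passage text one", "passage text two"], "intfloat/multilingual-e5-large", "cpu")
#   vecs.shape  # -> (2, 1024); rows are L2-normalized by sentence-transformers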
def main():
ap = argparse.ArgumentParser()
ap.add_argument("--jsonl", default="public/embeddings-text.jsonl")
ap.add_argument("--model", default=os.environ.get("SEM_MODEL", "intfloat/multilingual-e5-large"))
ap.add_argument("--dims", type=int, default=int(os.environ.get("SEM_DIMS", "1024")))
ap.add_argument("--dtype", choices=["fp16", "fp32"], default=os.environ.get("SEM_DTYPE", "fp32"))
ap.add_argument("--shard-size", type=int, default=int(os.environ.get("SEM_SHARD", "1024")))
ap.add_argument("--out", default="public/embeddings")
ap.add_argument("--use-vllm", action="store_true", default=bool(os.environ.get("USE_VLLM", "")))
ap.add_argument(
"--vllm-url",
default=DEFAULT_VLLM_URL,
help="Base URL for the vLLM OpenAI-compatible server (accepts either /v1 or /v1/embeddings)",
)
ap.add_argument("--chunk-size", type=int, default=512, help="Max tokens per chunk")
ap.add_argument("--chunk-overlap", type=int, default=128, help="Overlap tokens between chunks")
ap.add_argument("--no-chunking", action="store_true", help="Disable chunking (embed full docs)")
ap.add_argument(
"--concurrency",
type=int,
default=int(os.environ.get("VLLM_CONCURRENCY", "8")),
help="Number of concurrent requests to vLLM (default: 8)",
)
ap.add_argument(
"--batch-size",
type=int,
default=int(os.environ.get("VLLM_BATCH_SIZE", "64")),
help="Batch size for vLLM requests (default: 64)",
)
args = ap.parse_args()
recs = list(load_jsonl(args.jsonl))
if not recs:
print("No input found in public/embeddings-text.jsonl; run the site build first to emit JSONL.")
return
# Apply chunking
if args.no_chunking:
chunks = recs
chunk_metadata = {}
print(f"Chunking disabled. Processing {len(chunks)} full documents")
else:
chunks = []
chunk_metadata = {}
for rec in recs:
doc_chunks = chunk_document(rec, max_tokens=args.chunk_size, overlap_tokens=args.chunk_overlap)
chunks.extend(doc_chunks)
# Build chunk metadata map
for chunk in doc_chunks:
if chunk["is_chunked"]:
chunk_metadata[chunk["slug"]] = {
"parentSlug": chunk["parent_slug"],
"chunkId": chunk["chunk_id"],
}
chunked_count = sum(1 for c in chunks if c.get("is_chunked", False))
print(f"Chunked {len(recs)} documents into {len(chunks)} chunks ({chunked_count} chunked, {len(chunks) - chunked_count} unchanged)")
print(f" Chunk size: {args.chunk_size} tokens, overlap: {args.chunk_overlap} tokens")
ids = [c["slug"] for c in chunks]
titles = [c.get("title", c["slug"]) for c in chunks]
texts = [c["text"] for c in chunks]
if args.use_vllm:
vecs = embed_vllm(
texts,
args.model,
args.vllm_url,
batch_size=args.batch_size,
concurrency=args.concurrency,
)
else:
device = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu"
vecs = embed_hf(texts, args.model, device)
# Coerce dims and re-normalize
if vecs.shape[1] != args.dims:
if vecs.shape[1] > args.dims:
vecs = vecs[:, : args.dims]
else:
vecs = np.pad(vecs, ((0, 0), (0, args.dims - vecs.shape[1])))
vecs = l2_normalize_rows(vecs.astype(np.float32, copy=False))
out_dir = Path(args.out)
shards = write_shards(vecs, args.shard_size, args.dtype, out_dir)
# Build a lightweight HNSW graph and store it in a compact binary layout
def hnsw_build(data: np.ndarray, M: int = 16, efC: int = 200, seed: int = 0) -> dict:
rng = random.Random(seed)
N, D = data.shape
levels: list[list[list[int]]] = [] # levels[L][i] = neighbors of node i at level L
# random level assignment using 1/e distribution
node_levels = []
for _ in range(N):
lvl = 0
while rng.random() < 1 / math.e:
lvl += 1
node_levels.append(lvl)
max_level = max(node_levels) if N > 0 else 0
for _ in range(max_level + 1):
levels.append([[] for _ in range(N)])
def sim(i: int, j: int) -> float:
return float((data[i] * data[j]).sum())
entry = 0 if N > 0 else -1
def search_layer(q: int, ep: int, ef: int, L: int) -> list[int]:
if ep < 0:
return []
visited = set()
cand: list[tuple[float, int]] = []
top: list[tuple[float, int]] = []
def push(node: int):
if node in visited:
return
visited.add(node)
cand.append((sim(q, node), node))
push(ep)
while cand:
cand.sort(reverse=True)
s, v = cand.pop(0)
                if len(top) >= ef and s <= min(t[0] for t in top):  # best remaining candidate cannot beat the worst kept result
break
top.append((s, v))
for u in levels[L][v]:
push(u)
top.sort(reverse=True)
return [n for _, n in top]
for i in range(N):
if i == 0:
continue
lvl = node_levels[i]
ep = entry
for L in range(max_level, lvl, -1):
c = search_layer(i, ep, 1, L)
if c:
ep = c[0]
for L in range(min(max_level, lvl), -1, -1):
W = search_layer(i, ep, efC, L)
# Select top M by similarity
neigh = sorted(((sim(i, j), j) for j in W if j != i), reverse=True)[:M]
for _, e in neigh:
if e not in levels[L][i]:
levels[L][i].append(e)
if i not in levels[L][e]:
levels[L][e].append(i)
# trim neighbors to M
for L in range(len(levels)):
for i in range(N):
if len(levels[L][i]) > M:
# keep top M by sim
nb = levels[L][i]
nb = sorted(nb, key=lambda j: sim(i, j), reverse=True)[:M]
levels[L][i] = nb
return {
"M": M,
"efConstruction": efC,
"entryPoint": entry,
"maxLevel": max_level,
"levels": levels,
}
hnsw = hnsw_build(vecs, M=16, efC=200)
hnsw_meta, hnsw_sha = write_hnsw_graph(hnsw["levels"], int(vecs.shape[0]), out_dir / "hnsw.bin")
manifest = {
"version": 2,
"dims": args.dims,
"dtype": args.dtype,
"normalized": True,
"rows": int(vecs.shape[0]),
"shardSizeRows": args.shard_size,
"vectors": {
"dtype": args.dtype,
"rows": int(vecs.shape[0]),
"dims": args.dims,
"shards": shards,
},
"ids": ids,
"titles": titles,
"chunkMetadata": chunk_metadata,
"hnsw": {
"M": hnsw["M"],
"efConstruction": hnsw["efConstruction"],
"entryPoint": hnsw["entryPoint"],
"maxLevel": hnsw["maxLevel"],
"graph": {
"path": "/embeddings/hnsw.bin",
"sha256": hnsw_sha,
"levels": hnsw_meta,
},
},
}
(out_dir / "manifest.json").write_text(json.dumps(manifest, ensure_ascii=False), encoding="utf-8")
print(f"Wrote {len(shards)} vector shard(s), HNSW graph, and manifest to {out_dir}")
if __name__ == "__main__":
main()
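
# A minimal sketch of reading the emitted artifacts back from Python, assuming
# the defaults above (fp32 shards, L2-normalized rows, shard files living next
# to manifest.json). `query_vec` stands in for a hypothetical normalized query
# embedding:
#
#   manifest = json.loads((out_dir / "manifest.json").read_text(encoding="utf-8"))
#   dims = manifest["dims"]
#   parts = [
#       np.fromfile(out_dir / Path(shard["path"]).name, dtype=np.float32).reshape(-1, dims)
#       for shard in manifest["vectors"]["shards"]
#   ]
#   vecs = np.concatenate(parts, axis=0)
#   scores = vecs @ query_vec  # normalized rows: dot product == cosine similarity
#   best = [manifest["ids"][i] for i in np.argsort(-scores)[:10]]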

View File

@@ -40,7 +40,7 @@ export const NotFoundPage: QuartzEmitterPlugin = () => {
description: notFound,
frontmatter: { title: notFound, tags: [] },
})
const externalResources = pageResources(path, resources)
const externalResources = pageResources(path, resources, ctx.cfg.configuration)
const componentData: QuartzComponentProps = {
ctx,
fileData: vfile.data,

View File

@@ -1,5 +1,8 @@
import { FullSlug, joinSegments } from "../../util/path"
import { QuartzEmitterPlugin } from "../types"
import path from "path"
import fs from "node:fs/promises"
import { globby } from "globby"
// @ts-ignore
import spaRouterScript from "../../components/scripts/spa.inline"
@@ -16,7 +19,7 @@ import {
processGoogleFonts,
} from "../../util/theme"
import { Features, transform } from "lightningcss"
import { transform as transpile } from "esbuild"
import { transform as transpile, build as bundle } from "esbuild"
import { write } from "./helpers"
type ComponentResources = {
@@ -357,7 +360,47 @@ export const ComponentResources: QuartzEmitterPlugin = () => {
ext: ".js",
content: postscript,
})
// Bundle all worker files
const workerFiles = await globby(["quartz/**/*.worker.ts"])
for (const src of workerFiles) {
const result = await bundle({
entryPoints: [src],
bundle: true,
minify: true,
platform: "browser",
format: "esm",
write: false,
})
const code = result.outputFiles[0].text
const name = path.basename(src).replace(/\.ts$/, "")
yield write({ ctx, slug: name as FullSlug, ext: ".js", content: code })
}
},
async *partialEmit(ctx, _content, _resources, changeEvents) {
// Handle worker file changes in incremental builds
for (const changeEvent of changeEvents) {
if (!/\.worker\.ts$/.test(changeEvent.path)) continue
if (changeEvent.type === "delete") {
const name = path.basename(changeEvent.path).replace(/\.ts$/, "")
const dest = joinSegments(ctx.argv.output, `${name}.js`)
try {
await fs.unlink(dest)
} catch {}
continue
}
const result = await bundle({
entryPoints: [changeEvent.path],
bundle: true,
minify: true,
platform: "browser",
format: "esm",
write: false,
})
const code = result.outputFiles[0].text
const name = path.basename(changeEvent.path).replace(/\.ts$/, "")
yield write({ ctx, slug: name as FullSlug, ext: ".js", content: code })
}
},
}
}
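
// Note on output naming: each quartz/**/*.worker.ts entry point is bundled and
// written as "<basename>.js" at the output root, so a hypothetical
// quartz/components/scripts/semantic.worker.ts lands at <output>/semantic.worker.js.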

View File

@@ -25,7 +25,7 @@ async function processContent(
) {
const slug = fileData.slug!
const cfg = ctx.cfg.configuration
const externalResources = pageResources(pathToRoot(slug), resources)
const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
const componentData: QuartzComponentProps = {
ctx,
fileData,

View File

@@ -38,7 +38,7 @@ async function* processFolderInfo(
const slug = joinSegments(folder, "index") as FullSlug
const [tree, file] = folderContent
const cfg = ctx.cfg.configuration
const externalResources = pageResources(pathToRoot(slug), resources)
const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
const componentData: QuartzComponentProps = {
ctx,
fileData: file.data,

View File

@@ -1,7 +1,7 @@
export { ContentPage } from "./contentPage"
export { TagPage } from "./tagPage"
export { FolderPage } from "./folderPage"
export { ContentIndex as ContentIndex } from "./contentIndex"
export { ContentIndex } from "./contentIndex"
export { AliasRedirects } from "./aliases"
export { Assets } from "./assets"
export { Static } from "./static"
@@ -10,3 +10,4 @@ export { ComponentResources } from "./componentResources"
export { NotFoundPage } from "./404"
export { CNAME } from "./cname"
export { CustomOgImages } from "./ogImage"
export { SemanticIndex } from "./semantic"

View File

@@ -0,0 +1,235 @@
import { write } from "./helpers"
import { QuartzEmitterPlugin } from "../types"
import { FilePath, FullSlug, joinSegments, QUARTZ } from "../../util/path"
import { ReadTimeResults } from "reading-time"
import { GlobalConfiguration } from "../../cfg"
import { spawn } from "child_process"
const DEFAULT_MODEL_ID = "onnx-community/Qwen3-Embedding-0.6B-ONNX"
const defaults: GlobalConfiguration["semanticSearch"] = {
enable: true,
model: DEFAULT_MODEL_ID,
aot: false,
dims: 1024,
dtype: "fp32",
shardSizeRows: 1024,
hnsw: { M: 16, efConstruction: 200 },
chunking: {
chunkSize: 512,
chunkOverlap: 128,
noChunking: false,
},
vllm: {
enable: false,
vllmUrl:
process.env.VLLM_URL || process.env.VLLM_EMBED_URL || "http://127.0.0.1:8000/v1/embeddings",
concurrency: parseInt(process.env.VLLM_CONCURRENCY || "8", 10),
batchSize: parseInt(process.env.VLLM_BATCH_SIZE || "64", 10),
},
}
type ContentDetails = {
slug: string
title: string
filePath: FilePath
content: string
readingTime?: Partial<ReadTimeResults>
}
/**
* Check if uv is installed
*/
function checkUvInstalled(): Promise<boolean> {
return new Promise((resolve) => {
const proc = spawn("uv", ["--version"], { shell: true })
proc.on("error", () => resolve(false))
proc.on("close", (code) => resolve(code === 0))
})
}
/**
* Run the Python embedding build script using uv
* Script uses PEP 723 inline metadata for dependency management
*/
function runEmbedBuild(
jsonlPath: string,
outDir: string,
opts: {
model: string
dtype: string
dims: number
shardSizeRows: number
chunking: { chunkSize: number; chunkOverlap: number; noChunking: boolean }
vllm: { enable: boolean; vllmUrl?: string; concurrency: number; batchSize: number }
},
): Promise<void> {
return new Promise((resolve, reject) => {
const scriptPath = joinSegments(QUARTZ, "embed_build.py")
const args = [
"run",
scriptPath,
"--jsonl",
jsonlPath,
"--model",
opts.model,
"--out",
outDir,
"--dtype",
opts.dtype,
"--dims",
String(opts.dims),
"--shard-size",
String(opts.shardSizeRows),
"--chunk-size",
String(opts.chunking.chunkSize),
"--chunk-overlap",
String(opts.chunking.chunkOverlap),
]
if (opts.chunking.noChunking) {
args.push("--no-chunking")
}
if (opts.vllm.enable) {
args.push("--use-vllm")
if (opts.vllm.vllmUrl) {
args.push("--vllm-url", opts.vllm.vllmUrl)
}
args.push("--concurrency", String(opts.vllm.concurrency))
args.push("--batch-size", String(opts.vllm.batchSize))
}
console.log("\nRunning embedding generation:")
console.log(` uv ${args.join(" ")}`)
const env = { ...process.env }
if (opts.vllm.enable && !env.USE_VLLM) {
env.USE_VLLM = "1"
}
const proc = spawn("uv", args, {
stdio: "inherit",
shell: true,
env,
})
proc.on("error", (err) => {
reject(new Error(`Failed to spawn uv: ${err.message}`))
})
proc.on("close", (code) => {
if (code === 0) {
console.log("Embedding generation completed successfully")
resolve()
} else {
reject(new Error(`embed_build.py exited with code ${code}`))
}
})
})
}
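
// For reference, PEP 723 inline metadata is a comment header inside the Python
// script itself; illustrative shape only (the authoritative dependency list
// lives in quartz/embed_build.py):
//
//   # /// script
//   # requires-python = ">=3.11"
//   # dependencies = ["numpy", "openai", "sentence-transformers"]
//   # ///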
export const SemanticIndex: QuartzEmitterPlugin<Partial<GlobalConfiguration["semanticSearch"]>> = (
opts,
) => {
const merged = { ...defaults, ...opts }
const o = {
enable: merged.enable!,
model: merged.model!,
aot: merged.aot!,
dims: merged.dims!,
dtype: merged.dtype!,
shardSizeRows: merged.shardSizeRows!,
hnsw: {
M: merged.hnsw?.M ?? defaults.hnsw!.M!,
efConstruction: merged.hnsw?.efConstruction ?? defaults.hnsw!.efConstruction!,
efSearch: merged.hnsw?.efSearch,
},
chunking: {
chunkSize: merged.chunking?.chunkSize ?? defaults.chunking!.chunkSize!,
chunkOverlap: merged.chunking?.chunkOverlap ?? defaults.chunking!.chunkOverlap!,
noChunking: merged.chunking?.noChunking ?? defaults.chunking!.noChunking!,
},
vllm: {
enable: merged.vllm?.enable ?? defaults.vllm!.enable!,
vllmUrl: merged.vllm?.vllmUrl ?? defaults.vllm!.vllmUrl,
concurrency: merged.vllm?.concurrency ?? defaults.vllm!.concurrency!,
batchSize: merged.vllm?.batchSize ?? defaults.vllm!.batchSize!,
},
}
if (!o.model) {
throw new Error("Semantic search requires a model identifier")
}
return {
name: "SemanticIndex",
getQuartzComponents() {
return []
},
async *partialEmit() {},
async *emit(ctx, content, _resources) {
if (!o.enable) return
const docs: ContentDetails[] = []
for (const [_, file] of content) {
const slug = file.data.slug!
const title = file.data.frontmatter?.title ?? slug
const text = file.data.text
if (text) {
docs.push({
slug,
title,
filePath: file.data.filePath!,
content: text,
readingTime: file.data.readingTime,
})
}
}
// Emit JSONL with the exact text used for embeddings
const jsonl = docs
.map((d) => ({ slug: d.slug, title: d.title, text: d.content }))
        .map((rec) => JSON.stringify(rec))
.join("\n")
const jsonlSlug = "embeddings-text" as FullSlug
yield write({
ctx,
slug: jsonlSlug,
ext: ".jsonl",
content: jsonl,
})
// If aot is false, run the embedding generation script
if (!o.aot) {
console.log("\nGenerating embeddings (aot=false)...")
// Check for uv
const hasUv = await checkUvInstalled()
if (!hasUv) {
throw new Error(
"uv is required for embedding generation. Install it from https://docs.astral.sh/uv/",
)
}
const jsonlPath = joinSegments(ctx.argv.output, "embeddings-text.jsonl")
const outDir = joinSegments(ctx.argv.output, "embeddings")
try {
await runEmbedBuild(jsonlPath, outDir, o)
} catch (err) {
const message = err instanceof Error ? err.message : String(err)
throw new Error(`Embedding generation failed: ${message}`)
}
} else {
console.log(
"\nSkipping embedding generation (aot=true). Expecting pre-generated embeddings in public/embeddings/",
)
}
},
externalResources(_ctx) {
return {}
},
}
}
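
// Minimal wiring sketch, assuming a stock quartz.config.ts whose emitters array
// accepts this plugin (option names mirror the defaults above; every field is
// optional):
//
//   SemanticIndex({
//     model: "intfloat/multilingual-e5-large",
//     dims: 1024,
//     dtype: "fp32",
//     chunking: { chunkSize: 512, chunkOverlap: 128, noChunking: false },
//     vllm: { enable: false },
//   })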

View File

@@ -73,7 +73,7 @@ async function processTagPage(
const slug = joinSegments("tags", tag) as FullSlug
const [tree, file] = tagContent
const cfg = ctx.cfg.configuration
const externalResources = pageResources(pathToRoot(slug), resources)
const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
const componentData: QuartzComponentProps = {
ctx,
fileData: file.data,

View File

@@ -1,4 +1,6 @@
import { QuartzTransformerPlugin } from "../types"
import rehypeRaw from "rehype-raw"
import { PluggableList } from "unified"
export interface Options {
/** Replace {{ relref }} with quartz wikilinks []() */
@@ -102,5 +104,9 @@ export const OxHugoFlavouredMarkdown: QuartzTransformerPlugin<Partial<Options>>
}
return src
},
htmlPlugins() {
const plugins: PluggableList = [rehypeRaw]
return plugins
},
}
}

View File

@@ -0,0 +1,548 @@
// Unified semantic search worker: handles data loading and query execution
import { env, pipeline } from "@huggingface/transformers"
import "onnxruntime-web/webgpu"
import "onnxruntime-web/wasm"
export {}
type VectorShardMeta = {
path: string
rows: number
rowOffset: number
byteLength: number
sha256?: string
byteStride: number
}
type LevelSection = {
level: number
indptr: { offset: number; elements: number; byteLength: number }
indices: { offset: number; elements: number; byteLength: number }
}
type ChunkMetadata = {
parentSlug: string
chunkId: number
}
type Manifest = {
version: number
dims: number
dtype: string
normalized: boolean
rows: number
shardSizeRows: number
vectors: {
dtype: string
rows: number
dims: number
shards: VectorShardMeta[]
}
ids: string[]
titles?: string[]
chunkMetadata?: Record<string, ChunkMetadata>
hnsw: {
M: number
efConstruction: number
entryPoint: number
maxLevel: number
graph: {
path: string
sha256?: string
levels: LevelSection[]
}
}
}
type InitMessage = {
type: "init"
cfg: any
manifestUrl: string
baseUrl?: string
disableCache?: boolean
}
type SearchMessage = { type: "search"; text: string; k: number; seq: number }
type ResetMessage = { type: "reset" }
type WorkerMessage = InitMessage | SearchMessage | ResetMessage
type ReadyMessage = { type: "ready" }
type ProgressMessage = {
type: "progress"
loadedRows: number
totalRows: number
}
type SearchHit = { id: number; score: number }
type SearchResultMessage = {
type: "search-result"
seq: number
semantic: SearchHit[]
}
type ErrorMessage = { type: "error"; seq?: number; message: string }
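
// Message flow, in order: the main thread posts "init"; the worker answers with
// zero or more "progress" updates, then "ready" (or "error"). Each "search"
// produces exactly one "search-result" carrying the request's seq, so callers
// can drop stale responses; "reset" aborts in-flight loads and clears state.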
type WorkerState = "idle" | "loading" | "ready" | "error"
// IndexedDB configuration
const DB_NAME = "semantic-search-cache"
const STORE_NAME = "assets"
const DB_VERSION = 1
const hasIndexedDB = typeof indexedDB !== "undefined"
const supportsSharedArrayBuffer = typeof SharedArrayBuffer !== "undefined"
// State
let state: WorkerState = "idle"
let manifest: Manifest | null = null
let cfg: any = null
let vectorsView: Float32Array | null = null
let dims = 0
let rows = 0
let classifier: any = null
let envConfigured = false
let entryPoint = -1
let maxLevel = 0
let efDefault = 128
let levelGraph: { indptr: Uint32Array; indices: Uint32Array }[] = []
let abortController: AbortController | null = null
let dbPromise: Promise<IDBDatabase> | null = null
// IndexedDB helpers
function openDatabase(): Promise<IDBDatabase> {
if (!hasIndexedDB) {
return Promise.reject(new Error("indexedDB unavailable"))
}
if (!dbPromise) {
dbPromise = new Promise((resolve, reject) => {
const req = indexedDB.open(DB_NAME, DB_VERSION)
req.onupgradeneeded = () => {
const db = req.result
if (!db.objectStoreNames.contains(STORE_NAME)) {
db.createObjectStore(STORE_NAME)
}
}
req.onsuccess = () => resolve(req.result)
req.onerror = () => reject(req.error ?? new Error("failed to open cache store"))
})
}
return dbPromise
}
async function readAsset(hash: string): Promise<ArrayBuffer | null> {
if (!hasIndexedDB) {
return null
}
const db = await openDatabase()
return new Promise((resolve, reject) => {
const tx = db.transaction(STORE_NAME, "readonly")
const store = tx.objectStore(STORE_NAME)
const req = store.get(hash)
req.onsuccess = () => {
const value = req.result
if (value instanceof ArrayBuffer) {
resolve(value)
} else if (value && value.buffer instanceof ArrayBuffer) {
resolve(value.buffer as ArrayBuffer)
} else {
resolve(null)
}
}
req.onerror = () => reject(req.error ?? new Error("failed to read cached asset"))
})
}
async function writeAsset(hash: string, buffer: ArrayBuffer): Promise<void> {
if (!hasIndexedDB) {
return
}
const db = await openDatabase()
await new Promise<void>((resolve, reject) => {
const tx = db.transaction(STORE_NAME, "readwrite")
const store = tx.objectStore(STORE_NAME)
const req = store.put(buffer, hash)
req.onsuccess = () => resolve()
req.onerror = () => reject(req.error ?? new Error("failed to cache asset"))
})
}
function toAbsolute(path: string, baseUrl?: string): string {
if (path.startsWith("http://") || path.startsWith("https://")) {
return path
}
const base = baseUrl ?? self.location.origin
return new URL(path, base).toString()
}
async function fetchBinary(
path: string,
disableCache: boolean,
sha?: string,
): Promise<ArrayBuffer> {
if (!disableCache && sha && hasIndexedDB) {
try {
const cached = await readAsset(sha)
if (cached) {
return cached
}
} catch {
// fall through to network fetch on cache errors
}
}
const res = await fetch(path, { signal: abortController?.signal ?? undefined })
if (!res.ok) {
throw new Error(`failed to fetch ${path}: ${res.status} ${res.statusText}`)
}
const payload = await res.arrayBuffer()
if (!disableCache && sha && hasIndexedDB) {
try {
await writeAsset(sha, payload)
} catch {
// ignore cache write failures
}
}
return payload
}
async function populateVectors(
manifest: Manifest,
baseUrl: string | undefined,
disableCache: boolean | undefined,
): Promise<{ buffer: Float32Array; rowsLoaded: number }> {
if (manifest.vectors.dtype !== "fp32") {
throw new Error(`unsupported embedding dtype '${manifest.vectors.dtype}', regenerate with fp32`)
}
const rows = manifest.rows
const dims = manifest.dims
const totalBytes = rows * dims * Float32Array.BYTES_PER_ELEMENT
const buffer = supportsSharedArrayBuffer
? new Float32Array(new SharedArrayBuffer(totalBytes))
: new Float32Array(totalBytes)
let loadedRows = 0
for (const shard of manifest.vectors.shards) {
const absolute = toAbsolute(shard.path, baseUrl)
const payload = await fetchBinary(absolute, Boolean(disableCache), shard.sha256)
const view = new Float32Array(payload)
if (view.length !== shard.rows * dims) {
throw new Error(
`shard ${shard.path} has mismatched length (expected ${shard.rows * dims}, got ${view.length})`,
)
}
buffer.set(view, shard.rowOffset * dims)
loadedRows = Math.min(rows, shard.rowOffset + shard.rows)
const progress: ProgressMessage = {
type: "progress",
loadedRows,
totalRows: rows,
}
self.postMessage(progress)
}
return { buffer, rowsLoaded: loadedRows }
}
async function populateGraph(
manifest: Manifest,
baseUrl: string | undefined,
disableCache: boolean | undefined,
): Promise<ArrayBuffer> {
const graphMeta = manifest.hnsw.graph
const absolute = toAbsolute(graphMeta.path, baseUrl)
return await fetchBinary(absolute, Boolean(disableCache), graphMeta.sha256)
}
function configureRuntimeEnv() {
if (envConfigured) return
env.allowLocalModels = false
env.allowRemoteModels = true
const wasmBackend = env.backends?.onnx?.wasm
if (!wasmBackend) {
throw new Error("transformers.js ONNX runtime backend unavailable")
}
const cdnBase = `https://cdn.jsdelivr.net/npm/@huggingface/transformers@${env.version}/dist/`
wasmBackend.wasmPaths = cdnBase
envConfigured = true
}
async function ensureEncoder() {
if (classifier) return
if (!cfg?.model) {
throw new Error("semantic worker missing model identifier")
}
configureRuntimeEnv()
const dtype = typeof cfg?.dtype === "string" && cfg.dtype.length > 0 ? cfg.dtype : "fp32"
const pipelineOpts: Record<string, unknown> = {
device: "wasm",
dtype,
local_files_only: false,
}
classifier = await pipeline("feature-extraction", cfg.model, pipelineOpts)
cfg.dtype = dtype
}
function vectorSlice(id: number): Float32Array {
if (!vectorsView) {
throw new Error("vector buffer not configured")
}
const start = id * dims
const end = start + dims
return vectorsView.subarray(start, end)
}
function dot(a: Float32Array, b: Float32Array): number {
let s = 0
for (let i = 0; i < dims; i++) {
s += a[i] * b[i]
}
return s
}
function neighborsFor(level: number, node: number): Uint32Array {
const meta = levelGraph[level]
if (!meta) return new Uint32Array()
const { indptr, indices } = meta
if (node < 0 || node + 1 >= indptr.length) return new Uint32Array()
const start = indptr[node]
const end = indptr[node + 1]
return indices.subarray(start, end)
}
function insertSortedDescending(arr: SearchHit[], item: SearchHit) {
let idx = arr.length
while (idx > 0 && arr[idx - 1].score < item.score) {
idx -= 1
}
arr.splice(idx, 0, item)
}
function bruteForceSearch(query: Float32Array, k: number): SearchHit[] {
if (!vectorsView) return []
const hits: SearchHit[] = []
for (let id = 0; id < rows; id++) {
const score = dot(query, vectorSlice(id))
if (hits.length < k) {
insertSortedDescending(hits, { id, score })
} else if (score > hits[hits.length - 1].score) {
insertSortedDescending(hits, { id, score })
hits.length = k
}
}
return hits
}
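
// hnswSearch below is the standard two-phase HNSW query: a greedy descent over
// the upper layers to pick a good layer-0 entry point, then a best-first beam
// search on layer 0 with ef = max(efDefault, 10 * k) candidates. When no graph
// was loaded it falls back to bruteForceSearch above.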
function hnswSearch(query: Float32Array, k: number): SearchHit[] {
if (!manifest || !vectorsView || entryPoint < 0 || levelGraph.length === 0) {
return bruteForceSearch(query, k)
}
const ef = Math.max(efDefault, k * 10)
let ep = entryPoint
let epScore = dot(query, vectorSlice(ep))
for (let level = maxLevel; level > 0; level--) {
let changed = true
while (changed) {
changed = false
const neigh = neighborsFor(level, ep)
for (let i = 0; i < neigh.length; i++) {
const candidate = neigh[i]
if (candidate >= rows) continue
const score = dot(query, vectorSlice(candidate))
if (score > epScore) {
epScore = score
ep = candidate
changed = true
}
}
}
}
const visited = new Set<number>()
const candidateQueue: SearchHit[] = []
const best: SearchHit[] = []
insertSortedDescending(candidateQueue, { id: ep, score: epScore })
insertSortedDescending(best, { id: ep, score: epScore })
visited.add(ep)
while (candidateQueue.length > 0) {
const current = candidateQueue.shift()!
const worstBest = best.length >= ef ? best[best.length - 1].score : -Infinity
if (current.score < worstBest && best.length >= ef) {
break
}
const neigh = neighborsFor(0, current.id)
for (let i = 0; i < neigh.length; i++) {
const candidate = neigh[i]
if (candidate >= rows || visited.has(candidate)) continue
visited.add(candidate)
const score = dot(query, vectorSlice(candidate))
const hit = { id: candidate, score }
insertSortedDescending(candidateQueue, hit)
if (best.length < ef || score > best[best.length - 1].score) {
insertSortedDescending(best, hit)
if (best.length > ef) {
best.pop()
}
}
}
}
best.sort((a, b) => b.score - a.score)
return best.slice(0, k)
}
async function embed(text: string, isQuery: boolean = false): Promise<Float32Array> {
await ensureEncoder()
// Apply model-specific prefixes for asymmetric search
let prefixedText = text
if (cfg?.model) {
const modelName = cfg.model.toLowerCase()
switch (true) {
case modelName.includes("e5"): {
// E5 models require query: or passage: prefix
prefixedText = isQuery ? `query: ${text}` : `passage: ${text}`
break
}
case modelName.includes("qwen") && modelName.includes("embedding"): {
// Qwen3-Embedding requires task instruction for queries only
if (isQuery) {
const task = "Given a web search query, retrieve relevant passages that answer the query"
prefixedText = `Instruct: ${task}\nQuery: ${text}`
}
// Documents use plain text (no prefix)
break
}
case modelName.includes("embeddinggemma"): {
// embeddinggemma requires specific prefixes
prefixedText = isQuery
? `task: search result | query: ${text}`
: `title: none | text: ${text}`
break
}
default:
break
}
}
const out = await classifier(prefixedText, { pooling: "mean", normalize: true })
const data = Array.from(out?.data ?? out) as number[]
const vec = new Float32Array(dims)
for (let i = 0; i < dims; i++) vec[i] = data[i] ?? 0
return vec
}
async function handleInit(msg: InitMessage) {
if (state === "loading" || state === "ready") {
throw new Error("worker already initialized or loading")
}
state = "loading"
abortController?.abort()
abortController = new AbortController()
try {
cfg = msg.cfg
const manifestUrl = toAbsolute(msg.manifestUrl, msg.baseUrl)
const response = await fetch(manifestUrl, { signal: abortController.signal })
if (!response.ok) {
throw new Error(
`failed to fetch manifest ${manifestUrl}: ${response.status} ${response.statusText}`,
)
}
manifest = (await response.json()) as Manifest
if (manifest.vectors.dtype !== "fp32") {
throw new Error(
`unsupported embedding dtype '${manifest.vectors.dtype}', regenerate with fp32`,
)
}
dims = manifest.dims
rows = manifest.rows
const { buffer: vectorBuffer } = await populateVectors(manifest, msg.baseUrl, msg.disableCache)
vectorsView = vectorBuffer
const graphBuffer = await populateGraph(manifest, msg.baseUrl, msg.disableCache)
entryPoint = manifest.hnsw.entryPoint
maxLevel = manifest.hnsw.maxLevel
efDefault = Math.max(64, manifest.hnsw.M * 4)
levelGraph = manifest.hnsw.graph.levels.map((level) => {
const indptr = new Uint32Array(graphBuffer, level.indptr.offset, level.indptr.elements)
const indices = new Uint32Array(graphBuffer, level.indices.offset, level.indices.elements)
return { indptr, indices }
})
state = "ready"
const ready: ReadyMessage = { type: "ready" }
self.postMessage(ready)
} catch (err) {
state = "error"
throw err
}
}
async function handleSearch(msg: SearchMessage) {
if (state !== "ready") {
throw new Error("worker not ready for search")
}
if (!manifest || !vectorsView) {
throw new Error("semantic worker not configured")
}
const queryVec = await embed(msg.text, true)
const semanticHits = hnswSearch(queryVec, Math.max(1, msg.k))
const message: SearchResultMessage = {
type: "search-result",
seq: msg.seq,
semantic: semanticHits,
}
self.postMessage(message)
}
function handleReset() {
abortController?.abort()
abortController = null
state = "idle"
manifest = null
cfg = null
vectorsView = null
dims = 0
rows = 0
classifier = null
envConfigured = false
levelGraph = []
entryPoint = -1
maxLevel = 0
}
self.onmessage = (event: MessageEvent<WorkerMessage>) => {
const data = event.data
if (data.type === "reset") {
handleReset()
return
}
if (data.type === "init") {
void handleInit(data).catch((err: unknown) => {
const message: ErrorMessage = {
type: "error",
message: err instanceof Error ? err.message : String(err),
}
self.postMessage(message)
})
return
}
if (data.type === "search") {
void handleSearch(data).catch((err: unknown) => {
const message: ErrorMessage = {
type: "error",
seq: data.seq,
message: err instanceof Error ? err.message : String(err),
}
self.postMessage(message)
})
}
}
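
// A minimal sketch of driving this worker from the main thread. Assumptions:
// the bundled worker is served as /semantic.worker.js and the manifest at
// /embeddings/manifest.json (matching the emitter output above); cfg mirrors
// the semanticSearch config:
//
//   const worker = new Worker("/semantic.worker.js", { type: "module" })
//   worker.postMessage({
//     type: "init",
//     cfg: { model: "intfloat/multilingual-e5-large", dtype: "fp32" },
//     manifestUrl: "/embeddings/manifest.json",
//   })
//   worker.onmessage = (ev) => {
//     if (ev.data.type === "ready") {
//       worker.postMessage({ type: "search", text: "semantic search", k: 8, seq: 1 })
//     } else if (ev.data.type === "search-result") {
//       console.log(ev.data.semantic) // [{ id, score }, ...] -> indices into manifest.ids
//     }
//   }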