Compare commits

...

7 Commits

Author SHA1 Message Date
Aaron Pham
eb8a4cce18 revert: redundant changes
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2025-10-05 20:04:19 -04:00
Aaron Pham
68682a8fe3 chore: revert vault specific branch
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2025-10-05 19:58:20 -04:00
Aaron Pham
f533902c75 feat: semantic search (1/n)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2025-10-05 19:50:52 -04:00
martyone
f14260b2ba fix(oxhugo): Do not discard embedded HTML (#2151)
In 'processors/parse.ts' the 'remarkRehype' plugin is used with
'allowDangerousHtml' enabled, but that needs to be combined with (e.g.)
'rehypeRaw' to have any effect on the output.
2025-10-02 10:51:40 -07:00
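
For context, the pattern the message describes looks roughly like this (a minimal sketch of the usual unified pipeline, not the actual 'processors/parse.ts' source):

import { unified } from "unified"
import remarkParse from "remark-parse"
import remarkRehype from "remark-rehype"
import rehypeRaw from "rehype-raw"
import rehypeStringify from "rehype-stringify"

// allowDangerousHtml keeps raw HTML nodes through the mdast-to-hast conversion;
// rehypeRaw then re-parses those raw nodes into real hast elements so the
// embedded HTML actually reaches the serialized output.
const processor = unified()
  .use(remarkParse)
  .use(remarkRehype, { allowDangerousHtml: true })
  .use(rehypeRaw)
  .use(rehypeStringify)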
dependabot[bot]
9ad3481da6 chore(deps): bump the production-dependencies group with 4 updates (#2146)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-09-29 22:41:50 -04:00
dependabot[bot]
3ff7ca4155 chore(deps): bump sigstore/cosign-installer in the ci-dependencies group (#2135)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-09-28 16:45:18 -04:00
dependabot[bot]
eb6cc6ff8e chore(deps): bump the production-dependencies group with 9 updates (#2134)
Bumps the production-dependencies group with 9 updates:

| Package | From | To |
| --- | --- | --- |
| [is-absolute-url](https://github.com/sindresorhus/is-absolute-url) | `4.0.1` | `5.0.0` |
| [pixi.js](https://github.com/pixijs/pixijs) | `8.13.1` | `8.13.2` |
| [preact](https://github.com/preactjs/preact) | `10.27.1` | `10.27.2` |
| [pretty-bytes](https://github.com/sindresorhus/pretty-bytes) | `7.0.1` | `7.1.0` |
| [satori](https://github.com/vercel/satori) | `0.18.2` | `0.18.3` |
| [sharp](https://github.com/lovell/sharp) | `0.34.3` | `0.34.4` |
| [workerpool](https://github.com/josdejong/workerpool) | `9.3.3` | `9.3.4` |
| [@types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node) | `24.3.1` | `24.5.2` |
| [esbuild](https://github.com/evanw/esbuild) | `0.25.9` | `0.25.10` |


Updates `is-absolute-url` from 4.0.1 to 5.0.0
- [Release notes](https://github.com/sindresorhus/is-absolute-url/releases)
- [Commits](https://github.com/sindresorhus/is-absolute-url/compare/v4.0.1...v5.0.0)

Updates `pixi.js` from 8.13.1 to 8.13.2
- [Release notes](https://github.com/pixijs/pixijs/releases)
- [Commits](https://github.com/pixijs/pixijs/compare/v8.13.1...v8.13.2)

Updates `preact` from 10.27.1 to 10.27.2
- [Release notes](https://github.com/preactjs/preact/releases)
- [Commits](https://github.com/preactjs/preact/compare/10.27.1...10.27.2)

Updates `pretty-bytes` from 7.0.1 to 7.1.0
- [Release notes](https://github.com/sindresorhus/pretty-bytes/releases)
- [Commits](https://github.com/sindresorhus/pretty-bytes/compare/v7.0.1...v7.1.0)

Updates `satori` from 0.18.2 to 0.18.3
- [Release notes](https://github.com/vercel/satori/releases)
- [Commits](https://github.com/vercel/satori/compare/0.18.2...0.18.3)

Updates `sharp` from 0.34.3 to 0.34.4
- [Release notes](https://github.com/lovell/sharp/releases)
- [Commits](https://github.com/lovell/sharp/compare/v0.34.3...v0.34.4)

Updates `workerpool` from 9.3.3 to 9.3.4
- [Changelog](https://github.com/josdejong/workerpool/blob/master/HISTORY.md)
- [Commits](https://github.com/josdejong/workerpool/commits)

Updates `@types/node` from 24.3.1 to 24.5.2
- [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases)
- [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/node)

Updates `esbuild` from 0.25.9 to 0.25.10
- [Release notes](https://github.com/evanw/esbuild/releases)
- [Changelog](https://github.com/evanw/esbuild/blob/main/CHANGELOG.md)
- [Commits](https://github.com/evanw/esbuild/compare/v0.25.9...v0.25.10)

---
updated-dependencies:
- dependency-name: is-absolute-url
  dependency-version: 5.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
  dependency-group: production-dependencies
- dependency-name: pixi.js
  dependency-version: 8.13.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production-dependencies
- dependency-name: preact
  dependency-version: 10.27.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production-dependencies
- dependency-name: pretty-bytes
  dependency-version: 7.1.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: production-dependencies
- dependency-name: satori
  dependency-version: 0.18.3
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production-dependencies
- dependency-name: sharp
  dependency-version: 0.34.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production-dependencies
- dependency-name: workerpool
  dependency-version: 9.3.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: production-dependencies
- dependency-name: "@types/node"
  dependency-version: 24.5.2
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: production-dependencies
- dependency-name: esbuild
  dependency-version: 0.25.10
  dependency-type: direct:development
  update-type: version-update:semver-patch
  dependency-group: production-dependencies
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-09-22 13:11:44 -07:00
24 changed files with 3257 additions and 839 deletions


@@ -37,7 +37,7 @@ jobs:
network=host
- name: Install cosign
if: github.event_name != 'pull_request'
uses: sigstore/cosign-installer@v3.9.2
uses: sigstore/cosign-installer@v3.10.0
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
if: github.event_name != 'pull_request'

docs/embeddings/hnsw.bin Normal file (binary file; contents not shown)

File diff suppressed because one or more lines are too long

Binary file not shown.

index.d.ts vendored (1 line changed)

@@ -13,3 +13,4 @@ interface CustomEventMap {
type ContentIndex = Record<FullSlug, ContentDetails>
declare const fetchData: Promise<ContentIndex>
declare const semanticCfg: import("./quartz/cfg").GlobalConfiguration["semanticSearch"]

package-lock.json generated (1517 lines changed)

File diff suppressed because it is too large

package.json

@@ -37,6 +37,7 @@
"dependencies": {
"@clack/prompts": "^0.11.0",
"@floating-ui/dom": "^1.7.4",
"@huggingface/transformers": "^3.7.5",
"@myriaddreamin/rehype-typst": "^0.6.0",
"@napi-rs/simple-git": "0.1.22",
"@tweenjs/tween.js": "^25.0.0",
@@ -48,23 +49,24 @@
"esbuild-sass-plugin": "^3.3.1",
"flexsearch": "^0.8.205",
"github-slugger": "^2.0.0",
"globby": "^14.1.0",
"globby": "^15.0.0",
"gray-matter": "^4.0.3",
"hast-util-to-html": "^9.0.5",
"hast-util-to-jsx-runtime": "^2.3.6",
"hast-util-to-string": "^3.0.1",
"is-absolute-url": "^4.0.1",
"is-absolute-url": "^5.0.0",
"js-yaml": "^4.1.0",
"lightningcss": "^1.30.1",
"lightningcss": "^1.30.2",
"mdast-util-find-and-replace": "^3.0.2",
"mdast-util-to-hast": "^13.2.0",
"mdast-util-to-string": "^4.0.0",
"micromorph": "^0.4.5",
"minimatch": "^10.0.3",
"pixi.js": "^8.13.1",
"preact": "^10.27.1",
"onnxruntime-web": "^1.23.0",
"pixi.js": "^8.13.2",
"preact": "^10.27.2",
"preact-render-to-string": "^6.6.1",
"pretty-bytes": "^7.0.1",
"pretty-bytes": "^7.1.0",
"pretty-time": "^1.1.0",
"reading-time": "^1.5.0",
"rehype-autolink-headings": "^7.1.0",
@@ -83,9 +85,9 @@
"remark-rehype": "^11.1.2",
"remark-smartypants": "^3.0.2",
"rfdc": "^1.4.1",
"satori": "^0.18.2",
"satori": "^0.18.3",
"serve-handler": "^6.1.6",
"sharp": "^0.34.3",
"sharp": "^0.34.4",
"shiki": "^1.26.2",
"source-map-support": "^0.5.21",
"to-vfile": "^8.0.0",
@@ -93,7 +95,7 @@
"unified": "^11.0.5",
"unist-util-visit": "^5.0.0",
"vfile": "^6.0.3",
"workerpool": "^9.3.3",
"workerpool": "^9.3.4",
"ws": "^8.18.3",
"yargs": "^18.0.0"
},
@@ -101,14 +103,14 @@
"@types/d3": "^7.4.3",
"@types/hast": "^3.0.4",
"@types/js-yaml": "^4.0.9",
"@types/node": "^24.3.1",
"@types/node": "^24.6.0",
"@types/pretty-time": "^1.1.5",
"@types/source-map-support": "^0.5.10",
"@types/ws": "^8.18.1",
"@types/yargs": "^17.0.33",
"esbuild": "^0.25.9",
"esbuild": "^0.25.10",
"prettier": "^3.6.2",
"tsx": "^4.20.5",
"tsx": "^4.20.6",
"typescript": "^5.9.2"
}
}

quartz.config.ts

@@ -1,6 +1,18 @@
import { QuartzConfig } from "./quartz/cfg"
import { GlobalConfiguration, QuartzConfig } from "./quartz/cfg"
import * as Plugin from "./quartz/plugins"
const semanticSearch: GlobalConfiguration["semanticSearch"] = {
enable: true,
model: "onnx-community/embeddinggemma-300m-ONNX",
aot: true,
dims: 768,
dtype: "fp32",
shardSizeRows: 1024,
hnsw: { M: 16, efConstruction: 200 },
chunking: { chunkSize: 256, chunkOverlap: 64 },
vllm: { enable: true, concurrency: 16, batchSize: 128 },
}
/**
* Quartz 4 Configuration
*
@@ -52,6 +64,7 @@ const config: QuartzConfig = {
},
},
},
semanticSearch,
},
plugins: {
transformers: [
@@ -84,6 +97,7 @@ const config: QuartzConfig = {
enableSiteMap: true,
enableRSS: true,
}),
Plugin.SemanticIndex(semanticSearch),
Plugin.Assets(),
Plugin.Static(),
Plugin.Favicon(),

quartz/cfg.ts

@@ -78,6 +78,34 @@ export interface GlobalConfiguration {
* Region Codes: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
*/
locale: ValidLocale
/** Semantic search configuration */
semanticSearch?: {
enable: boolean
model: string
aot: boolean
dtype: "fp32" | "fp16"
dims: number
shardSizeRows: number
manifestUrl?: string
manifestBaseUrl?: string
disableCache?: boolean
hnsw: {
M: number
efConstruction: number
efSearch?: number
}
chunking: {
chunkSize: number
chunkOverlap: number
noChunking?: boolean
}
vllm?: {
enable: boolean
vllmUrl?: string
concurrency: number
batchSize: number
}
}
}
export interface QuartzConfig {

quartz/components/Search.tsx

@@ -7,10 +7,12 @@ import { i18n } from "../i18n"
export interface SearchOptions {
enablePreview: boolean
includeButton: boolean
}
const defaultOptions: SearchOptions = {
enablePreview: true,
includeButton: true,
}
export default ((userOpts?: Partial<SearchOptions>) => {
@@ -29,19 +31,54 @@ export default ((userOpts?: Partial<SearchOptions>) => {
</svg>
<p>{i18n(cfg.locale).components.search.title}</p>
</button>
<div class="search-container">
<div class="search-space">
<input
autocomplete="off"
class="search-bar"
name="search"
type="text"
aria-label={searchPlaceholder}
placeholder={searchPlaceholder}
/>
<div class="search-layout" data-preview={opts.enablePreview}></div>
</div>
</div>
<search class="search-container">
<form class="search-space">
<div class="input-container">
<input
autocomplete="off"
class="search-bar"
name="search"
type="text"
aria-label={searchPlaceholder}
placeholder={searchPlaceholder}
/>
<div class="search-mode-toggle" role="radiogroup" aria-label="Search mode">
<button
type="button"
class="mode-option"
data-mode="lexical"
aria-pressed="true"
aria-label="Full-text search"
>
<svg viewBox="0 0 20 20" role="img" aria-hidden="true">
<g fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round">
<path d="M4 6h12M4 10h8M4 14h6" />
</g>
</svg>
<span class="sr-only">Full-text</span>
</button>
<button
type="button"
class="mode-option"
data-mode="semantic"
aria-pressed="false"
aria-label="Semantic search"
>
<svg viewBox="0 0 20 20" role="img" aria-hidden="true">
<g fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round">
<circle cx="5.2" cy="10" r="2.4" />
<circle cx="14.8" cy="4.8" r="2.1" />
<circle cx="14.8" cy="15.2" r="2.1" />
<path d="M7.1 8.7l5.2-2.4M7.1 11.3l5.2 2.4M14.8 6.9v6.2" />
</g>
</svg>
<span class="sr-only">Semantic</span>
</button>
</div>
</div>
<output class="search-layout" data-preview={opts.enablePreview} />
</form>
</search>
</div>
)
}

quartz/components/renderPage.tsx

@@ -25,6 +25,7 @@ const headerRegex = new RegExp(/h[1-6]/)
export function pageResources(
baseDir: FullSlug | RelativeURL,
staticResources: StaticResources,
cfg?: GlobalConfiguration,
): StaticResources {
const contentIndexPath = joinSegments(baseDir, "static/contentIndex.json")
const contentIndexScript = `const fetchData = fetch("${contentIndexPath}").then(data => data.json())`
@@ -48,6 +49,12 @@ export function pageResources(
spaPreserve: true,
script: contentIndexScript,
},
{
loadTime: "beforeDOMReady",
contentType: "inline",
spaPreserve: true,
script: `const semanticCfg = ${JSON.stringify(cfg?.semanticSearch ?? {})};`,
},
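// note: this inline script is what backs the `declare const semanticCfg` line
// added to index.d.ts above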
...staticResources.js,
],
additionalHead: staticResources.additionalHead,

quartz/components/scripts/search.inline.ts

@@ -1,6 +1,7 @@
import FlexSearch, { DefaultDocumentSearchResults } from "flexsearch"
import FlexSearch, { DefaultDocumentSearchResults, Id } from "flexsearch"
import { ContentDetails } from "../../plugins/emitters/contentIndex"
import { registerEscapeHandler, removeAllChildren } from "./util"
import { SemanticClient, type SemanticResult } from "./semantic.inline"
import { registerEscapeHandler, removeAllChildren, fetchCanonical } from "./util"
import { FullSlug, normalizeRelativeURLs, resolveRelative } from "../../util/path"
interface Item {
@@ -14,43 +15,46 @@ interface Item {
// Can be expanded with things like "term" in the future
type SearchType = "basic" | "tags"
let searchType: SearchType = "basic"
let currentSearchTerm: string = ""
const encoder = (str: string) => {
return str
.toLowerCase()
.split(/\s+/)
.filter((token) => token.length > 0)
type SearchMode = "lexical" | "semantic"
const SEARCH_MODE_STORAGE_KEY = "quartz:search:mode"
const loadStoredSearchMode = (): SearchMode | null => {
if (typeof window === "undefined") {
return null
}
try {
const stored = window.localStorage.getItem(SEARCH_MODE_STORAGE_KEY)
return stored === "lexical" || stored === "semantic" ? stored : null
} catch (err) {
console.warn("[Search] failed to read stored search mode:", err)
return null
}
}
let index = new FlexSearch.Document<Item>({
encode: encoder,
document: {
id: "id",
tag: "tags",
index: [
{
field: "title",
tokenize: "forward",
},
{
field: "content",
tokenize: "forward",
},
{
field: "tags",
tokenize: "forward",
},
],
},
})
const persistSearchMode = (mode: SearchMode) => {
if (typeof window === "undefined") {
return
}
try {
window.localStorage.setItem(SEARCH_MODE_STORAGE_KEY, mode)
} catch (err) {
console.warn("[Search] failed to persist search mode:", err)
}
}
let searchMode: SearchMode = "lexical"
let currentSearchTerm: string = ""
let rawSearchTerm: string = ""
let semantic: SemanticClient | null = null
let semanticReady = false
let semanticInitFailed = false
type SimilarityResult = { item: Item; similarity: number }
let chunkMetadata: Record<string, { parentSlug: string; chunkId: number }> = {}
let manifestIds: string[] = []
const p = new DOMParser()
const fetchContentCache: Map<FullSlug, Element[]> = new Map()
const contextWindowWords = 30
const numSearchResults = 8
const numTagResults = 5
const tokenizeTerm = (term: string) => {
const tokens = term.split(/\s+/).filter((t) => t.trim() !== "")
const tokenLen = tokens.length
@@ -108,6 +112,102 @@ function highlight(searchTerm: string, text: string, trim?: boolean) {
}`
}
// Encoder shared by FlexSearch for both indexing and querying
const encoder = (str: string) =>
str
.toLowerCase()
.split(/\s+/)
.filter((token) => token.length > 0)
/**
* Get parent document slug for a chunk ID
*/
function getParentSlug(slug: string): string {
const meta = chunkMetadata[slug]
return meta ? meta.parentSlug : slug
}
/**
* Aggregate semantic search results from chunks to documents using RRF
* @param results Raw semantic results (chunk-level)
* @param slugToDocIndex Map from document slug to index in idDataMap
* @returns Object with rrfScores (for ranking) and maxScores (for display)
*/
function aggregateChunkResults(
results: SemanticResult[],
slugToDocIndex: Map<FullSlug, number>,
): { rrfScores: Map<number, number>; maxScores: Map<number, number> } {
// Group chunks by parent document
const docChunks = new Map<string, Array<{ score: number }>>()
results.forEach(({ id, score }) => {
// id is an index into manifestIds (the chunk IDs from embeddings)
const chunkSlug = manifestIds[id]
if (!chunkSlug) return
// Get parent document slug
const parentSlug = getParentSlug(chunkSlug)
if (!docChunks.has(parentSlug)) {
docChunks.set(parentSlug, [])
}
docChunks.get(parentSlug)!.push({ score })
})
// Apply RRF for ranking and track max similarity for display
const rrfScores = new Map<number, number>()
const maxScores = new Map<number, number>()
const RRF_K = 60
for (const [parentSlug, chunks] of docChunks) {
const docIdx = slugToDocIndex.get(parentSlug as FullSlug)
if (typeof docIdx !== "number") continue
// Sort chunks by score descending to assign per-document ranks
chunks.sort((a, b) => b.score - a.score)
// RRF formula: sum(1 / (k + rank)) across all chunks, using per-document ranks
const rrfScore = chunks.reduce((sum, _, rank) => sum + 1.0 / (RRF_K + rank), 0)
// Max similarity score for display (original 0-1 range)
const maxScore = chunks[0].score
rrfScores.set(docIdx, rrfScore)
maxScores.set(docIdx, maxScore)
}
return { rrfScores, maxScores }
}
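// Worked example (illustration): with RRF_K = 60, a document matched by two chunks
// (per-document ranks 0 and 1) scores 1/60 + 1/61 ≈ 0.0331, while a single-chunk
// match scores 1/60 ≈ 0.0167, so documents with several relevant chunks rank first
// regardless of their raw similarity values.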
// Initialize the FlexSearch Document instance with the appropriate configuration
const index = new FlexSearch.Document<Item>({
tokenize: "forward",
encode: encoder,
document: {
id: "id",
tag: "tags",
index: [
{
field: "title",
tokenize: "forward",
},
{
field: "content",
tokenize: "forward",
},
{
field: "tags",
tokenize: "forward",
},
],
},
})
const p = new DOMParser()
const fetchContentCache: Map<FullSlug, Element[]> = new Map()
const numSearchResults = 10
const numTagResults = 10
function highlightHTML(searchTerm: string, el: HTMLElement) {
const p = new DOMParser()
const tokenizedTerms = tokenizeTerm(searchTerm)
@@ -149,7 +249,11 @@ function highlightHTML(searchTerm: string, el: HTMLElement) {
return html.body
}
async function setupSearch(searchElement: Element, currentSlug: FullSlug, data: ContentIndex) {
async function setupSearch(
searchElement: HTMLDivElement,
currentSlug: FullSlug,
data: ContentIndex,
) {
const container = searchElement.querySelector(".search-container") as HTMLElement
if (!container) return
@@ -164,12 +268,183 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
const searchLayout = searchElement.querySelector(".search-layout") as HTMLElement
if (!searchLayout) return
const searchSpace = searchElement?.querySelector(".search-space") as HTMLFormElement
if (!searchSpace) return
// Create semantic search progress bar
const progressBar = document.createElement("div")
progressBar.className = "semantic-search-progress"
progressBar.style.cssText = `
position: absolute;
bottom: 0;
left: 0;
height: 2px;
width: 0;
background: var(--secondary);
transition: width 0.3s ease, opacity 0.3s ease;
opacity: 0;
z-index: 9999;
`
searchBar.parentElement?.appendChild(progressBar)
const startSemanticProgress = () => {
progressBar.style.opacity = "1"
progressBar.style.width = "0"
setTimeout(() => {
progressBar.style.width = "100%"
}, 10)
}
const completeSemanticProgress = () => {
progressBar.style.opacity = "0"
setTimeout(() => {
progressBar.style.width = "0"
}, 300)
}
const resetProgressBar = () => {
progressBar.style.opacity = "0"
progressBar.style.width = "0"
}
const idDataMap = Object.keys(data) as FullSlug[]
const slugToIndex = new Map<FullSlug, number>()
idDataMap.forEach((slug, idx) => slugToIndex.set(slug, idx))
const modeToggle = searchSpace.querySelector(".search-mode-toggle") as HTMLDivElement | null
const modeButtons = modeToggle
? Array.from(modeToggle.querySelectorAll<HTMLButtonElement>(".mode-option"))
: []
const appendLayout = (el: HTMLElement) => {
searchLayout.appendChild(el)
}
const enablePreview = searchLayout.dataset.preview === "true"
if (!semantic && !semanticInitFailed) {
const client = new SemanticClient(semanticCfg)
try {
await client.ensureReady()
semantic = client
semanticReady = true
// Load chunk metadata and IDs from manifest
try {
const manifestUrl = "/embeddings/manifest.json"
const res = await fetch(manifestUrl)
if (res.ok) {
const manifest = await res.json()
chunkMetadata = manifest.chunkMetadata || {}
manifestIds = manifest.ids || []
console.debug(
`[Search] Loaded manifest: ${manifestIds.length} chunks, ${Object.keys(chunkMetadata).length} chunked documents`,
)
}
} catch (err) {
console.warn("[Search] failed to load chunk metadata:", err)
chunkMetadata = {}
manifestIds = []
}
} catch (err) {
console.warn("[SemanticClient] initialization failed:", err)
client.dispose()
semantic = null
semanticReady = false
semanticInitFailed = true
}
} else if (semantic && !semanticReady) {
try {
await semantic.ensureReady()
semanticReady = true
} catch (err) {
console.warn("[SemanticClient] became unavailable:", err)
semantic.dispose()
semantic = null
semanticReady = false
semanticInitFailed = true
}
}
const storedMode = loadStoredSearchMode()
if (storedMode === "semantic") {
if (semanticReady) {
searchMode = storedMode
}
} else if (storedMode === "lexical") {
searchMode = storedMode
}
if (!semanticReady && searchMode === "semantic") {
searchMode = "lexical"
}
let searchSeq = 0
let runSearchTimer: number | null = null
let lastInputAt = 0
searchLayout.dataset.mode = searchMode
const updateModeUI = (mode: SearchMode) => {
modeButtons.forEach((button) => {
const btnMode = (button.dataset.mode as SearchMode) ?? "lexical"
const isActive = btnMode === mode
button.classList.toggle("active", isActive)
button.setAttribute("aria-pressed", String(isActive))
})
if (modeToggle) {
modeToggle.dataset.mode = mode
}
searchLayout.dataset.mode = mode
}
const computeDebounceDelay = (term: string): number => {
const trimmed = term.trim()
const lastTerm = currentSearchTerm
const isExtension =
lastTerm.length > 0 && trimmed.length > lastTerm.length && trimmed.startsWith(lastTerm)
const isRetraction = lastTerm.length > trimmed.length
const isReplacement =
lastTerm.length > 0 && !trimmed.startsWith(lastTerm) && !lastTerm.startsWith(trimmed)
const baseFullQueryDelay = 200
const semanticPenalty = searchMode === "semantic" ? 60 : 0
if (isExtension && trimmed.length > 2) {
return baseFullQueryDelay + semanticPenalty
}
if (isReplacement && trimmed.length > 3) {
return Math.max(90, baseFullQueryDelay - 80)
}
if (isRetraction) {
return 90
}
return baseFullQueryDelay + (searchMode === "semantic" ? 40 : 0)
}
const triggerSearchWithMode = (mode: SearchMode) => {
if (mode === "semantic" && !semanticReady) {
return
}
if (searchMode === mode) return
searchMode = mode
updateModeUI(mode)
persistSearchMode(searchMode)
if (rawSearchTerm.trim() !== "") {
searchLayout.classList.add("display-results")
const token = ++searchSeq
void runSearch(rawSearchTerm, token)
}
}
updateModeUI(searchMode)
modeButtons.forEach((button) => {
const btnMode = (button.dataset.mode as SearchMode) ?? "lexical"
if (btnMode === "semantic") {
button.disabled = !semanticReady
button.setAttribute("aria-disabled", String(!semanticReady))
}
const handler = () => triggerSearchWithMode(btnMode)
button.addEventListener("click", handler)
window.addCleanup(() => button.removeEventListener("click", handler))
})
let preview: HTMLDivElement | undefined = undefined
let previewInner: HTMLDivElement | undefined = undefined
const results = document.createElement("div")
@@ -191,20 +466,23 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
removeAllChildren(preview)
}
searchLayout.classList.remove("display-results")
searchType = "basic" // reset search type after closing
searchButton.focus()
resetProgressBar()
}
function showSearch(searchTypeNew: SearchType) {
searchType = searchTypeNew
if (sidebar) sidebar.style.zIndex = "1"
function showSearch(type: SearchType) {
container.classList.add("active")
if (type === "tags") {
searchBar.value = "#"
rawSearchTerm = "#"
}
searchBar.focus()
}
let currentHover: HTMLInputElement | null = null
async function shortcutHandler(e: HTMLElementEventMap["keydown"]) {
if (e.key === "k" && (e.ctrlKey || e.metaKey) && !e.shiftKey) {
if ((e.key === "/" || e.key === "k") && (e.ctrlKey || e.metaKey) && !e.shiftKey) {
e.preventDefault()
const searchBarOpen = container.classList.contains("active")
searchBarOpen ? hideSearch() : showSearch("basic")
@@ -214,9 +492,6 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
e.preventDefault()
const searchBarOpen = container.classList.contains("active")
searchBarOpen ? hideSearch() : showSearch("tags")
// add "#" prefix for tag search
searchBar.value = "#"
return
}
@@ -226,20 +501,29 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
// If search is active, then we will render the first result and display accordingly
if (!container.classList.contains("active")) return
if (e.key === "Enter" && !e.isComposing) {
if (e.key === "Enter") {
// If result has focus, navigate to that one, otherwise pick first result
let anchor: HTMLAnchorElement | undefined
if (results.contains(document.activeElement)) {
const active = document.activeElement as HTMLInputElement
if (active.classList.contains("no-match")) return
await displayPreview(active)
active.click()
anchor = document.activeElement as HTMLAnchorElement
if (anchor.classList.contains("no-match")) return
await displayPreview(anchor)
e.preventDefault()
anchor.click()
} else {
const anchor = document.getElementsByClassName("result-card")[0] as HTMLInputElement | null
anchor = document.getElementsByClassName("result-card")[0] as HTMLAnchorElement
if (!anchor || anchor.classList.contains("no-match")) return
await displayPreview(anchor)
e.preventDefault()
anchor.click()
}
} else if (e.key === "ArrowUp" || (e.shiftKey && e.key === "Tab")) {
if (anchor !== undefined)
window.spaNavigate(new URL(new URL(anchor.href).pathname, window.location.toString()))
} else if (
e.key === "ArrowUp" ||
(e.shiftKey && e.key === "Tab") ||
(e.ctrlKey && e.key === "p")
) {
e.preventDefault()
if (results.contains(document.activeElement)) {
// If an element in results-container already has focus, focus previous one
@@ -252,7 +536,7 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
if (prevResult) currentHover = prevResult
await displayPreview(prevResult)
}
} else if (e.key === "ArrowDown" || e.key === "Tab") {
} else if (e.key === "ArrowDown" || e.key === "Tab" || (e.ctrlKey && e.key === "n")) {
e.preventDefault()
// The results should already be focused, so we need to find the next one.
// The activeElement is the search bar, so we need to find the first result and focus it.
@@ -269,25 +553,33 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
}
}
const formatForDisplay = (term: string, id: number) => {
const formatForDisplay = (term: string, id: number, renderType: SearchType) => {
const slug = idDataMap[id]
// Check if query contains title words (for boosting exact matches)
const queryTokens = tokenizeTerm(term)
const titleTokens = tokenizeTerm(data[slug].title ?? "")
const titleMatch = titleTokens.some((t) => queryTokens.includes(t))
return {
id,
slug,
title: searchType === "tags" ? data[slug].title : highlight(term, data[slug].title ?? ""),
title: renderType === "tags" ? data[slug].title : highlight(term, data[slug].title ?? ""),
content: highlight(term, data[slug].content ?? "", true),
tags: highlightTags(term.substring(1), data[slug].tags),
tags: highlightTags(term, data[slug].tags, renderType),
titleMatch, // Add title match flag for boosting
}
}
function highlightTags(term: string, tags: string[]) {
if (!tags || searchType !== "tags") {
function highlightTags(term: string, tags: string[], renderType: SearchType) {
if (!tags || renderType !== "tags") {
return []
}
const tagTerm = term.toLowerCase()
return tags
.map((tag) => {
if (tag.toLowerCase().includes(term.toLowerCase())) {
if (tag.toLowerCase().includes(tagTerm)) {
return `<li><p class="match-tag">#${tag}</p></li>`
} else {
return `<li><p>#${tag}</p></li>`
@@ -300,24 +592,40 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
return new URL(resolveRelative(currentSlug, slug), location.toString())
}
const resultToHTML = ({ slug, title, content, tags }: Item) => {
const resultToHTML = ({ item, percent }: { item: Item; percent: number | null }) => {
const { slug, title, content, tags, target } = item
const htmlTags = tags.length > 0 ? `<ul class="tags">${tags.join("")}</ul>` : ``
const itemTile = document.createElement("a")
const titleContent = target ? highlight(currentSearchTerm, target) : title
const subscript = target ? `<b>${slug}</b>` : ``
let percentLabel = "—"
let percentAttr = ""
if (percent !== null && Number.isFinite(percent)) {
const bounded = Math.max(0, Math.min(100, percent))
percentLabel = `${bounded.toFixed(1)}%`
percentAttr = bounded.toFixed(3)
}
itemTile.classList.add("result-card")
itemTile.id = slug
itemTile.href = resolveUrl(slug).toString()
itemTile.innerHTML = `
<h3 class="card-title">${title}</h3>
${htmlTags}
<p class="card-description">${content}</p>
`
itemTile.addEventListener("click", (event) => {
if (event.altKey || event.ctrlKey || event.metaKey || event.shiftKey) return
hideSearch()
})
itemTile.innerHTML = `<hgroup>
<h3>${titleContent}</h3>
${subscript}${htmlTags}
${searchMode === "semantic" ? `<span class="result-likelihood" title="match likelihood">&nbsp;${percentLabel}</span>` : ""}
${enablePreview && window.innerWidth > 600 ? "" : `<p>${content}</p>`}
</hgroup>`
if (percentAttr) itemTile.dataset.scorePercent = percentAttr
else delete itemTile.dataset.scorePercent
const handler = (event: MouseEvent) => {
if (event.altKey || event.ctrlKey || event.metaKey || event.shiftKey) return
const handler = (evt: MouseEvent) => {
if (evt.altKey || evt.ctrlKey || evt.metaKey || evt.shiftKey) return
const anchor = evt.currentTarget as HTMLAnchorElement | null
if (!anchor) return
evt.preventDefault()
const href = anchor.getAttribute("href")
if (!href) return
const url = new URL(href, window.location.toString())
window.spaNavigate(url)
hideSearch()
}
@@ -335,15 +643,22 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
return itemTile
}
async function displayResults(finalResults: Item[]) {
async function displayResults(finalResults: SimilarityResult[]) {
removeAllChildren(results)
if (finalResults.length === 0) {
results.innerHTML = `<a class="result-card no-match">
<h3>No results.</h3>
<p>Try another search term?</p>
</a>`
currentHover = null
} else {
results.append(...finalResults.map(resultToHTML))
const decorated = finalResults.map(({ item, similarity }) => {
if (!Number.isFinite(similarity)) return { item, percent: null }
const bounded = Math.max(-1, Math.min(1, similarity))
const percent = ((bounded + 1) / 2) * 100
return { item, percent }
})
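// e.g. a cosine similarity of 0.62 displays as ((0.62 + 1) / 2) * 100 = 81.0%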
results.append(...decorated.map(resultToHTML))
}
if (finalResults.length === 0 && preview) {
@@ -363,8 +678,8 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
return fetchContentCache.get(slug) as Element[]
}
const targetUrl = resolveUrl(slug).toString()
const contents = await fetch(targetUrl)
const targetUrl = resolveUrl(slug)
const contents = await fetchCanonical(targetUrl)
.then((res) => res.text())
.then((contents) => {
if (contents === undefined) {
@@ -394,73 +709,296 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
const highlights = [...preview.getElementsByClassName("highlight")].sort(
(a, b) => b.innerHTML.length - a.innerHTML.length,
)
highlights[0]?.scrollIntoView({ block: "start" })
if (highlights.length > 0) {
const highlight = highlights[0]
const container = preview
if (container && highlight) {
// Get the relative positions
const containerRect = container.getBoundingClientRect()
const highlightRect = highlight.getBoundingClientRect()
// Calculate the scroll position relative to the container
const relativeTop = highlightRect.top - containerRect.top + container.scrollTop - 20 // 20px buffer
// Smoothly scroll the container
container.scrollTo({
top: relativeTop,
behavior: "smooth",
})
}
}
}
async function onType(e: HTMLElementEventMap["input"]) {
async function runSearch(rawTerm: string, token: number) {
if (!searchLayout || !index) return
currentSearchTerm = (e.target as HTMLInputElement).value
searchLayout.classList.toggle("display-results", currentSearchTerm !== "")
searchType = currentSearchTerm.startsWith("#") ? "tags" : "basic"
const trimmed = rawTerm.trim()
if (trimmed === "") {
removeAllChildren(results)
if (preview) {
removeAllChildren(preview)
}
currentHover = null
searchLayout.classList.remove("display-results")
resetProgressBar()
return
}
let searchResults: DefaultDocumentSearchResults<Item>
if (searchType === "tags") {
currentSearchTerm = currentSearchTerm.substring(1).trim()
const separatorIndex = currentSearchTerm.indexOf(" ")
if (separatorIndex != -1) {
// search by title and content index and then filter by tag (implemented in flexsearch)
const tag = currentSearchTerm.substring(0, separatorIndex)
const query = currentSearchTerm.substring(separatorIndex + 1).trim()
searchResults = await index.searchAsync({
query: query,
// return at least 10000 documents, so it is enough to filter them by tag (implemented in flexsearch)
const modeForRanking: SearchMode = searchMode
const initialType: SearchType = trimmed.startsWith("#") ? "tags" : "basic"
let workingType: SearchType = initialType
let highlightTerm = trimmed
let tagTerm = ""
let searchResults: DefaultDocumentSearchResults<Item> = []
if (initialType === "tags") {
tagTerm = trimmed.substring(1).trim()
const separatorIndex = tagTerm.indexOf(" ")
if (separatorIndex !== -1) {
const tag = tagTerm.substring(0, separatorIndex).trim()
const query = tagTerm.substring(separatorIndex + 1).trim()
const results = await index.searchAsync({
query,
limit: Math.max(numSearchResults, 10000),
index: ["title", "content"],
tag: { tags: tag },
})
for (let searchResult of searchResults) {
searchResult.result = searchResult.result.slice(0, numSearchResults)
}
// set search type to basic and remove tag from term for proper highlighting and scroll
searchType = "basic"
currentSearchTerm = query
if (token !== searchSeq) return
searchResults = Object.values(results)
workingType = "basic"
highlightTerm = query
} else {
// default search by tags index
searchResults = await index.searchAsync({
query: currentSearchTerm,
const results = await index.searchAsync({
query: tagTerm,
limit: numSearchResults,
index: ["tags"],
})
if (token !== searchSeq) return
searchResults = Object.values(results)
highlightTerm = tagTerm
}
} else if (searchType === "basic") {
searchResults = await index.searchAsync({
query: currentSearchTerm,
} else {
const results = await index.searchAsync({
query: highlightTerm,
limit: numSearchResults,
index: ["title", "content"],
})
if (token !== searchSeq) return
searchResults = Object.values(results)
}
const coerceIds = (hit?: DefaultDocumentSearchResults<Item>[number]): number[] => {
if (!hit) return []
return hit.result
.map((value: Id) => {
if (typeof value === "number") {
return value
}
const parsed = Number.parseInt(String(value), 10)
return Number.isNaN(parsed) ? null : parsed
})
.filter((value): value is number => value !== null)
}
const getByField = (field: string): number[] => {
const results = searchResults.filter((x) => x.field === field)
return results.length === 0 ? [] : ([...results[0].result] as number[])
const hit = searchResults.find((x) => x.field === field)
return coerceIds(hit)
}
// order titles ahead of content
const allIds: Set<number> = new Set([
...getByField("title"),
...getByField("content"),
...getByField("tags"),
])
const finalResults = [...allIds].map((id) => formatForDisplay(currentSearchTerm, id))
await displayResults(finalResults)
currentSearchTerm = highlightTerm
const candidateItems = new Map<string, Item>()
const ensureItem = (id: number): Item | null => {
const slug = idDataMap[id]
if (!slug) return null
const cached = candidateItems.get(slug)
if (cached) return cached
const item = formatForDisplay(highlightTerm, id, workingType)
if (item) {
candidateItems.set(slug, item)
return item
}
return null
}
const baseIndices: number[] = []
for (const id of allIds) {
const item = ensureItem(id)
if (!item) continue
const idx = slugToIndex.get(item.slug)
if (typeof idx === "number") {
baseIndices.push(idx)
}
}
let semanticIds: number[] = []
const semanticSimilarity = new Map<number, number>()
const integrateIds = (ids: number[]) => {
ids.forEach((docId) => {
ensureItem(docId)
})
}
const orchestrator = semanticReady && semantic ? semantic : null
const resolveSimilarity = (item: Item): number => {
const semanticHit = semanticSimilarity.get(item.id)
return semanticHit ?? Number.NaN
}
const render = async () => {
if (token !== searchSeq) return
const useSemantic = semanticReady && semanticIds.length > 0
const weights =
modeForRanking === "semantic" && useSemantic
? { base: 0.3, semantic: 1.0 }
: { base: 1.0, semantic: useSemantic ? 0.3 : 0 }
const rrf = new Map<string, number>()
const push = (ids: number[], weight: number, applyTitleBoost: boolean = false) => {
if (!ids.length || weight <= 0) return
ids.forEach((docId, rank) => {
const slug = idDataMap[docId]
if (!slug) return
const item = ensureItem(docId)
if (!item) return
// Apply title boost for FlexSearch results (1.5x boost for exact title matches)
let effectiveWeight = weight
if (applyTitleBoost && item.titleMatch) {
effectiveWeight *= 1.5
}
const prev = rrf.get(slug) ?? 0
rrf.set(slug, prev + effectiveWeight / (1 + rank))
})
}
push(baseIndices, weights.base, true) // FlexSearch with title boost
push(semanticIds, weights.semantic, false) // Semantic without boost
const rankedEntries = Array.from(candidateItems.values())
.map((item) => ({ item, score: rrf.get(item.slug) ?? 0 }))
.sort((a, b) => b.score - a.score)
.slice(0, numSearchResults)
const displayEntries: SimilarityResult[] = []
for (const entry of rankedEntries) {
const similarity = resolveSimilarity(entry.item)
displayEntries.push({ item: entry.item, similarity })
}
await displayResults(displayEntries)
}
await render()
if (workingType === "tags" || !orchestrator || !semanticReady || highlightTerm.length < 2) {
return
}
const showProgress = modeForRanking === "semantic"
if (showProgress) {
startSemanticProgress()
}
try {
const { semantic: semRes } = await orchestrator.search(
highlightTerm,
numSearchResults * 3, // Request more chunks to ensure good document coverage
)
if (token !== searchSeq) {
if (showProgress) completeSemanticProgress()
return
}
// Aggregate chunk results to document level using RRF
const { rrfScores: semRrfScores, maxScores: semMaxScores } = aggregateChunkResults(
semRes,
slugToIndex,
)
// Use RRF scores for ranking
semanticIds = Array.from(semRrfScores.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, numSearchResults)
.map(([docIdx]) => docIdx)
// Use max chunk similarity for display (0-1 range)
semanticSimilarity.clear()
semMaxScores.forEach((score, docIdx) => {
semanticSimilarity.set(docIdx, score)
})
integrateIds(semanticIds)
if (showProgress) completeSemanticProgress()
} catch (err) {
console.warn("[SemanticClient] search failed:", err)
if (showProgress) completeSemanticProgress()
orchestrator.dispose()
semantic = null
semanticReady = false
semanticInitFailed = true
if (searchMode === "semantic") {
searchMode = "lexical"
updateModeUI(searchMode)
}
modeButtons.forEach((button) => {
if ((button.dataset.mode as SearchMode) === "semantic") {
button.disabled = true
button.setAttribute("aria-disabled", "true")
}
})
}
await render()
}
function onType(e: HTMLElementEventMap["input"]) {
if (!searchLayout || !index) return
rawSearchTerm = (e.target as HTMLInputElement).value
const hasQuery = rawSearchTerm.trim() !== ""
searchLayout.classList.toggle("display-results", hasQuery)
const term = rawSearchTerm
const token = ++searchSeq
if (runSearchTimer !== null) {
window.clearTimeout(runSearchTimer)
runSearchTimer = null
}
if (!hasQuery) {
void runSearch("", token)
return
}
const now = performance.now()
lastInputAt = now
const delay = computeDebounceDelay(term)
const scheduledAt = lastInputAt
runSearchTimer = window.setTimeout(() => {
if (scheduledAt !== lastInputAt) {
return
}
runSearchTimer = null
void runSearch(term, token)
}, delay)
}
document.addEventListener("keydown", shortcutHandler)
window.addCleanup(() => document.removeEventListener("keydown", shortcutHandler))
searchButton.addEventListener("click", () => showSearch("basic"))
window.addCleanup(() => searchButton.removeEventListener("click", () => showSearch("basic")))
const openHandler = () => showSearch("basic")
searchButton.addEventListener("click", openHandler)
window.addCleanup(() => searchButton.removeEventListener("click", openHandler))
searchBar.addEventListener("input", onType)
window.addCleanup(() => searchBar.removeEventListener("input", onType))
window.addCleanup(() => {
if (runSearchTimer !== null) {
window.clearTimeout(runSearchTimer)
runSearchTimer = null
}
resetProgressBar()
})
registerEscapeHandler(container, hideSearch)
await fillDocument(data)
@@ -468,17 +1006,17 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
/**
* Fills flexsearch document with data
* @param index index to fill
* @param data data to fill index with
*/
let indexPopulated = false
async function fillDocument(data: ContentIndex) {
if (indexPopulated) return
let id = 0
const promises: Array<Promise<unknown>> = []
const promises = []
for (const [slug, fileData] of Object.entries<ContentDetails>(data)) {
promises.push(
index.addAsync(id++, {
//@ts-ignore
index.addAsync({
id,
slug: slug as FullSlug,
title: fileData.title,
@@ -486,6 +1024,7 @@ async function fillDocument(data: ContentIndex) {
tags: fileData.tags,
}),
)
id++
}
await Promise.all(promises)
@@ -495,7 +1034,9 @@ async function fillDocument(data: ContentIndex) {
document.addEventListener("nav", async (e: CustomEventMap["nav"]) => {
const currentSlug = e.detail.url
const data = await fetchData
const searchElement = document.getElementsByClassName("search")
const searchElement = document.getElementsByClassName(
"search",
) as HTMLCollectionOf<HTMLDivElement>
for (const element of searchElement) {
await setupSearch(element, currentSlug, data)
}

quartz/components/scripts/semantic.inline.ts

@@ -0,0 +1,182 @@
export type SemanticResult = { id: number; score: number }

type ProgressMessage = {
  type: "progress"
  loadedRows: number
  totalRows: number
}
type ReadyMessage = { type: "ready" }
type ResultMessage = {
  type: "search-result"
  seq: number
  semantic: SemanticResult[]
}
type ErrorMessage = { type: "error"; seq?: number; message: string }

type SearchPayload = {
  semantic: SemanticResult[]
}

type PendingResolver = {
  resolve: (payload: SearchPayload) => void
  reject: (err: Error) => void
}

export class SemanticClient {
  private ready: Promise<void>
  private resolveReady!: () => void
  private worker: Worker | null = null
  private pending = new Map<number, PendingResolver>()
  private seq = 0
  private disposed = false
  private readySettled = false
  private configured = false
  private lastError: Error | null = null

  constructor(private cfg?: any) {
    this.ready = new Promise((resolve) => {
      this.resolveReady = () => {
        if (this.readySettled) return
        this.readySettled = true
        resolve()
      }
    })
    if (this.cfg?.enable === false) {
      this.lastError = new Error("semantic search disabled by configuration")
      this.resolveReady()
      return
    }
    this.boot()
  }

  private boot() {
    try {
      this.worker = new Worker("/semantic.worker.js", { type: "module" })
    } catch (err) {
      this.handleFatal(err)
      return
    }
    this.setupWorker()
    this.startInit()
  }

  private setupWorker() {
    if (!this.worker) return
    this.worker.onmessage = (
      event: MessageEvent<ProgressMessage | ReadyMessage | ResultMessage | ErrorMessage>,
    ) => {
      const msg = event.data
      if (msg.type === "progress") {
        // Progress updates during initialization - can be logged if needed
        return
      }
      if (msg.type === "ready") {
        this.configured = true
        this.lastError = null
        this.resolveReady()
        return
      }
      if (msg.type === "search-result") {
        const pending = this.pending.get(msg.seq)
        if (pending) {
          this.pending.delete(msg.seq)
          pending.resolve({ semantic: msg.semantic ?? [] })
        }
        return
      }
      if (msg.type === "error") {
        if (typeof msg.seq === "number") {
          const pending = this.pending.get(msg.seq)
          if (pending) {
            this.pending.delete(msg.seq)
            pending.reject(new Error(msg.message))
          }
        } else {
          this.handleFatal(msg.message)
        }
      }
    }
  }

  private startInit() {
    if (!this.worker) return
    const manifestUrl =
      typeof this.cfg?.manifestUrl === "string" && this.cfg.manifestUrl.length > 0
        ? this.cfg.manifestUrl
        : "/embeddings/manifest.json"
    const disableCache = Boolean(this.cfg?.disableCache)
    const baseUrl =
      typeof this.cfg?.manifestBaseUrl === "string" ? this.cfg.manifestBaseUrl : undefined
    this.worker.postMessage({
      type: "init",
      cfg: this.cfg,
      manifestUrl,
      baseUrl,
      disableCache,
    })
  }

  private rejectAll(err: Error, fatal = false) {
    for (const [id, pending] of this.pending.entries()) {
      pending.reject(err)
      this.pending.delete(id)
    }
    if (fatal) {
      this.lastError = err
      this.configured = false
      if (!this.readySettled) {
        this.resolveReady()
      }
    }
  }

  private handleFatal(err: unknown) {
    const error = err instanceof Error ? err : new Error(String(err))
    console.error("[SemanticClient] initialization failure:", error)
    this.rejectAll(error, true)
    if (this.worker) {
      this.worker.postMessage({ type: "reset" })
      this.worker.terminate()
      this.worker = null
    }
  }

  async ensureReady() {
    await this.ready
    if (!this.configured) {
      throw this.lastError ?? new Error("semantic search unavailable")
    }
  }

  async search(text: string, k: number): Promise<SearchPayload> {
    if (this.disposed) {
      throw new Error("semantic client has been disposed")
    }
    await this.ensureReady()
    if (!this.worker || !this.configured) {
      throw this.lastError ?? new Error("worker unavailable")
    }
    return new Promise<SearchPayload>((resolve, reject) => {
      const seq = ++this.seq
      this.pending.set(seq, { resolve, reject })
      this.worker?.postMessage({ type: "search", text, k, seq })
    })
  }

  dispose() {
    if (this.disposed) return
    this.disposed = true
    this.rejectAll(new Error("semantic client disposed"))
    if (this.worker) {
      this.worker.postMessage({ type: "reset" })
      this.worker.terminate()
    }
    this.worker = null
    this.configured = false
  }
}
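
A usage sketch, mirroring how search.inline.ts above drives the client (the query string and result handling are illustrative):

const client = new SemanticClient(semanticCfg)
try {
  await client.ensureReady() // resolves once the worker posts "ready"; throws if init failed
  const { semantic } = await client.search("static site search", 24)
  // each result id indexes into manifest.ids; score is the raw chunk similarity
  console.log(semantic.slice(0, 3))
} catch (err) {
  console.warn("semantic search unavailable:", err)
  client.dispose()
}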

quartz/components/styles/search.scss

@@ -77,16 +77,97 @@
margin-bottom: 2em;
}
& > input {
& > .input-container {
align-items: center;
gap: 0.5rem;
display: flex;
flex-wrap: wrap;
position: relative;
box-sizing: border-box;
padding: 0.5em 1em;
font-family: var(--bodyFont);
color: var(--dark);
font-size: 1.1em;
border: 1px solid var(--lightgray);
&:focus {
outline: none;
.search-bar {
flex: 1 1 auto;
min-width: 0;
box-sizing: border-box;
padding: 0.5em 1em;
font-family: var(--bodyFont);
color: var(--dark);
font-size: 1.1em;
border: none;
background: transparent;
&:focus {
outline: none;
}
}
.semantic-search-progress {
position: absolute;
bottom: 0;
left: 0;
right: 0;
height: 2px;
background-color: var(--secondary);
width: 0;
opacity: 0;
transition:
width 0.3s ease,
opacity 0.2s ease;
pointer-events: none;
}
.search-mode-toggle {
display: inline-flex;
align-items: center;
border-radius: 9999px;
height: 1.4rem;
background-color: color-mix(in srgb, var(--darkgray) 12%, transparent);
margin-right: 1rem;
.mode-option {
border: none;
background: transparent;
font: inherit;
color: var(--gray);
border-radius: 9999px;
cursor: pointer;
transition:
background-color 0.2s ease,
color 0.2s ease;
display: inline-flex;
align-items: center;
justify-content: center;
width: 1.5rem;
height: 1.5rem;
position: relative;
&:focus-visible {
outline: 2px solid var(--tertiary);
outline-offset: 2px;
}
&.active {
background-color: var(--secondary);
color: var(--light);
}
svg {
width: 18px;
height: 18px;
}
.sr-only {
position: absolute;
width: 1px;
height: 1px;
padding: 0;
margin: -1px;
overflow: hidden;
clip: rect(0, 0, 0, 0);
white-space: nowrap;
border: 0;
}
}
}
}

quartz/embed_build.py Normal file (542 lines)

@@ -0,0 +1,542 @@
# /// script
# requires-python = ">=3.11"
# dependencies = [
#   "langchain-text-splitters",
#   "numpy",
#   "openai",
#   "sentence-transformers",
#   "tiktoken",
# ]
# ///
from __future__ import annotations

import os, json, argparse, hashlib, math, random, logging
from pathlib import Path
from functools import lru_cache
from collections.abc import Iterable
from concurrent.futures import ThreadPoolExecutor, as_completed

import tiktoken, numpy as np
from openai import OpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

logger = logging.getLogger(__name__)

DEFAULT_VLLM_URL = os.environ.get("VLLM_URL") or os.environ.get("VLLM_EMBED_URL") or "http://127.0.0.1:8000/v1"


def resolve_vllm_base_url(url: str) -> str:
    if not url:
        raise ValueError("vLLM URL must be non-empty")
    trimmed = url.rstrip("/")
    if trimmed.endswith("/v1/embeddings"):
        trimmed = trimmed[: -len("/embeddings")]
    elif trimmed.endswith("/embeddings"):
        trimmed = trimmed[: trimmed.rfind("/")]
    if not trimmed.endswith("/v1"):
        trimmed = f"{trimmed}/v1"
    return trimmed
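

# e.g. "http://127.0.0.1:8000", "http://127.0.0.1:8000/v1", and
# "http://127.0.0.1:8000/v1/embeddings" all resolve to "http://127.0.0.1:8000/v1"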
def load_jsonl(fp: str) -> Iterable[dict]:
    with open(fp, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            yield json.loads(line)


def l2_normalize_rows(x: np.ndarray) -> np.ndarray:
    # x: [N, D]
    norms = np.linalg.norm(x, ord=2, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    return x / norms


@lru_cache(maxsize=1)
def get_tiktoken_encoder():
    # Get the o200k_base tokenizer (GPT-4o) with caching;
    # change this if you want something else.
    return tiktoken.get_encoding("o200k_base")


def count_tokens(text: str) -> int:
    # Count tokens using the o200k_base encoding
    encoder = get_tiktoken_encoder()
    return len(encoder.encode(text))


def get_text_splitter(chunk_size: int, overlap: int):
    encoder = get_tiktoken_encoder()
    return RecursiveCharacterTextSplitter(
        chunk_size=chunk_size * 4,  # character approximation
        chunk_overlap=overlap * 4,
        separators=["\n\n", "\n", ". ", " ", ""],
        length_function=lambda t: len(encoder.encode(t)),
        is_separator_regex=False,
    )
def chunk_document(
    doc: dict, max_tokens: int = 512, overlap_tokens: int = 128, min_chunk_size: int = 100
) -> list[dict]:
    """
    Chunk a document if it exceeds max_tokens

    Args:
        doc: {'slug': str, 'title': str, 'text': str}
        max_tokens: Maximum tokens per chunk
        overlap_tokens: Overlap between chunks
        min_chunk_size: Minimum chunk size (avoid tiny chunks)

    Returns:
        List of chunk dicts with metadata
    """
    text = doc["text"]
    token_count = count_tokens(text)

    # No chunking needed
    if token_count <= max_tokens:
        return [
            {
                "slug": doc["slug"],
                "title": doc.get("title", doc["slug"]),
                "text": text,
                "chunk_id": 0,
                "parent_slug": doc["slug"],
                "is_chunked": False,
            }
        ]

    # Apply chunking
    splitter = get_text_splitter(max_tokens, overlap_tokens)
    raw_chunks = splitter.split_text(text)

    # Filter out tiny chunks
    valid_chunks = [c for c in raw_chunks if count_tokens(c) >= min_chunk_size]

    return [
        {
            "slug": f"{doc['slug']}#chunk{i}",
            "title": doc.get("title", doc["slug"]),
            "text": chunk,
            "chunk_id": i,
            "parent_slug": doc["slug"],
            "is_chunked": True,
        }
        for i, chunk in enumerate(valid_chunks)
    ]
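

# Illustration: a note under max_tokens passes through as a single record whose slug
# is unchanged; a longer note yields slugs like "notes/foo#chunk0", "notes/foo#chunk1",
# each carrying parent_slug="notes/foo" so chunks can be re-aggregated per document
# at query time (see aggregateChunkResults in search.inline.ts above).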
def write_shards(vectors: np.ndarray, shard_size: int, dtype: str, out_dir: Path) -> list[dict]:
    out_dir.mkdir(parents=True, exist_ok=True)
    rows, dims = vectors.shape
    shards_meta: list[dict] = []
    np_dtype = np.float16 if dtype == "fp16" else np.float32
    bytes_per_value = np.dtype(np_dtype).itemsize
    row_offset = 0
    for si, start in enumerate(range(0, rows, shard_size)):
        end = min(start + shard_size, rows)
        shard = vectors[start:end]  # [n, dims]
        bin_path = out_dir / f"vectors-{si:03d}.bin"
        payload = shard.astype(np_dtype, copy=False).tobytes(order="C")
        digest = hashlib.sha256(payload).hexdigest()
        with open(bin_path, "wb") as f:
            f.write(payload)
        shard_rows = int(shard.shape[0])
        shards_meta.append(
            {
                "path": f"/embeddings/{bin_path.name}",
                "rows": shard_rows,
                "rowOffset": row_offset,
                "byteLength": len(payload),
                "sha256": digest,
                "byteStride": dims * bytes_per_value,
            },
        )
        row_offset += shard_rows
    return shards_meta
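

# Illustration: with the site defaults above (shardSizeRows=1024, dims=768, fp32) a
# full shard is 1024 * 768 * 4 = 3,145,728 bytes with byteStride 768 * 4 = 3072, so a
# consumer can slice row r from bytes [r * 3072, (r + 1) * 3072).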
def write_hnsw_graph(levels: list[list[list[int]]], rows: int, out_path: Path) -> tuple[list[dict], str]:
    out_path.parent.mkdir(parents=True, exist_ok=True)
    offset = 0
    meta: list[dict] = []
    digest = hashlib.sha256()
    with open(out_path, "wb") as f:
        for lvl in levels:
            indptr = np.zeros(rows + 1, dtype=np.uint32)
            edge_accum: list[int] = []
            for idx in range(rows):
                neighbors = lvl[idx] if idx < len(lvl) else []
                indptr[idx + 1] = indptr[idx] + len(neighbors)
                edge_accum.extend(neighbors)
            indptr_bytes = indptr.tobytes(order="C")
            indptr_offset = offset
            f.write(indptr_bytes)
            digest.update(indptr_bytes)
            offset += len(indptr_bytes)
            if edge_accum:
                indices = np.asarray(edge_accum, dtype=np.uint32)
                indices_bytes = indices.tobytes(order="C")
            else:
                indices = np.zeros(0, dtype=np.uint32)
                indices_bytes = indices.tobytes(order="C")
            indices_offset = offset
            f.write(indices_bytes)
            digest.update(indices_bytes)
            offset += len(indices_bytes)
            meta.append(
                {
                    "level": len(meta),
                    "indptr": {
                        "offset": indptr_offset,
                        "elements": int(indptr.shape[0]),
                        "byteLength": len(indptr_bytes),
                    },
                    "indices": {
                        "offset": indices_offset,
                        "elements": int(indices.shape[0]),
                        "byteLength": len(indices_bytes),
                    },
                },
            )
    return meta, digest.hexdigest()
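

# Each level is CSR-encoded: the neighbors of node idx are
# indices[indptr[idx] : indptr[idx + 1]], which lets the browser-side worker slice a
# level directly from the fetched buffer (an assumption about the consumer, which is
# not shown in this diff).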
def embed_vllm(
    texts: list[str],
    model_id: str,
    vllm_url: str,
    batch_size: int = 64,
    concurrency: int = 8,
) -> np.ndarray:
    base_url = resolve_vllm_base_url(vllm_url)
    api_key = os.environ.get("VLLM_API_KEY") or os.environ.get("OPENAI_API_KEY") or "not-set"
    client = OpenAI(base_url=base_url, api_key=api_key, timeout=300)

    def list_available_models() -> list[str]:
        models: list[str] = []
        page = client.models.list()
        models.extend(model.id for model in page.data)
        while getattr(page, "has_more", False) and page.data:
            cursor = page.data[-1].id
            page = client.models.list(after=cursor)
            models.extend(model.id for model in page.data)
        return models

    try:
        available_models = list_available_models()
    except Exception as exc:
        raise RuntimeError(f"failed to query {base_url}/models: {exc}") from exc
    if model_id not in available_models:
        suggestions = ", ".join(sorted(available_models)) if available_models else "<none>"
        logger.warning(
            "model '%s' not served by vLLM at %s. Available models: %s. Falling back to the first available model; results may differ during semantic search (you can ignore this message if your weights are an ONNX checkpoint of the same model).",
            model_id,
            base_url,
            suggestions,
        )
        model_id = available_models[0]

    # Apply model-specific prefixes for documents (asymmetric search)
    model_lower = model_id.lower()
    if "e5" in model_lower:
        # E5 models: use "passage:" prefix for documents
        prefixed = [f"passage: {t}" for t in texts]
    elif "qwen" in model_lower and "embedding" in model_lower:
        # Qwen3-Embedding: documents use plain text (no prefix)
        prefixed = texts
    elif "embeddinggemma" in model_lower:
        # embeddinggemma: use "title: none | text:" prefix for documents
        prefixed = [f"title: none | text: {t}" for t in texts]
    else:
        # Default: no prefix for unknown models
        prefixed = texts

    print(
        "Embedding"
        f" {len(prefixed)} texts with vLLM"
        f" (model={model_id}, batch_size={batch_size}, concurrency={concurrency})",
    )

    # Create batches
    batches = []
    for i in range(0, len(prefixed), batch_size):
        batch = prefixed[i : i + batch_size]
        batches.append((i, batch))

    # Function to send a single batch request
    def send_batch(batch_info: tuple[int, list[str]]) -> tuple[int, list[np.ndarray]]:
        idx, batch = batch_info
        response = client.embeddings.create(model=model_id, input=batch)
        embeddings = [np.asarray(item.embedding, dtype=np.float32) for item in response.data]
        return (idx, embeddings)

    # Send batches concurrently (or sequentially if only 1 batch)
    results: dict[int, list[np.ndarray]] = {}
    if len(batches) == 1:
        # Single batch - no need for threading
        idx, embeddings = send_batch(batches[0])
        results[idx] = embeddings
    else:
        # Multiple batches - use concurrent requests
        with ThreadPoolExecutor(max_workers=concurrency) as executor:
            futures = {executor.submit(send_batch, batch_info): batch_info[0] for batch_info in batches}
            completed = 0
            for future in as_completed(futures):
                idx, embeddings = future.result()
                results[idx] = embeddings
                completed += 1
                if completed % max(1, len(batches) // 10) == 0 or completed == len(batches):
                    print(f"  Completed {completed}/{len(batches)} batches ({completed * 100 // len(batches)}%)")

    # Reconstruct in order
    out: list[np.ndarray] = []
    for i in sorted(results.keys()):
        out.extend(results[i])
    return np.stack(out, axis=0)
def embed_hf(texts: list[str], model_id: str, device: str) -> np.ndarray:
# Prefer sentence-transformers for E5 and similar embed models
from sentence_transformers import SentenceTransformer
model = SentenceTransformer(model_id, device=device)
# Apply model-specific prefixes for documents (asymmetric search)
model_lower = model_id.lower()
if "e5" in model_lower:
# E5 models: use "passage:" prefix for documents
prefixed = [f"passage: {t}" for t in texts]
elif "qwen" in model_lower and "embedding" in model_lower:
# Qwen3-Embedding: documents use plain text (no prefix)
prefixed = texts
elif "embeddinggemma" in model_lower:
# embeddinggemma: use "title: none | text:" prefix for documents
prefixed = [f"title: none | text: {t}" for t in texts]
else:
# Default: no prefix for unknown models
prefixed = texts
vecs = model.encode(
prefixed,
batch_size=64,
normalize_embeddings=True,
convert_to_numpy=True,
show_progress_bar=True,
)
return vecs.astype(np.float32, copy=False)
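

# Example usage (hypothetical inputs; model and device match the script defaults):
#   vecs = embed_hf(["passage text one", "passage text two"], "intfloat/multilingual-e5-large", "cpu")
#   vecs.shape  # -> (2, 1024); rows are L2-normalized by sentence-transformers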
def main():
ap = argparse.ArgumentParser()
ap.add_argument("--jsonl", default="public/embeddings-text.jsonl")
ap.add_argument("--model", default=os.environ.get("SEM_MODEL", "intfloat/multilingual-e5-large"))
ap.add_argument("--dims", type=int, default=int(os.environ.get("SEM_DIMS", "1024")))
ap.add_argument("--dtype", choices=["fp16", "fp32"], default=os.environ.get("SEM_DTYPE", "fp32"))
ap.add_argument("--shard-size", type=int, default=int(os.environ.get("SEM_SHARD", "1024")))
ap.add_argument("--out", default="public/embeddings")
ap.add_argument("--use-vllm", action="store_true", default=bool(os.environ.get("USE_VLLM", "")))
ap.add_argument(
"--vllm-url",
default=DEFAULT_VLLM_URL,
help="Base URL for the vLLM OpenAI-compatible server (accepts either /v1 or /v1/embeddings)",
)
ap.add_argument("--chunk-size", type=int, default=512, help="Max tokens per chunk")
ap.add_argument("--chunk-overlap", type=int, default=128, help="Overlap tokens between chunks")
ap.add_argument("--no-chunking", action="store_true", help="Disable chunking (embed full docs)")
ap.add_argument(
"--concurrency",
type=int,
default=int(os.environ.get("VLLM_CONCURRENCY", "8")),
help="Number of concurrent requests to vLLM (default: 8)",
)
ap.add_argument(
"--batch-size",
type=int,
default=int(os.environ.get("VLLM_BATCH_SIZE", "64")),
help="Batch size for vLLM requests (default: 64)",
)
args = ap.parse_args()
recs = list(load_jsonl(args.jsonl))
if not recs:
print("No input found in public/embeddings-text.jsonl; run the site build first to emit JSONL.")
return
# Apply chunking
if args.no_chunking:
chunks = recs
chunk_metadata = {}
print(f"Chunking disabled. Processing {len(chunks)} full documents")
else:
chunks = []
chunk_metadata = {}
for rec in recs:
doc_chunks = chunk_document(rec, max_tokens=args.chunk_size, overlap_tokens=args.chunk_overlap)
chunks.extend(doc_chunks)
# Build chunk metadata map
for chunk in doc_chunks:
if chunk["is_chunked"]:
chunk_metadata[chunk["slug"]] = {
"parentSlug": chunk["parent_slug"],
"chunkId": chunk["chunk_id"],
}
chunked_count = sum(1 for c in chunks if c.get("is_chunked", False))
print(f"Chunked {len(recs)} documents into {len(chunks)} chunks ({chunked_count} chunked, {len(chunks) - chunked_count} unchanged)")
print(f" Chunk size: {args.chunk_size} tokens, overlap: {args.chunk_overlap} tokens")
ids = [c["slug"] for c in chunks]
titles = [c.get("title", c["slug"]) for c in chunks]
texts = [c["text"] for c in chunks]
if args.use_vllm:
vecs = embed_vllm(
texts,
args.model,
args.vllm_url,
batch_size=args.batch_size,
concurrency=args.concurrency,
)
else:
device = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu"
vecs = embed_hf(texts, args.model, device)
# Coerce dims and re-normalize
if vecs.shape[1] != args.dims:
if vecs.shape[1] > args.dims:
vecs = vecs[:, : args.dims]
else:
vecs = np.pad(vecs, ((0, 0), (0, args.dims - vecs.shape[1])))
vecs = l2_normalize_rows(vecs.astype(np.float32, copy=False))
out_dir = Path(args.out)
shards = write_shards(vecs, args.shard_size, args.dtype, out_dir)
# Build a lightweight HNSW graph and store it in a compact binary layout
def hnsw_build(data: np.ndarray, M: int = 16, efC: int = 200, seed: int = 0) -> dict:
rng = random.Random(seed)
N, D = data.shape
levels: list[list[list[int]]] = [] # levels[L][i] = neighbors of node i at level L
# random level assignment using 1/e distribution
node_levels = []
for _ in range(N):
lvl = 0
while rng.random() < 1 / math.e:
lvl += 1
node_levels.append(lvl)
max_level = max(node_levels) if N > 0 else 0
for _ in range(max_level + 1):
levels.append([[] for _ in range(N)])
def sim(i: int, j: int) -> float:
return float((data[i] * data[j]).sum())
entry = 0 if N > 0 else -1
def search_layer(q: int, ep: int, ef: int, L: int) -> list[int]:
if ep < 0:
return []
visited = set()
cand: list[tuple[float, int]] = []
top: list[tuple[float, int]] = []
def push(node: int):
if node in visited:
return
visited.add(node)
cand.append((sim(q, node), node))
push(ep)
while cand:
cand.sort(reverse=True)
s, v = cand.pop(0)
                if len(top) >= ef and s <= min(t[0] for t in top):  # best remaining candidate cannot beat the worst kept result
break
top.append((s, v))
for u in levels[L][v]:
push(u)
top.sort(reverse=True)
return [n for _, n in top]
for i in range(N):
if i == 0:
continue
lvl = node_levels[i]
ep = entry
for L in range(max_level, lvl, -1):
c = search_layer(i, ep, 1, L)
if c:
ep = c[0]
for L in range(min(max_level, lvl), -1, -1):
W = search_layer(i, ep, efC, L)
# Select top M by similarity
neigh = sorted(((sim(i, j), j) for j in W if j != i), reverse=True)[:M]
for _, e in neigh:
if e not in levels[L][i]:
levels[L][i].append(e)
if i not in levels[L][e]:
levels[L][e].append(i)
# trim neighbors to M
for L in range(len(levels)):
for i in range(N):
if len(levels[L][i]) > M:
# keep top M by sim
nb = levels[L][i]
nb = sorted(nb, key=lambda j: sim(i, j), reverse=True)[:M]
levels[L][i] = nb
return {
"M": M,
"efConstruction": efC,
"entryPoint": entry,
"maxLevel": max_level,
"levels": levels,
}
hnsw = hnsw_build(vecs, M=16, efC=200)
hnsw_meta, hnsw_sha = write_hnsw_graph(hnsw["levels"], int(vecs.shape[0]), out_dir / "hnsw.bin")
manifest = {
"version": 2,
"dims": args.dims,
"dtype": args.dtype,
"normalized": True,
"rows": int(vecs.shape[0]),
"shardSizeRows": args.shard_size,
"vectors": {
"dtype": args.dtype,
"rows": int(vecs.shape[0]),
"dims": args.dims,
"shards": shards,
},
"ids": ids,
"titles": titles,
"chunkMetadata": chunk_metadata,
"hnsw": {
"M": hnsw["M"],
"efConstruction": hnsw["efConstruction"],
"entryPoint": hnsw["entryPoint"],
"maxLevel": hnsw["maxLevel"],
"graph": {
"path": "/embeddings/hnsw.bin",
"sha256": hnsw_sha,
"levels": hnsw_meta,
},
},
}
(out_dir / "manifest.json").write_text(json.dumps(manifest, ensure_ascii=False), encoding="utf-8")
print(f"Wrote {len(shards)} vector shard(s), HNSW graph, and manifest to {out_dir}")
if __name__ == "__main__":
main()
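
# A minimal sketch of reading the emitted artifacts back from Python, assuming
# the defaults above (fp32 shards, L2-normalized rows, shard files living next
# to manifest.json). `query_vec` stands in for a hypothetical normalized query
# embedding:
#
#   manifest = json.loads((out_dir / "manifest.json").read_text(encoding="utf-8"))
#   dims = manifest["dims"]
#   parts = [
#       np.fromfile(out_dir / Path(shard["path"]).name, dtype=np.float32).reshape(-1, dims)
#       for shard in manifest["vectors"]["shards"]
#   ]
#   vecs = np.concatenate(parts, axis=0)
#   scores = vecs @ query_vec  # normalized rows: dot product == cosine similarity
#   best = [manifest["ids"][i] for i in np.argsort(-scores)[:10]]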

View File

@@ -40,7 +40,7 @@ export const NotFoundPage: QuartzEmitterPlugin = () => {
description: notFound,
frontmatter: { title: notFound, tags: [] },
})
const externalResources = pageResources(path, resources)
const externalResources = pageResources(path, resources, ctx.cfg.configuration)
const componentData: QuartzComponentProps = {
ctx,
fileData: vfile.data,

View File

@@ -1,5 +1,8 @@
import { FullSlug, joinSegments } from "../../util/path"
import { QuartzEmitterPlugin } from "../types"
import path from "path"
import fs from "node:fs/promises"
import { globby } from "globby"
// @ts-ignore
import spaRouterScript from "../../components/scripts/spa.inline"
@@ -16,7 +19,7 @@ import {
processGoogleFonts,
} from "../../util/theme"
import { Features, transform } from "lightningcss"
import { transform as transpile } from "esbuild"
import { transform as transpile, build as bundle } from "esbuild"
import { write } from "./helpers"
type ComponentResources = {
@@ -357,7 +360,47 @@ export const ComponentResources: QuartzEmitterPlugin = () => {
ext: ".js",
content: postscript,
})
// Bundle all worker files
const workerFiles = await globby(["quartz/**/*.worker.ts"])
for (const src of workerFiles) {
const result = await bundle({
entryPoints: [src],
bundle: true,
minify: true,
platform: "browser",
format: "esm",
write: false,
})
const code = result.outputFiles[0].text
const name = path.basename(src).replace(/\.ts$/, "")
yield write({ ctx, slug: name as FullSlug, ext: ".js", content: code })
}
},
async *partialEmit(ctx, _content, _resources, changeEvents) {
// Handle worker file changes in incremental builds
for (const changeEvent of changeEvents) {
if (!/\.worker\.ts$/.test(changeEvent.path)) continue
if (changeEvent.type === "delete") {
const name = path.basename(changeEvent.path).replace(/\.ts$/, "")
const dest = joinSegments(ctx.argv.output, `${name}.js`)
try {
await fs.unlink(dest)
} catch {}
continue
}
const result = await bundle({
entryPoints: [changeEvent.path],
bundle: true,
minify: true,
platform: "browser",
format: "esm",
write: false,
})
const code = result.outputFiles[0].text
const name = path.basename(changeEvent.path).replace(/\.ts$/, "")
yield write({ ctx, slug: name as FullSlug, ext: ".js", content: code })
}
},
}
}
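
// Note on output naming: each quartz/**/*.worker.ts entry point is bundled and
// written as "<basename>.js" at the output root, so a hypothetical
// quartz/components/scripts/semantic.worker.ts lands at <output>/semantic.worker.js.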

View File

@@ -25,7 +25,7 @@ async function processContent(
) {
const slug = fileData.slug!
const cfg = ctx.cfg.configuration
const externalResources = pageResources(pathToRoot(slug), resources)
const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
const componentData: QuartzComponentProps = {
ctx,
fileData,

View File

@@ -38,7 +38,7 @@ async function* processFolderInfo(
const slug = joinSegments(folder, "index") as FullSlug
const [tree, file] = folderContent
const cfg = ctx.cfg.configuration
const externalResources = pageResources(pathToRoot(slug), resources)
const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
const componentData: QuartzComponentProps = {
ctx,
fileData: file.data,

View File

@@ -1,7 +1,7 @@
export { ContentPage } from "./contentPage"
export { TagPage } from "./tagPage"
export { FolderPage } from "./folderPage"
export { ContentIndex as ContentIndex } from "./contentIndex"
export { ContentIndex } from "./contentIndex"
export { AliasRedirects } from "./aliases"
export { Assets } from "./assets"
export { Static } from "./static"
@@ -10,3 +10,4 @@ export { ComponentResources } from "./componentResources"
export { NotFoundPage } from "./404"
export { CNAME } from "./cname"
export { CustomOgImages } from "./ogImage"
export { SemanticIndex } from "./semantic"

View File

@@ -0,0 +1,235 @@
import { write } from "./helpers"
import { QuartzEmitterPlugin } from "../types"
import { FilePath, FullSlug, joinSegments, QUARTZ } from "../../util/path"
import { ReadTimeResults } from "reading-time"
import { GlobalConfiguration } from "../../cfg"
import { spawn } from "child_process"
const DEFAULT_MODEL_ID = "onnx-community/Qwen3-Embedding-0.6B-ONNX"
const defaults: GlobalConfiguration["semanticSearch"] = {
enable: true,
model: DEFAULT_MODEL_ID,
aot: false,
dims: 1024,
dtype: "fp32",
shardSizeRows: 1024,
hnsw: { M: 16, efConstruction: 200 },
chunking: {
chunkSize: 512,
chunkOverlap: 128,
noChunking: false,
},
vllm: {
enable: false,
vllmUrl:
process.env.VLLM_URL || process.env.VLLM_EMBED_URL || "http://127.0.0.1:8000/v1/embeddings",
concurrency: parseInt(process.env.VLLM_CONCURRENCY || "8", 10),
batchSize: parseInt(process.env.VLLM_BATCH_SIZE || "64", 10),
},
}
type ContentDetails = {
slug: string
title: string
filePath: FilePath
content: string
readingTime?: Partial<ReadTimeResults>
}
/**
* Check if uv is installed
*/
function checkUvInstalled(): Promise<boolean> {
return new Promise((resolve) => {
const proc = spawn("uv", ["--version"], { shell: true })
proc.on("error", () => resolve(false))
proc.on("close", (code) => resolve(code === 0))
})
}
/**
* Run the Python embedding build script using uv
* Script uses PEP 723 inline metadata for dependency management
*/
function runEmbedBuild(
jsonlPath: string,
outDir: string,
opts: {
model: string
dtype: string
dims: number
shardSizeRows: number
chunking: { chunkSize: number; chunkOverlap: number; noChunking: boolean }
vllm: { enable: boolean; vllmUrl?: string; concurrency: number; batchSize: number }
},
): Promise<void> {
return new Promise((resolve, reject) => {
const scriptPath = joinSegments(QUARTZ, "embed_build.py")
const args = [
"run",
scriptPath,
"--jsonl",
jsonlPath,
"--model",
opts.model,
"--out",
outDir,
"--dtype",
opts.dtype,
"--dims",
String(opts.dims),
"--shard-size",
String(opts.shardSizeRows),
"--chunk-size",
String(opts.chunking.chunkSize),
"--chunk-overlap",
String(opts.chunking.chunkOverlap),
]
if (opts.chunking.noChunking) {
args.push("--no-chunking")
}
if (opts.vllm.enable) {
args.push("--use-vllm")
if (opts.vllm.vllmUrl) {
args.push("--vllm-url", opts.vllm.vllmUrl)
}
args.push("--concurrency", String(opts.vllm.concurrency))
args.push("--batch-size", String(opts.vllm.batchSize))
}
console.log("\nRunning embedding generation:")
console.log(` uv ${args.join(" ")}`)
const env = { ...process.env }
if (opts.vllm.enable && !env.USE_VLLM) {
env.USE_VLLM = "1"
}
const proc = spawn("uv", args, {
stdio: "inherit",
shell: true,
env,
})
proc.on("error", (err) => {
reject(new Error(`Failed to spawn uv: ${err.message}`))
})
proc.on("close", (code) => {
if (code === 0) {
console.log("Embedding generation completed successfully")
resolve()
} else {
reject(new Error(`embed_build.py exited with code ${code}`))
}
})
})
}
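
// For reference, PEP 723 inline metadata is a comment header inside the Python
// script itself; illustrative shape only (the authoritative dependency list
// lives in quartz/embed_build.py):
//
//   # /// script
//   # requires-python = ">=3.11"
//   # dependencies = ["numpy", "openai", "sentence-transformers"]
//   # ///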
export const SemanticIndex: QuartzEmitterPlugin<Partial<GlobalConfiguration["semanticSearch"]>> = (
opts,
) => {
const merged = { ...defaults, ...opts }
const o = {
enable: merged.enable!,
model: merged.model!,
aot: merged.aot!,
dims: merged.dims!,
dtype: merged.dtype!,
shardSizeRows: merged.shardSizeRows!,
hnsw: {
M: merged.hnsw?.M ?? defaults.hnsw!.M!,
efConstruction: merged.hnsw?.efConstruction ?? defaults.hnsw!.efConstruction!,
efSearch: merged.hnsw?.efSearch,
},
chunking: {
chunkSize: merged.chunking?.chunkSize ?? defaults.chunking!.chunkSize!,
chunkOverlap: merged.chunking?.chunkOverlap ?? defaults.chunking!.chunkOverlap!,
noChunking: merged.chunking?.noChunking ?? defaults.chunking!.noChunking!,
},
vllm: {
enable: merged.vllm?.enable ?? defaults.vllm!.enable!,
vllmUrl: merged.vllm?.vllmUrl ?? defaults.vllm!.vllmUrl,
concurrency: merged.vllm?.concurrency ?? defaults.vllm!.concurrency!,
batchSize: merged.vllm?.batchSize ?? defaults.vllm!.batchSize!,
},
}
if (!o.model) {
throw new Error("Semantic search requires a model identifier")
}
return {
name: "SemanticIndex",
getQuartzComponents() {
return []
},
async *partialEmit() {},
async *emit(ctx, content, _resources) {
if (!o.enable) return
const docs: ContentDetails[] = []
for (const [_, file] of content) {
const slug = file.data.slug!
const title = file.data.frontmatter?.title ?? slug
const text = file.data.text
if (text) {
docs.push({
slug,
title,
filePath: file.data.filePath!,
content: text,
readingTime: file.data.readingTime,
})
}
}
// Emit JSONL with the exact text used for embeddings
const jsonl = docs
.map((d) => ({ slug: d.slug, title: d.title, text: d.content }))
        .map((rec) => JSON.stringify(rec))
.join("\n")
const jsonlSlug = "embeddings-text" as FullSlug
yield write({
ctx,
slug: jsonlSlug,
ext: ".jsonl",
content: jsonl,
})
// If aot is false, run the embedding generation script
if (!o.aot) {
console.log("\nGenerating embeddings (aot=false)...")
// Check for uv
const hasUv = await checkUvInstalled()
if (!hasUv) {
throw new Error(
"uv is required for embedding generation. Install it from https://docs.astral.sh/uv/",
)
}
const jsonlPath = joinSegments(ctx.argv.output, "embeddings-text.jsonl")
const outDir = joinSegments(ctx.argv.output, "embeddings")
try {
await runEmbedBuild(jsonlPath, outDir, o)
} catch (err) {
const message = err instanceof Error ? err.message : String(err)
throw new Error(`Embedding generation failed: ${message}`)
}
} else {
console.log(
"\nSkipping embedding generation (aot=true). Expecting pre-generated embeddings in public/embeddings/",
)
}
},
externalResources(_ctx) {
return {}
},
}
}
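
// Minimal wiring sketch, assuming a stock quartz.config.ts whose emitters array
// accepts this plugin (option names mirror the defaults above; every field is
// optional):
//
//   SemanticIndex({
//     model: "intfloat/multilingual-e5-large",
//     dims: 1024,
//     dtype: "fp32",
//     chunking: { chunkSize: 512, chunkOverlap: 128, noChunking: false },
//     vllm: { enable: false },
//   })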

View File

@@ -73,7 +73,7 @@ async function processTagPage(
const slug = joinSegments("tags", tag) as FullSlug
const [tree, file] = tagContent
const cfg = ctx.cfg.configuration
const externalResources = pageResources(pathToRoot(slug), resources)
const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
const componentData: QuartzComponentProps = {
ctx,
fileData: file.data,

View File

@@ -1,4 +1,6 @@
import { QuartzTransformerPlugin } from "../types"
import rehypeRaw from "rehype-raw"
import { PluggableList } from "unified"
export interface Options {
/** Replace {{ relref }} with quartz wikilinks []() */
@@ -102,5 +104,9 @@ export const OxHugoFlavouredMarkdown: QuartzTransformerPlugin<Partial<Options>>
}
return src
},
htmlPlugins() {
const plugins: PluggableList = [rehypeRaw]
return plugins
},
}
}

View File

@@ -0,0 +1,548 @@
// Unified semantic search worker: handles data loading and query execution
import { env, pipeline } from "@huggingface/transformers"
import "onnxruntime-web/webgpu"
import "onnxruntime-web/wasm"
export {}
type VectorShardMeta = {
path: string
rows: number
rowOffset: number
byteLength: number
sha256?: string
byteStride: number
}
type LevelSection = {
level: number
indptr: { offset: number; elements: number; byteLength: number }
indices: { offset: number; elements: number; byteLength: number }
}
type ChunkMetadata = {
parentSlug: string
chunkId: number
}
type Manifest = {
version: number
dims: number
dtype: string
normalized: boolean
rows: number
shardSizeRows: number
vectors: {
dtype: string
rows: number
dims: number
shards: VectorShardMeta[]
}
ids: string[]
titles?: string[]
chunkMetadata?: Record<string, ChunkMetadata>
hnsw: {
M: number
efConstruction: number
entryPoint: number
maxLevel: number
graph: {
path: string
sha256?: string
levels: LevelSection[]
}
}
}
type InitMessage = {
type: "init"
cfg: any
manifestUrl: string
baseUrl?: string
disableCache?: boolean
}
type SearchMessage = { type: "search"; text: string; k: number; seq: number }
type ResetMessage = { type: "reset" }
type WorkerMessage = InitMessage | SearchMessage | ResetMessage
type ReadyMessage = { type: "ready" }
type ProgressMessage = {
type: "progress"
loadedRows: number
totalRows: number
}
type SearchHit = { id: number; score: number }
type SearchResultMessage = {
type: "search-result"
seq: number
semantic: SearchHit[]
}
type ErrorMessage = { type: "error"; seq?: number; message: string }
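
// Message flow, in order: the main thread posts "init"; the worker answers with
// zero or more "progress" updates, then "ready" (or "error"). Each "search"
// produces exactly one "search-result" carrying the request's seq, so callers
// can drop stale responses; "reset" aborts in-flight loads and clears state.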
type WorkerState = "idle" | "loading" | "ready" | "error"
// IndexedDB configuration
const DB_NAME = "semantic-search-cache"
const STORE_NAME = "assets"
const DB_VERSION = 1
const hasIndexedDB = typeof indexedDB !== "undefined"
const supportsSharedArrayBuffer = typeof SharedArrayBuffer !== "undefined"
// State
let state: WorkerState = "idle"
let manifest: Manifest | null = null
let cfg: any = null
let vectorsView: Float32Array | null = null
let dims = 0
let rows = 0
let classifier: any = null
let envConfigured = false
let entryPoint = -1
let maxLevel = 0
let efDefault = 128
let levelGraph: { indptr: Uint32Array; indices: Uint32Array }[] = []
let abortController: AbortController | null = null
let dbPromise: Promise<IDBDatabase> | null = null
// IndexedDB helpers
function openDatabase(): Promise<IDBDatabase> {
if (!hasIndexedDB) {
return Promise.reject(new Error("indexedDB unavailable"))
}
if (!dbPromise) {
dbPromise = new Promise((resolve, reject) => {
const req = indexedDB.open(DB_NAME, DB_VERSION)
req.onupgradeneeded = () => {
const db = req.result
if (!db.objectStoreNames.contains(STORE_NAME)) {
db.createObjectStore(STORE_NAME)
}
}
req.onsuccess = () => resolve(req.result)
req.onerror = () => reject(req.error ?? new Error("failed to open cache store"))
})
}
return dbPromise
}
async function readAsset(hash: string): Promise<ArrayBuffer | null> {
if (!hasIndexedDB) {
return null
}
const db = await openDatabase()
return new Promise((resolve, reject) => {
const tx = db.transaction(STORE_NAME, "readonly")
const store = tx.objectStore(STORE_NAME)
const req = store.get(hash)
req.onsuccess = () => {
const value = req.result
if (value instanceof ArrayBuffer) {
resolve(value)
} else if (value && value.buffer instanceof ArrayBuffer) {
resolve(value.buffer as ArrayBuffer)
} else {
resolve(null)
}
}
req.onerror = () => reject(req.error ?? new Error("failed to read cached asset"))
})
}
async function writeAsset(hash: string, buffer: ArrayBuffer): Promise<void> {
if (!hasIndexedDB) {
return
}
const db = await openDatabase()
await new Promise<void>((resolve, reject) => {
const tx = db.transaction(STORE_NAME, "readwrite")
const store = tx.objectStore(STORE_NAME)
const req = store.put(buffer, hash)
req.onsuccess = () => resolve()
req.onerror = () => reject(req.error ?? new Error("failed to cache asset"))
})
}
function toAbsolute(path: string, baseUrl?: string): string {
if (path.startsWith("http://") || path.startsWith("https://")) {
return path
}
const base = baseUrl ?? self.location.origin
return new URL(path, base).toString()
}
async function fetchBinary(
path: string,
disableCache: boolean,
sha?: string,
): Promise<ArrayBuffer> {
if (!disableCache && sha && hasIndexedDB) {
try {
const cached = await readAsset(sha)
if (cached) {
return cached
}
} catch {
// fall through to network fetch on cache errors
}
}
const res = await fetch(path, { signal: abortController?.signal ?? undefined })
if (!res.ok) {
throw new Error(`failed to fetch ${path}: ${res.status} ${res.statusText}`)
}
const payload = await res.arrayBuffer()
if (!disableCache && sha && hasIndexedDB) {
try {
await writeAsset(sha, payload)
} catch {
// ignore cache write failures
}
}
return payload
}
async function populateVectors(
manifest: Manifest,
baseUrl: string | undefined,
disableCache: boolean | undefined,
): Promise<{ buffer: Float32Array; rowsLoaded: number }> {
if (manifest.vectors.dtype !== "fp32") {
throw new Error(`unsupported embedding dtype '${manifest.vectors.dtype}', regenerate with fp32`)
}
const rows = manifest.rows
const dims = manifest.dims
const totalBytes = rows * dims * Float32Array.BYTES_PER_ELEMENT
const buffer = supportsSharedArrayBuffer
? new Float32Array(new SharedArrayBuffer(totalBytes))
: new Float32Array(totalBytes)
let loadedRows = 0
for (const shard of manifest.vectors.shards) {
const absolute = toAbsolute(shard.path, baseUrl)
const payload = await fetchBinary(absolute, Boolean(disableCache), shard.sha256)
const view = new Float32Array(payload)
if (view.length !== shard.rows * dims) {
throw new Error(
`shard ${shard.path} has mismatched length (expected ${shard.rows * dims}, got ${view.length})`,
)
}
buffer.set(view, shard.rowOffset * dims)
loadedRows = Math.min(rows, shard.rowOffset + shard.rows)
const progress: ProgressMessage = {
type: "progress",
loadedRows,
totalRows: rows,
}
self.postMessage(progress)
}
return { buffer, rowsLoaded: loadedRows }
}
async function populateGraph(
manifest: Manifest,
baseUrl: string | undefined,
disableCache: boolean | undefined,
): Promise<ArrayBuffer> {
const graphMeta = manifest.hnsw.graph
const absolute = toAbsolute(graphMeta.path, baseUrl)
return await fetchBinary(absolute, Boolean(disableCache), graphMeta.sha256)
}
function configureRuntimeEnv() {
if (envConfigured) return
env.allowLocalModels = false
env.allowRemoteModels = true
const wasmBackend = env.backends?.onnx?.wasm
if (!wasmBackend) {
throw new Error("transformers.js ONNX runtime backend unavailable")
}
const cdnBase = `https://cdn.jsdelivr.net/npm/@huggingface/transformers@${env.version}/dist/`
wasmBackend.wasmPaths = cdnBase
envConfigured = true
}
async function ensureEncoder() {
if (classifier) return
if (!cfg?.model) {
throw new Error("semantic worker missing model identifier")
}
configureRuntimeEnv()
const dtype = typeof cfg?.dtype === "string" && cfg.dtype.length > 0 ? cfg.dtype : "fp32"
const pipelineOpts: Record<string, unknown> = {
device: "wasm",
dtype,
local_files_only: false,
}
classifier = await pipeline("feature-extraction", cfg.model, pipelineOpts)
cfg.dtype = dtype
}
function vectorSlice(id: number): Float32Array {
if (!vectorsView) {
throw new Error("vector buffer not configured")
}
const start = id * dims
const end = start + dims
return vectorsView.subarray(start, end)
}
function dot(a: Float32Array, b: Float32Array): number {
let s = 0
for (let i = 0; i < dims; i++) {
s += a[i] * b[i]
}
return s
}
function neighborsFor(level: number, node: number): Uint32Array {
const meta = levelGraph[level]
if (!meta) return new Uint32Array()
const { indptr, indices } = meta
if (node < 0 || node + 1 >= indptr.length) return new Uint32Array()
const start = indptr[node]
const end = indptr[node + 1]
return indices.subarray(start, end)
}
function insertSortedDescending(arr: SearchHit[], item: SearchHit) {
let idx = arr.length
while (idx > 0 && arr[idx - 1].score < item.score) {
idx -= 1
}
arr.splice(idx, 0, item)
}
function bruteForceSearch(query: Float32Array, k: number): SearchHit[] {
if (!vectorsView) return []
const hits: SearchHit[] = []
for (let id = 0; id < rows; id++) {
const score = dot(query, vectorSlice(id))
if (hits.length < k) {
insertSortedDescending(hits, { id, score })
} else if (score > hits[hits.length - 1].score) {
insertSortedDescending(hits, { id, score })
hits.length = k
}
}
return hits
}
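
// hnswSearch below is the standard two-phase HNSW query: a greedy descent over
// the upper layers to pick a good layer-0 entry point, then a best-first beam
// search on layer 0 with ef = max(efDefault, 10 * k) candidates. When no graph
// was loaded it falls back to bruteForceSearch above.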
function hnswSearch(query: Float32Array, k: number): SearchHit[] {
if (!manifest || !vectorsView || entryPoint < 0 || levelGraph.length === 0) {
return bruteForceSearch(query, k)
}
const ef = Math.max(efDefault, k * 10)
let ep = entryPoint
let epScore = dot(query, vectorSlice(ep))
for (let level = maxLevel; level > 0; level--) {
let changed = true
while (changed) {
changed = false
const neigh = neighborsFor(level, ep)
for (let i = 0; i < neigh.length; i++) {
const candidate = neigh[i]
if (candidate >= rows) continue
const score = dot(query, vectorSlice(candidate))
if (score > epScore) {
epScore = score
ep = candidate
changed = true
}
}
}
}
const visited = new Set<number>()
const candidateQueue: SearchHit[] = []
const best: SearchHit[] = []
insertSortedDescending(candidateQueue, { id: ep, score: epScore })
insertSortedDescending(best, { id: ep, score: epScore })
visited.add(ep)
while (candidateQueue.length > 0) {
const current = candidateQueue.shift()!
const worstBest = best.length >= ef ? best[best.length - 1].score : -Infinity
if (current.score < worstBest && best.length >= ef) {
break
}
const neigh = neighborsFor(0, current.id)
for (let i = 0; i < neigh.length; i++) {
const candidate = neigh[i]
if (candidate >= rows || visited.has(candidate)) continue
visited.add(candidate)
const score = dot(query, vectorSlice(candidate))
const hit = { id: candidate, score }
insertSortedDescending(candidateQueue, hit)
if (best.length < ef || score > best[best.length - 1].score) {
insertSortedDescending(best, hit)
if (best.length > ef) {
best.pop()
}
}
}
}
best.sort((a, b) => b.score - a.score)
return best.slice(0, k)
}
async function embed(text: string, isQuery: boolean = false): Promise<Float32Array> {
await ensureEncoder()
// Apply model-specific prefixes for asymmetric search
let prefixedText = text
if (cfg?.model) {
const modelName = cfg.model.toLowerCase()
switch (true) {
case modelName.includes("e5"): {
// E5 models require query: or passage: prefix
prefixedText = isQuery ? `query: ${text}` : `passage: ${text}`
break
}
case modelName.includes("qwen") && modelName.includes("embedding"): {
// Qwen3-Embedding requires task instruction for queries only
if (isQuery) {
const task = "Given a web search query, retrieve relevant passages that answer the query"
prefixedText = `Instruct: ${task}\nQuery: ${text}`
}
// Documents use plain text (no prefix)
break
}
case modelName.includes("embeddinggemma"): {
// embeddinggemma requires specific prefixes
prefixedText = isQuery
? `task: search result | query: ${text}`
: `title: none | text: ${text}`
break
}
default:
break
}
}
const out = await classifier(prefixedText, { pooling: "mean", normalize: true })
const data = Array.from(out?.data ?? out) as number[]
const vec = new Float32Array(dims)
for (let i = 0; i < dims; i++) vec[i] = data[i] ?? 0
return vec
}
async function handleInit(msg: InitMessage) {
if (state === "loading" || state === "ready") {
throw new Error("worker already initialized or loading")
}
state = "loading"
abortController?.abort()
abortController = new AbortController()
try {
cfg = msg.cfg
const manifestUrl = toAbsolute(msg.manifestUrl, msg.baseUrl)
const response = await fetch(manifestUrl, { signal: abortController.signal })
if (!response.ok) {
throw new Error(
`failed to fetch manifest ${manifestUrl}: ${response.status} ${response.statusText}`,
)
}
manifest = (await response.json()) as Manifest
if (manifest.vectors.dtype !== "fp32") {
throw new Error(
`unsupported embedding dtype '${manifest.vectors.dtype}', regenerate with fp32`,
)
}
dims = manifest.dims
rows = manifest.rows
const { buffer: vectorBuffer } = await populateVectors(manifest, msg.baseUrl, msg.disableCache)
vectorsView = vectorBuffer
const graphBuffer = await populateGraph(manifest, msg.baseUrl, msg.disableCache)
entryPoint = manifest.hnsw.entryPoint
maxLevel = manifest.hnsw.maxLevel
efDefault = Math.max(64, manifest.hnsw.M * 4)
levelGraph = manifest.hnsw.graph.levels.map((level) => {
const indptr = new Uint32Array(graphBuffer, level.indptr.offset, level.indptr.elements)
const indices = new Uint32Array(graphBuffer, level.indices.offset, level.indices.elements)
return { indptr, indices }
})
state = "ready"
const ready: ReadyMessage = { type: "ready" }
self.postMessage(ready)
} catch (err) {
state = "error"
throw err
}
}
async function handleSearch(msg: SearchMessage) {
if (state !== "ready") {
throw new Error("worker not ready for search")
}
if (!manifest || !vectorsView) {
throw new Error("semantic worker not configured")
}
const queryVec = await embed(msg.text, true)
const semanticHits = hnswSearch(queryVec, Math.max(1, msg.k))
const message: SearchResultMessage = {
type: "search-result",
seq: msg.seq,
semantic: semanticHits,
}
self.postMessage(message)
}
function handleReset() {
abortController?.abort()
abortController = null
state = "idle"
manifest = null
cfg = null
vectorsView = null
dims = 0
rows = 0
classifier = null
envConfigured = false
levelGraph = []
entryPoint = -1
maxLevel = 0
}
self.onmessage = (event: MessageEvent<WorkerMessage>) => {
const data = event.data
if (data.type === "reset") {
handleReset()
return
}
if (data.type === "init") {
void handleInit(data).catch((err: unknown) => {
const message: ErrorMessage = {
type: "error",
message: err instanceof Error ? err.message : String(err),
}
self.postMessage(message)
})
return
}
if (data.type === "search") {
void handleSearch(data).catch((err: unknown) => {
const message: ErrorMessage = {
type: "error",
seq: data.seq,
message: err instanceof Error ? err.message : String(err),
}
self.postMessage(message)
})
}
}
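
// A minimal sketch of driving this worker from the main thread. Assumptions:
// the bundled worker is served as /semantic.worker.js and the manifest at
// /embeddings/manifest.json (matching the emitter output above); cfg mirrors
// the semanticSearch config:
//
//   const worker = new Worker("/semantic.worker.js", { type: "module" })
//   worker.postMessage({
//     type: "init",
//     cfg: { model: "intfloat/multilingual-e5-large", dtype: "fp32" },
//     manifestUrl: "/embeddings/manifest.json",
//   })
//   worker.onmessage = (ev) => {
//     if (ev.data.type === "ready") {
//       worker.postMessage({ type: "search", text: "semantic search", k: 8, seq: 1 })
//     } else if (ev.data.type === "search-result") {
//       console.log(ev.data.semantic) // [{ id, score }, ...] -> indices into manifest.ids
//     }
//   }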