Mirror of https://github.com/jackyzha0/quartz.git (synced 2025-12-17 01:57:58 +01:00)

Compare commits: 3 commits (dependabot...feat/seman)
| Author | SHA1 | Date |
| --- | --- | --- |
|  | eb8a4cce18 |  |
|  | 68682a8fe3 |  |
|  | f533902c75 |  |
docs/embeddings/hnsw.bin (new binary file, not shown)
docs/embeddings/manifest.json (new file, 1 line; diff suppressed because one or more lines are too long)
docs/embeddings/vectors-000.bin (new binary file, not shown)
index.d.ts (vendored, 1 addition)

@@ -13,3 +13,4 @@ interface CustomEventMap {
 type ContentIndex = Record<FullSlug, ContentDetails>
 declare const fetchData: Promise<ContentIndex>
+declare const semanticCfg: import("./quartz/cfg").GlobalConfiguration["semanticSearch"]
package-lock.json (generated, 810 changes)

@@ -11,6 +11,7 @@
 "dependencies": {
 "@clack/prompts": "^0.11.0",
 "@floating-ui/dom": "^1.7.4",
+"@huggingface/transformers": "^3.7.5",
 "@myriaddreamin/rehype-typst": "^0.6.0",
 "@napi-rs/simple-git": "0.1.22",
 "@tweenjs/tween.js": "^25.0.0",
@@ -35,6 +36,7 @@
 "mdast-util-to-string": "^4.0.0",
 "micromorph": "^0.4.5",
 "minimatch": "^10.0.3",
+"onnxruntime-web": "^1.23.0",
 "pixi.js": "^8.13.2",
 "preact": "^10.27.2",
 "preact-render-to-string": "^6.6.1",
@@ -647,6 +649,47 @@
 "integrity": "sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ==",
 "license": "MIT"
 },
+"node_modules/@huggingface/jinja": {
+"version": "0.5.1",
+"resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.1.tgz",
+"integrity": "sha512-yUZLld4lrM9iFxHCwFQ7D1HW2MWMwSbeB7WzWqFYDWK+rEb+WldkLdAJxUPOmgICMHZLzZGVcVjFh3w/YGubng==",
+"license": "MIT",
+"engines": {
+"node": ">=18"
+}
+},
+"node_modules/@huggingface/transformers": {
+"version": "3.7.5",
+"resolved": "https://registry.npmjs.org/@huggingface/transformers/-/transformers-3.7.5.tgz",
+"integrity": "sha512-5jvrIwHyRXfOKVaGKYvUZM6ZjJKQXWeKzIOdKBE5pdzPSNzTwBNx5NdWcGElf4Ddv7Dl2mWsvJh+G5RnCUxMmA==",
+"license": "Apache-2.0",
+"dependencies": {
+"@huggingface/jinja": "^0.5.1",
+"onnxruntime-node": "1.21.0",
+"onnxruntime-web": "1.22.0-dev.20250409-89f8206ba4",
+"sharp": "^0.34.1"
+}
+},
+"node_modules/@huggingface/transformers/node_modules/onnxruntime-common": {
+"version": "1.22.0-dev.20250409-89f8206ba4",
+"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.22.0-dev.20250409-89f8206ba4.tgz",
+"integrity": "sha512-vDJMkfCfb0b1A836rgHj+ORuZf4B4+cc2bASQtpeoJLueuFc5DuYwjIZUBrSvx/fO5IrLjLz+oTrB3pcGlhovQ==",
+"license": "MIT"
+},
+"node_modules/@huggingface/transformers/node_modules/onnxruntime-web": {
+"version": "1.22.0-dev.20250409-89f8206ba4",
+"resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.22.0-dev.20250409-89f8206ba4.tgz",
+"integrity": "sha512-0uS76OPgH0hWCPrFKlL8kYVV7ckM7t/36HfbgoFw6Nd0CZVVbQC4PkrR8mBX8LtNUFZO25IQBqV2Hx2ho3FlbQ==",
+"license": "MIT",
+"dependencies": {
+"flatbuffers": "^25.1.24",
+"guid-typescript": "^1.0.9",
+"long": "^5.2.3",
+"onnxruntime-common": "1.22.0-dev.20250409-89f8206ba4",
+"platform": "^1.3.6",
+"protobufjs": "^7.2.4"
+}
+},
 "node_modules/@img/colour": {
 "version": "1.0.0",
 "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.0.0.tgz",
@@ -1095,6 +1138,18 @@
 "node": "20 || >=22"
 }
 },
+"node_modules/@isaacs/fs-minipass": {
+"version": "4.0.1",
+"resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz",
+"integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==",
+"license": "ISC",
+"dependencies": {
+"minipass": "^7.0.4"
+},
+"engines": {
+"node": ">=18.0.0"
+}
+},
 "node_modules/@myriaddreamin/rehype-typst": {
 "version": "0.6.0",
 "resolved": "https://registry.npmjs.org/@myriaddreamin/rehype-typst/-/rehype-typst-0.6.0.tgz",
@@ -1616,6 +1671,70 @@
 "integrity": "sha512-nezytU2pw587fQstUu1AsJZDVEynjskwOL+kibwcdxsMBFqPsFFNA7xl0ii/gXuDi6M0xj3mfRJj8pBSc2jCfA==",
 "license": "MIT"
 },
+"node_modules/@protobufjs/aspromise": {
+"version": "1.1.2",
+"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
+"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
+"license": "BSD-3-Clause"
+},
+"node_modules/@protobufjs/base64": {
+"version": "1.1.2",
+"resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
+"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
+"license": "BSD-3-Clause"
+},
+"node_modules/@protobufjs/codegen": {
+"version": "2.0.4",
+"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
+"integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==",
+"license": "BSD-3-Clause"
+},
+"node_modules/@protobufjs/eventemitter": {
+"version": "1.1.0",
+"resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
+"integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==",
+"license": "BSD-3-Clause"
+},
+"node_modules/@protobufjs/fetch": {
+"version": "1.1.0",
+"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
+"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
+"license": "BSD-3-Clause",
+"dependencies": {
+"@protobufjs/aspromise": "^1.1.1",
+"@protobufjs/inquire": "^1.1.0"
+}
+},
+"node_modules/@protobufjs/float": {
+"version": "1.0.2",
+"resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
+"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
+"license": "BSD-3-Clause"
+},
+"node_modules/@protobufjs/inquire": {
+"version": "1.1.0",
+"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
+"integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==",
+"license": "BSD-3-Clause"
+},
+"node_modules/@protobufjs/path": {
+"version": "1.1.2",
+"resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
+"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
+"license": "BSD-3-Clause"
+},
+"node_modules/@protobufjs/pool": {
+"version": "1.1.0",
+"resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
+"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
+"license": "BSD-3-Clause"
+},
+"node_modules/@protobufjs/utf8": {
+"version": "1.1.0",
+"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
+"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==",
+"license": "BSD-3-Clause"
+},
 "node_modules/@shikijs/core": {
 "version": "1.26.2",
 "resolved": "https://registry.npmjs.org/@shikijs/core/-/core-1.26.2.tgz",
@@ -2053,7 +2172,6 @@
 "version": "24.6.0",
 "resolved": "https://registry.npmjs.org/@types/node/-/node-24.6.0.tgz",
 "integrity": "sha512-F1CBxgqwOMc4GKJ7eY22hWhBVQuMYTtqI8L0FcszYcpYX0fzfDGpez22Xau8Mgm7O9fI+zA/TYIdq3tGWfweBA==",
 "dev": true,
 "license": "MIT",
 "dependencies": {
 "undici-types": "~7.13.0"
@@ -2223,6 +2341,13 @@
 }
 ]
 },
+"node_modules/boolean": {
+"version": "3.2.0",
+"resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz",
+"integrity": "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw==",
+"deprecated": "Package no longer supported. Contact Support at https://www.npmjs.com/support for more info.",
+"license": "MIT"
+},
 "node_modules/braces": {
 "version": "3.0.3",
 "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
@@ -2345,6 +2470,15 @@
 "url": "https://paulmillr.com/funding/"
 }
 },
+"node_modules/chownr": {
+"version": "3.0.0",
+"resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz",
+"integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==",
+"license": "BlueOak-1.0.0",
+"engines": {
+"node": ">=18"
+}
+},
 "node_modules/citeproc": {
 "version": "2.4.63",
 "resolved": "https://registry.npmjs.org/citeproc/-/citeproc-2.4.63.tgz",
@@ -2861,6 +2995,40 @@
 "url": "https://github.com/sponsors/wooorm"
 }
 },
+"node_modules/define-data-property": {
+"version": "1.1.4",
+"resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz",
+"integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==",
+"license": "MIT",
+"dependencies": {
+"es-define-property": "^1.0.0",
+"es-errors": "^1.3.0",
+"gopd": "^1.0.1"
+},
+"engines": {
+"node": ">= 0.4"
+},
+"funding": {
+"url": "https://github.com/sponsors/ljharb"
+}
+},
+"node_modules/define-properties": {
+"version": "1.2.1",
+"resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.1.tgz",
+"integrity": "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==",
+"license": "MIT",
+"dependencies": {
+"define-data-property": "^1.0.1",
+"has-property-descriptors": "^1.0.0",
+"object-keys": "^1.1.1"
+},
+"engines": {
+"node": ">= 0.4"
+},
+"funding": {
+"url": "https://github.com/sponsors/ljharb"
+}
+},
 "node_modules/delaunator": {
 "version": "5.0.0",
 "resolved": "https://registry.npmjs.org/delaunator/-/delaunator-5.0.0.tgz",
@@ -2886,6 +3054,12 @@
 "node": ">=8"
 }
 },
+"node_modules/detect-node": {
+"version": "2.1.0",
+"resolved": "https://registry.npmjs.org/detect-node/-/detect-node-2.1.0.tgz",
+"integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g==",
+"license": "MIT"
+},
 "node_modules/devlop": {
 "version": "1.1.0",
 "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz",
@@ -2926,6 +3100,30 @@
 "url": "https://github.com/fb55/entities?sponsor=1"
 }
 },
+"node_modules/es-define-property": {
+"version": "1.0.1",
+"resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
+"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
+"license": "MIT",
+"engines": {
+"node": ">= 0.4"
+}
+},
+"node_modules/es-errors": {
+"version": "1.3.0",
+"resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
+"integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
+"license": "MIT",
+"engines": {
+"node": ">= 0.4"
+}
+},
+"node_modules/es6-error": {
+"version": "4.1.1",
+"resolved": "https://registry.npmjs.org/es6-error/-/es6-error-4.1.1.tgz",
+"integrity": "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg==",
+"license": "MIT"
+},
 "node_modules/esbuild": {
 "version": "0.25.10",
 "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.10.tgz",
@@ -3164,6 +3362,12 @@
 "node": ">=8"
 }
 },
+"node_modules/flatbuffers": {
+"version": "25.9.23",
+"resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-25.9.23.tgz",
+"integrity": "sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ==",
+"license": "Apache-2.0"
+},
 "node_modules/flexsearch": {
 "version": "0.8.205",
 "resolved": "https://registry.npmjs.org/flexsearch/-/flexsearch-0.8.205.tgz",
@@ -3284,6 +3488,39 @@
 "node": ">= 6"
 }
 },
+"node_modules/global-agent": {
+"version": "3.0.0",
+"resolved": "https://registry.npmjs.org/global-agent/-/global-agent-3.0.0.tgz",
+"integrity": "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q==",
+"license": "BSD-3-Clause",
+"dependencies": {
+"boolean": "^3.0.1",
+"es6-error": "^4.1.1",
+"matcher": "^3.0.0",
+"roarr": "^2.15.3",
+"semver": "^7.3.2",
+"serialize-error": "^7.0.1"
+},
+"engines": {
+"node": ">=10.0"
+}
+},
+"node_modules/globalthis": {
+"version": "1.0.4",
+"resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.4.tgz",
+"integrity": "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ==",
+"license": "MIT",
+"dependencies": {
+"define-properties": "^1.2.1",
+"gopd": "^1.0.1"
+},
+"engines": {
+"node": ">= 0.4"
+},
+"funding": {
+"url": "https://github.com/sponsors/ljharb"
+}
+},
 "node_modules/globby": {
 "version": "15.0.0",
 "resolved": "https://registry.npmjs.org/globby/-/globby-15.0.0.tgz",
@@ -3304,6 +3541,18 @@
 "url": "https://github.com/sponsors/sindresorhus"
 }
 },
+"node_modules/gopd": {
+"version": "1.2.0",
+"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
+"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
+"license": "MIT",
+"engines": {
+"node": ">= 0.4"
+},
+"funding": {
+"url": "https://github.com/sponsors/ljharb"
+}
+},
 "node_modules/gray-matter": {
 "version": "4.0.3",
 "resolved": "https://registry.npmjs.org/gray-matter/-/gray-matter-4.0.3.tgz",
@@ -3338,6 +3587,12 @@
 "js-yaml": "bin/js-yaml.js"
 }
 },
+"node_modules/guid-typescript": {
+"version": "1.0.9",
+"resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
+"integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==",
+"license": "ISC"
+},
 "node_modules/has-flag": {
 "version": "4.0.0",
 "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
@@ -3347,6 +3602,18 @@
 "node": ">=8"
 }
 },
+"node_modules/has-property-descriptors": {
+"version": "1.0.2",
+"resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz",
+"integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==",
+"license": "MIT",
+"dependencies": {
+"es-define-property": "^1.0.0"
+},
+"funding": {
+"url": "https://github.com/sponsors/ljharb"
+}
+},
 "node_modules/hasown": {
 "version": "2.0.0",
 "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.0.tgz",
@@ -3932,6 +4199,12 @@
 "js-yaml": "bin/js-yaml.js"
 }
 },
+"node_modules/json-stringify-safe": {
+"version": "5.0.1",
+"resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
+"integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==",
+"license": "ISC"
+},
 "node_modules/katex": {
 "version": "0.16.21",
 "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.21.tgz",
@@ -4232,6 +4505,12 @@
 "node": ">= 0.4"
 }
 },
+"node_modules/long": {
+"version": "5.3.2",
+"resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
+"integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
+"license": "Apache-2.0"
+},
 "node_modules/longest-streak": {
 "version": "3.1.0",
 "resolved": "https://registry.npmjs.org/longest-streak/-/longest-streak-3.1.0.tgz",
@@ -4250,6 +4529,30 @@
 "url": "https://github.com/sponsors/wooorm"
 }
 },
+"node_modules/matcher": {
+"version": "3.0.0",
+"resolved": "https://registry.npmjs.org/matcher/-/matcher-3.0.0.tgz",
+"integrity": "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng==",
+"license": "MIT",
+"dependencies": {
+"escape-string-regexp": "^4.0.0"
+},
+"engines": {
+"node": ">=10"
+}
+},
+"node_modules/matcher/node_modules/escape-string-regexp": {
+"version": "4.0.0",
+"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
+"integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==",
+"license": "MIT",
+"engines": {
+"node": ">=10"
+},
+"funding": {
+"url": "https://github.com/sponsors/sindresorhus"
+}
+},
 "node_modules/mathjax-full": {
 "version": "3.2.2",
 "resolved": "https://registry.npmjs.org/mathjax-full/-/mathjax-full-3.2.2.tgz",
@@ -5250,6 +5553,27 @@
 "url": "https://github.com/sponsors/isaacs"
 }
 },
+"node_modules/minipass": {
+"version": "7.1.2",
+"resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz",
+"integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==",
+"license": "ISC",
+"engines": {
+"node": ">=16 || 14 >=14.17"
+}
+},
+"node_modules/minizlib": {
+"version": "3.1.0",
+"resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.1.0.tgz",
+"integrity": "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==",
+"license": "MIT",
+"dependencies": {
+"minipass": "^7.1.2"
+},
+"engines": {
+"node": ">= 18"
+}
+},
 "node_modules/mj-context-menu": {
 "version": "0.6.1",
 "resolved": "https://registry.npmjs.org/mj-context-menu/-/mj-context-menu-0.6.1.tgz",
@@ -5316,6 +5640,15 @@
 "webidl-conversions": "^3.0.0"
 }
 },
+"node_modules/object-keys": {
+"version": "1.1.1",
+"resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz",
+"integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==",
+"license": "MIT",
+"engines": {
+"node": ">= 0.4"
+}
+},
 "node_modules/oniguruma-to-es": {
 "version": "1.0.0",
 "resolved": "https://registry.npmjs.org/oniguruma-to-es/-/oniguruma-to-es-1.0.0.tgz",
@@ -5326,6 +5659,49 @@
 "regex-recursion": "^5.1.1"
 }
 },
+"node_modules/onnxruntime-common": {
+"version": "1.21.0",
+"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.21.0.tgz",
+"integrity": "sha512-Q632iLLrtCAVOTO65dh2+mNbQir/QNTVBG3h/QdZBpns7mZ0RYbLRBgGABPbpU9351AgYy7SJf1WaeVwMrBFPQ==",
+"license": "MIT"
+},
+"node_modules/onnxruntime-node": {
+"version": "1.21.0",
+"resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.21.0.tgz",
+"integrity": "sha512-NeaCX6WW2L8cRCSqy3bInlo5ojjQqu2fD3D+9W5qb5irwxhEyWKXeH2vZ8W9r6VxaMPUan+4/7NDwZMtouZxEw==",
+"hasInstallScript": true,
+"license": "MIT",
+"os": [
+"win32",
+"darwin",
+"linux"
+],
+"dependencies": {
+"global-agent": "^3.0.0",
+"onnxruntime-common": "1.21.0",
+"tar": "^7.0.1"
+}
+},
+"node_modules/onnxruntime-web": {
+"version": "1.23.0",
+"resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.23.0.tgz",
+"integrity": "sha512-w0bvC2RwDxphOUFF8jFGZ/dYw+duaX20jM6V4BIZJPCfK4QuCpB/pVREV+hjYbT3x4hyfa2ZbTaWx4e1Vot0fQ==",
+"license": "MIT",
+"dependencies": {
+"flatbuffers": "^25.1.24",
+"guid-typescript": "^1.0.9",
+"long": "^5.2.3",
+"onnxruntime-common": "1.23.0",
+"platform": "^1.3.6",
+"protobufjs": "^7.2.4"
+}
+},
+"node_modules/onnxruntime-web/node_modules/onnxruntime-common": {
+"version": "1.23.0",
+"resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.23.0.tgz",
+"integrity": "sha512-Auz8S9D7vpF8ok7fzTobvD1XdQDftRf/S7pHmjeCr3Xdymi4z1C7zx4vnT6nnUjbpelZdGwda0BmWHCCTMKUTg==",
+"license": "MIT"
+},
 "node_modules/pako": {
 "version": "0.2.9",
 "resolved": "https://registry.npmjs.org/pako/-/pako-0.2.9.tgz",
@@ -5473,6 +5849,12 @@
 "url": "https://opencollective.com/pixijs"
 }
 },
+"node_modules/platform": {
+"version": "1.3.6",
+"resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
+"integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==",
+"license": "MIT"
+},
 "node_modules/postcss-value-parser": {
 "version": "4.2.0",
 "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz",
@@ -5543,6 +5925,30 @@
 "url": "https://github.com/sponsors/wooorm"
 }
 },
+"node_modules/protobufjs": {
+"version": "7.5.4",
+"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz",
+"integrity": "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==",
+"hasInstallScript": true,
+"license": "BSD-3-Clause",
+"dependencies": {
+"@protobufjs/aspromise": "^1.1.2",
+"@protobufjs/base64": "^1.1.2",
+"@protobufjs/codegen": "^2.0.4",
+"@protobufjs/eventemitter": "^1.1.0",
+"@protobufjs/fetch": "^1.1.0",
+"@protobufjs/float": "^1.0.2",
+"@protobufjs/inquire": "^1.1.0",
+"@protobufjs/path": "^1.1.2",
+"@protobufjs/pool": "^1.1.0",
+"@protobufjs/utf8": "^1.1.0",
+"@types/node": ">=13.7.0",
+"long": "^5.0.0"
+},
+"engines": {
+"node": ">=12.0.0"
+}
+},
 "node_modules/queue-microtask": {
 "version": "1.2.3",
 "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
@@ -6002,6 +6408,29 @@
 "resolved": "https://registry.npmjs.org/rfdc/-/rfdc-1.4.1.tgz",
 "integrity": "sha512-q1b3N5QkRUWUl7iyylaaj3kOpIT0N2i9MqIEQXP73GVsN9cw3fdx8X63cEmWhJGi2PPCF23Ijp7ktmd39rawIA=="
 },
+"node_modules/roarr": {
+"version": "2.15.4",
+"resolved": "https://registry.npmjs.org/roarr/-/roarr-2.15.4.tgz",
+"integrity": "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A==",
+"license": "BSD-3-Clause",
+"dependencies": {
+"boolean": "^3.0.1",
+"detect-node": "^2.0.4",
+"globalthis": "^1.0.1",
+"json-stringify-safe": "^5.0.1",
+"semver-compare": "^1.0.0",
+"sprintf-js": "^1.1.2"
+},
+"engines": {
+"node": ">=8.0"
+}
+},
+"node_modules/roarr/node_modules/sprintf-js": {
+"version": "1.1.3",
+"resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz",
+"integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==",
+"license": "BSD-3-Clause"
+},
 "node_modules/robust-predicates": {
 "version": "3.0.2",
 "resolved": "https://registry.npmjs.org/robust-predicates/-/robust-predicates-3.0.2.tgz",
@@ -6113,326 +6542,6 @@
 "sass-embedded-win32-x64": "1.79.4"
 }
 },
-"node_modules/sass-embedded-android-arm": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-android-arm/-/sass-embedded-android-arm-1.79.4.tgz",
-"integrity": "sha512-YOVpDGDcwWUQvktpJhYo4zOkknDpdX6ALpaeHDTX6GBUvnZfx+Widh76v+QFUhiJQ/I/hndXg1jv/PKilOHRrw==",
-"cpu": [
-"arm"
-],
-"optional": true,
-"os": [
-"android"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-android-arm64": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-android-arm64/-/sass-embedded-android-arm64-1.79.4.tgz",
-"integrity": "sha512-0JAZ8TtXYv9yI3Yasaq03xvo7DLJOmD+Exb30oJKxXcWTAV9TB0ZWKoIRsFxbCyPxyn7ouxkaCEXQtaTRKrmfw==",
-"cpu": [
-"arm64"
-],
-"optional": true,
-"os": [
-"android"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-android-ia32": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-android-ia32/-/sass-embedded-android-ia32-1.79.4.tgz",
-"integrity": "sha512-IjO3RoyvNN84ZyfAR5s/a8TIdNPfClb7CLGrswB3BN/NElYIJUJMVHD6+Y8W9QwBIZ8DrK1IdLFSTV8nn82xMA==",
-"cpu": [
-"ia32"
-],
-"optional": true,
-"os": [
-"android"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-android-riscv64": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-android-riscv64/-/sass-embedded-android-riscv64-1.79.4.tgz",
-"integrity": "sha512-uOT8nXmKxSwuIdcqvElVWBFcm/+YcIvmwfoKbpuuSOSxUe9eqFzxo+fk7ILhynzf6FBlvRUH5DcjGj+sXtCc3w==",
-"cpu": [
-"riscv64"
-],
-"optional": true,
-"os": [
-"android"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-android-x64": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-android-x64/-/sass-embedded-android-x64-1.79.4.tgz",
-"integrity": "sha512-W2FQoj3Z2J2DirNs3xSBVvrhMuqLnsqvOPulxOkhL/074+faKOZZnPx2tZ5zsHbY97SonciiU0SV0mm98xI42w==",
-"cpu": [
-"x64"
-],
-"optional": true,
-"os": [
-"android"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-darwin-arm64": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-darwin-arm64/-/sass-embedded-darwin-arm64-1.79.4.tgz",
-"integrity": "sha512-pcYtbN1VUAAcfgyHeX8ySndDWGjIvcq6rldduktPbGGuAlEWFDfnwjTbv0hS945ggdzZ6TFnaFlLEDr0SjKzBA==",
-"cpu": [
-"arm64"
-],
-"optional": true,
-"os": [
-"darwin"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-darwin-x64": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-darwin-x64/-/sass-embedded-darwin-x64-1.79.4.tgz",
-"integrity": "sha512-ir8CFTfc4JLx/qCP8LK1/3pWv35nRyAQkUK7lBIKM6hWzztt64gcno9rZIk4SpHr7Z/Bp1IYWWRS4ZT+4HmsbA==",
-"cpu": [
-"x64"
-],
-"optional": true,
-"os": [
-"darwin"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-linux-arm": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-linux-arm/-/sass-embedded-linux-arm-1.79.4.tgz",
-"integrity": "sha512-H/XEE3rY7c+tY0qDaELjPjC6VheAhBo1tPJQ6UHoBEf8xrbT/RT3dWiIS8grp9Vk54RCn05BEB/+POaljvvKGA==",
-"cpu": [
-"arm"
-],
-"optional": true,
-"os": [
-"linux"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-linux-arm64": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-linux-arm64/-/sass-embedded-linux-arm64-1.79.4.tgz",
-"integrity": "sha512-XIVn2mCuA422SR2kmKjF6jhjMs1Vrt1DbZ/ktSp+eR0sU4ugu2htg45GajiUFSKKRj7Sc+cBdThq1zPPsDLf1w==",
-"cpu": [
-"arm64"
-],
-"optional": true,
-"os": [
-"linux"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-linux-ia32": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-linux-ia32/-/sass-embedded-linux-ia32-1.79.4.tgz",
-"integrity": "sha512-3nqZxV4nuUTb1ahLexVl4hsnx1KKwiGdHEf1xHWTZai6fYFMcwyNPrHySCQzFHqb5xiqSpPzzrKjuDhF6+guuQ==",
-"cpu": [
-"ia32"
-],
-"optional": true,
-"os": [
-"linux"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-linux-musl-arm": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-arm/-/sass-embedded-linux-musl-arm-1.79.4.tgz",
-"integrity": "sha512-HnbU1DEiQdUayioNzxh2WlbTEgQRBPTgIIvof8J63QLmVItUqE7EkWYkSUy4RhO+8NsuN9wzGmGTzFBvTImU7g==",
-"cpu": [
-"arm"
-],
-"optional": true,
-"os": [
-"linux"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-linux-musl-arm64": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-arm64/-/sass-embedded-linux-musl-arm64-1.79.4.tgz",
-"integrity": "sha512-C6qX06waPEfDgOHR8jXoYxl0EtIXOyBDyyonrLO3StRjWjGx7XMQj2hA/KXSsV+Hr71fBOsaViosqWXPzTbEiQ==",
-"cpu": [
-"arm64"
-],
-"optional": true,
-"os": [
-"linux"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-linux-musl-ia32": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-ia32/-/sass-embedded-linux-musl-ia32-1.79.4.tgz",
-"integrity": "sha512-y5b0fdOPWyhj4c+mc88GvQiC5onRH1V0iNaWNjsiZ+L4hHje6T98nDLrCJn0fz5GQnXjyLCLZduMWbfV0QjHGg==",
-"cpu": [
-"ia32"
-],
-"optional": true,
-"os": [
-"linux"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-linux-musl-riscv64": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-riscv64/-/sass-embedded-linux-musl-riscv64-1.79.4.tgz",
-"integrity": "sha512-G2M5ADMV9SqnkwpM0S+UzDz7xR2njCOhofku/sDMZABzAjQQWTsAykKoGmzlT98fTw2HbNhb6u74umf2WLhCfw==",
-"cpu": [
-"riscv64"
-],
-"optional": true,
-"os": [
-"linux"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-linux-musl-x64": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-linux-musl-x64/-/sass-embedded-linux-musl-x64-1.79.4.tgz",
-"integrity": "sha512-kQm8dCU3DXf7DtUGWYPiPs03KJYKvFeiZJHhSx993DCM8D2b0wCXWky0S0Z46gf1sEur0SN4Lvnt1WczTqxIBw==",
-"cpu": [
-"x64"
-],
-"optional": true,
-"os": [
-"linux"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-linux-riscv64": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-linux-riscv64/-/sass-embedded-linux-riscv64-1.79.4.tgz",
-"integrity": "sha512-GaTI/mXYWYSzG5wxtM4H2cozLpATyh+4l+rO9FFKOL8e1sUOLAzTeRdU2nSBYCuRqsxRuTZIwCXhSz9Q3NRuNA==",
-"cpu": [
-"riscv64"
-],
-"optional": true,
-"os": [
-"linux"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-linux-x64": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-linux-x64/-/sass-embedded-linux-x64-1.79.4.tgz",
-"integrity": "sha512-f9laGkqHgC01h99Qt4LsOV+OLMffjvUcTu14hYWqMS9QVX5a4ihMwpf1NoAtTUytb7cVF3rYY/NVGuXt6G3ppQ==",
-"cpu": [
-"x64"
-],
-"optional": true,
-"os": [
-"linux"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-win32-arm64": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-win32-arm64/-/sass-embedded-win32-arm64-1.79.4.tgz",
-"integrity": "sha512-cidBvtaA2cJ6dNlwQEa8qak+ezypurzKs0h0QAHLH324+j/6Jum7LCnQhZRPYJBFjHl+WYd7KwzPnJ2X5USWnQ==",
-"cpu": [
-"arm64"
-],
-"optional": true,
-"os": [
-"win32"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-win32-ia32": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-win32-ia32/-/sass-embedded-win32-ia32-1.79.4.tgz",
-"integrity": "sha512-hexdmNTIZGTKNTzlMcdvEXzYuxOJcY89zqgsf45aQ2YMy4y2M8dTOxRI/Vz7p4iRxVp1Jow6LCtaLHrNI2Ordg==",
-"cpu": [
-"ia32"
-],
-"optional": true,
-"os": [
-"win32"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
-"node_modules/sass-embedded-win32-x64": {
-"version": "1.79.4",
-"resolved": "https://registry.npmjs.org/sass-embedded-win32-x64/-/sass-embedded-win32-x64-1.79.4.tgz",
-"integrity": "sha512-73yrpiWIbti6DkxhWURklkgSLYKfU9itDmvHxB+oYSb4vQveIApqTwSyTOuIUb/6Da/EsgEpdJ4Lbj4sLaMZWA==",
-"cpu": [
-"x64"
-],
-"optional": true,
-"os": [
-"win32"
-],
-"peer": true,
-"engines": {
-"node": ">=14.0.0"
-}
-},
 "node_modules/satori": {
 "version": "0.18.3",
 "resolved": "https://registry.npmjs.org/satori/-/satori-0.18.3.tgz",
@@ -6488,6 +6597,27 @@
 "node": ">=10"
 }
 },
+"node_modules/semver-compare": {
+"version": "1.0.0",
+"resolved": "https://registry.npmjs.org/semver-compare/-/semver-compare-1.0.0.tgz",
+"integrity": "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow==",
+"license": "MIT"
+},
+"node_modules/serialize-error": {
+"version": "7.0.1",
+"resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-7.0.1.tgz",
+"integrity": "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw==",
+"license": "MIT",
+"dependencies": {
+"type-fest": "^0.13.1"
+},
+"engines": {
+"node": ">=10"
+},
+"funding": {
+"url": "https://github.com/sponsors/sindresorhus"
+}
+},
 "node_modules/serve-handler": {
 "version": "6.1.6",
 "resolved": "https://registry.npmjs.org/serve-handler/-/serve-handler-6.1.6.tgz",
@@ -6771,6 +6901,22 @@
 "node": ">=14"
 }
 },
+"node_modules/tar": {
+"version": "7.5.1",
+"resolved": "https://registry.npmjs.org/tar/-/tar-7.5.1.tgz",
+"integrity": "sha512-nlGpxf+hv0v7GkWBK2V9spgactGOp0qvfWRxUMjqHyzrt3SgwE48DIv/FhqPHJYLHpgW1opq3nERbz5Anq7n1g==",
+"license": "ISC",
+"dependencies": {
+"@isaacs/fs-minipass": "^4.0.0",
+"chownr": "^3.0.0",
+"minipass": "^7.1.2",
+"minizlib": "^3.1.0",
+"yallist": "^5.0.0"
+},
+"engines": {
+"node": ">=18"
+}
+},
 "node_modules/tiny-inflate": {
 "version": "1.0.3",
 "resolved": "https://registry.npmjs.org/tiny-inflate/-/tiny-inflate-1.0.3.tgz",
@@ -6858,6 +7004,18 @@
 "fsevents": "~2.3.3"
 }
 },
+"node_modules/type-fest": {
+"version": "0.13.1",
+"resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.13.1.tgz",
+"integrity": "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg==",
+"license": "(MIT OR CC0-1.0)",
+"engines": {
+"node": ">=10"
+},
+"funding": {
+"url": "https://github.com/sponsors/sindresorhus"
+}
+},
 "node_modules/typescript": {
 "version": "5.9.2",
 "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.2.tgz",
@@ -6876,7 +7034,6 @@
 "version": "7.13.0",
 "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.13.0.tgz",
 "integrity": "sha512-Ov2Rr9Sx+fRgagJ5AX0qvItZG/JKKoBRAVITs1zk7IqZGTJUwgUr7qoYBpWwakpWilTZFM98rG/AFRocu10iIQ==",
 "dev": true,
 "license": "MIT"
 },
 "node_modules/unicode-trie": {
@@ -7211,6 +7368,15 @@
 "node": ">=10"
 }
 },
+"node_modules/yallist": {
+"version": "5.0.0",
+"resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
+"integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==",
+"license": "BlueOak-1.0.0",
+"engines": {
+"node": ">=18"
+}
+},
 "node_modules/yargs": {
 "version": "18.0.0",
 "resolved": "https://registry.npmjs.org/yargs/-/yargs-18.0.0.tgz",
package.json

@@ -37,6 +37,7 @@
 "dependencies": {
 "@clack/prompts": "^0.11.0",
 "@floating-ui/dom": "^1.7.4",
+"@huggingface/transformers": "^3.7.5",
 "@myriaddreamin/rehype-typst": "^0.6.0",
 "@napi-rs/simple-git": "0.1.22",
 "@tweenjs/tween.js": "^25.0.0",
@@ -61,6 +62,7 @@
 "mdast-util-to-string": "^4.0.0",
 "micromorph": "^0.4.5",
 "minimatch": "^10.0.3",
+"onnxruntime-web": "^1.23.0",
 "pixi.js": "^8.13.2",
 "preact": "^10.27.2",
 "preact-render-to-string": "^6.6.1",
quartz.config.ts

@@ -1,6 +1,18 @@
-import { QuartzConfig } from "./quartz/cfg"
+import { GlobalConfiguration, QuartzConfig } from "./quartz/cfg"
 import * as Plugin from "./quartz/plugins"

+const semanticSearch: GlobalConfiguration["semanticSearch"] = {
+enable: true,
+model: "onnx-community/embeddinggemma-300m-ONNX",
+aot: true,
+dims: 768,
+dtype: "fp32",
+shardSizeRows: 1024,
+hnsw: { M: 16, efConstruction: 200 },
+chunking: { chunkSize: 256, chunkOverlap: 64 },
+vllm: { enable: true, concurrency: 16, batchSize: 128 },
+}
+
 /**
 * Quartz 4 Configuration
 *
@@ -52,6 +64,7 @@ const config: QuartzConfig = {
 },
 },
 },
+semanticSearch,
 },
 plugins: {
 transformers: [
@@ -84,6 +97,7 @@ const config: QuartzConfig = {
 enableSiteMap: true,
 enableRSS: true,
 }),
+Plugin.SemanticIndex(semanticSearch),
 Plugin.Assets(),
 Plugin.Static(),
 Plugin.Favicon(),
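A quick sanity check on what these numbers imply for the emitted artifacts (a sketch, assuming one raw fp32 vector of `dims` floats per row and no per-shard header; the actual layout is whatever the SemanticIndex emitter writes):

```typescript
// Back-of-envelope size of one full vector shard for the config above.
// Assumption: raw fp32 floats, no compression or metadata.
const dims = 768
const bytesPerFloat = 4 // "fp32"
const shardSizeRows = 1024
const shardBytes = dims * bytesPerFloat * shardSizeRows
console.log(`${(shardBytes / (1024 * 1024)).toFixed(1)} MiB per shard`) // "3.0 MiB"
```

At roughly 3 MiB per 1024 chunks, a small site fits in one shard, which is consistent with the single docs/embeddings/vectors-000.bin file added in this comparison.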
quartz/cfg.ts

@@ -78,6 +78,34 @@ export interface GlobalConfiguration {
 * Region Codes: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
 */
 locale: ValidLocale
+/** Semantic search configuration */
+semanticSearch?: {
+enable: boolean
+model: string
+aot: boolean
+dtype: "fp32" | "fp16"
+dims: number
+shardSizeRows: number
+manifestUrl?: string
+manifestBaseUrl?: string
+disableCache?: boolean
+hnsw: {
+M: number
+efConstruction: number
+efSearch?: number
+}
+chunking: {
+chunkSize: number
+chunkOverlap: number
+noChunking?: boolean
+}
+vllm?: {
+enable: boolean
+vllmUrl?: string
+concurrency: number
+batchSize: number
+}
+}
 }

 export interface QuartzConfig {
|
||||
|
||||
export interface SearchOptions {
|
||||
enablePreview: boolean
|
||||
includeButton: boolean
|
||||
}
|
||||
|
||||
const defaultOptions: SearchOptions = {
|
||||
enablePreview: true,
|
||||
includeButton: true,
|
||||
}
|
||||
|
||||
export default ((userOpts?: Partial<SearchOptions>) => {
|
||||
@@ -29,19 +31,54 @@ export default ((userOpts?: Partial<SearchOptions>) => {
|
||||
</svg>
|
||||
<p>{i18n(cfg.locale).components.search.title}</p>
|
||||
</button>
|
||||
<div class="search-container">
|
||||
<div class="search-space">
|
||||
<input
|
||||
autocomplete="off"
|
||||
class="search-bar"
|
||||
name="search"
|
||||
type="text"
|
||||
aria-label={searchPlaceholder}
|
||||
placeholder={searchPlaceholder}
|
||||
/>
|
||||
<div class="search-layout" data-preview={opts.enablePreview}></div>
|
||||
</div>
|
||||
</div>
|
||||
<search class="search-container">
|
||||
<form class="search-space">
|
||||
<div class="input-container">
|
||||
<input
|
||||
autocomplete="off"
|
||||
class="search-bar"
|
||||
name="search"
|
||||
type="text"
|
||||
aria-label={searchPlaceholder}
|
||||
placeholder={searchPlaceholder}
|
||||
/>
|
||||
<div class="search-mode-toggle" role="radiogroup" aria-label="Search mode">
|
||||
<button
|
||||
type="button"
|
||||
class="mode-option"
|
||||
data-mode="lexical"
|
||||
aria-pressed="true"
|
||||
aria-label="Full-text search"
|
||||
>
|
||||
<svg viewBox="0 0 20 20" role="img" aria-hidden="true">
|
||||
<g fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round">
|
||||
<path d="M4 6h12M4 10h8M4 14h6" />
|
||||
</g>
|
||||
</svg>
|
||||
<span class="sr-only">Full-text</span>
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
class="mode-option"
|
||||
data-mode="semantic"
|
||||
aria-pressed="false"
|
||||
aria-label="Semantic search"
|
||||
>
|
||||
<svg viewBox="0 0 20 20" role="img" aria-hidden="true">
|
||||
<g fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round">
|
||||
<circle cx="5.2" cy="10" r="2.4" />
|
||||
<circle cx="14.8" cy="4.8" r="2.1" />
|
||||
<circle cx="14.8" cy="15.2" r="2.1" />
|
||||
<path d="M7.1 8.7l5.2-2.4M7.1 11.3l5.2 2.4M14.8 6.9v6.2" />
|
||||
</g>
|
||||
</svg>
|
||||
<span class="sr-only">Semantic</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<output class="search-layout" data-preview={opts.enablePreview} />
|
||||
</form>
|
||||
</search>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
quartz/components/renderPage.tsx

@@ -25,6 +25,7 @@ const headerRegex = new RegExp(/h[1-6]/)
 export function pageResources(
 baseDir: FullSlug | RelativeURL,
 staticResources: StaticResources,
+cfg?: GlobalConfiguration,
 ): StaticResources {
 const contentIndexPath = joinSegments(baseDir, "static/contentIndex.json")
 const contentIndexScript = `const fetchData = fetch("${contentIndexPath}").then(data => data.json())`
@@ -48,6 +49,12 @@
 spaPreserve: true,
 script: contentIndexScript,
 },
+{
+loadTime: "beforeDOMReady",
+contentType: "inline",
+spaPreserve: true,
+script: `const semanticCfg = ${JSON.stringify(cfg?.semanticSearch ?? {})};`,
+},
 ...staticResources.js,
 ],
 additionalHead: staticResources.additionalHead,
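This second inline resource is what backs the vendored `declare const semanticCfg` in index.d.ts: the semanticSearch block is serialized into every page before DOM ready. For the config in quartz.config.ts above, the emitted script body would be roughly the following (illustrative; JSON.stringify key order follows the object literal):

```typescript
const semanticCfg = {"enable":true,"model":"onnx-community/embeddinggemma-300m-ONNX","aot":true,"dims":768,"dtype":"fp32","shardSizeRows":1024,"hnsw":{"M":16,"efConstruction":200},"chunking":{"chunkSize":256,"chunkOverlap":64},"vllm":{"enable":true,"concurrency":16,"batchSize":128}};
```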
quartz/components/scripts/search.inline.ts

@@ -1,6 +1,7 @@
-import FlexSearch, { DefaultDocumentSearchResults } from "flexsearch"
+import FlexSearch, { DefaultDocumentSearchResults, Id } from "flexsearch"
 import { ContentDetails } from "../../plugins/emitters/contentIndex"
-import { registerEscapeHandler, removeAllChildren } from "./util"
+import { SemanticClient, type SemanticResult } from "./semantic.inline"
+import { registerEscapeHandler, removeAllChildren, fetchCanonical } from "./util"
 import { FullSlug, normalizeRelativeURLs, resolveRelative } from "../../util/path"

 interface Item {
@@ -14,43 +15,46 @@ interface Item {
 // Can be expanded with things like "term" in the future
 type SearchType = "basic" | "tags"
-let searchType: SearchType = "basic"
-let currentSearchTerm: string = ""
-const encoder = (str: string) => {
-return str
-.toLowerCase()
-.split(/\s+/)
-.filter((token) => token.length > 0)
+type SearchMode = "lexical" | "semantic"
+const SEARCH_MODE_STORAGE_KEY = "quartz:search:mode"
+
+const loadStoredSearchMode = (): SearchMode | null => {
+if (typeof window === "undefined") {
+return null
+}
+
+try {
+const stored = window.localStorage.getItem(SEARCH_MODE_STORAGE_KEY)
+return stored === "lexical" || stored === "semantic" ? stored : null
+} catch (err) {
+console.warn("[Search] failed to read stored search mode:", err)
+return null
+}
+}

-let index = new FlexSearch.Document<Item>({
-encode: encoder,
-document: {
-id: "id",
-tag: "tags",
-index: [
-{
-field: "title",
-tokenize: "forward",
-},
-{
-field: "content",
-tokenize: "forward",
-},
-{
-field: "tags",
-tokenize: "forward",
-},
-],
-},
-})
+const persistSearchMode = (mode: SearchMode) => {
+if (typeof window === "undefined") {
+return
+}
+
+try {
+window.localStorage.setItem(SEARCH_MODE_STORAGE_KEY, mode)
+} catch (err) {
+console.warn("[Search] failed to persist search mode:", err)
+}
+}
+
+let searchMode: SearchMode = "lexical"
+let currentSearchTerm: string = ""
+let rawSearchTerm: string = ""
+let semantic: SemanticClient | null = null
+let semanticReady = false
+let semanticInitFailed = false
+type SimilarityResult = { item: Item; similarity: number }
+let chunkMetadata: Record<string, { parentSlug: string; chunkId: number }> = {}
+let manifestIds: string[] = []

-const p = new DOMParser()
-const fetchContentCache: Map<FullSlug, Element[]> = new Map()
-const contextWindowWords = 30
-const numSearchResults = 8
-const numTagResults = 5

 const tokenizeTerm = (term: string) => {
 const tokens = term.split(/\s+/).filter((t) => t.trim() !== "")
 const tokenLen = tokens.length
@@ -108,6 +112,102 @@ function highlight(searchTerm: string, text: string, trim?: boolean) {
 }`
 }

+// To be used with search and everything else with flexsearch
+const encoder = (str: string) =>
+str
+.toLowerCase()
+.split(/\s+/)
+.filter((token) => token.length > 0)
+
+/**
+ * Get parent document slug for a chunk ID
+ */
+function getParentSlug(slug: string): string {
+const meta = chunkMetadata[slug]
+return meta ? meta.parentSlug : slug
+}
+
+/**
+ * Aggregate semantic search results from chunks to documents using RRF
+ * @param results Raw semantic results (chunk-level)
+ * @param slugToDocIndex Map from document slug to index in idDataMap
+ * @returns Object with rrfScores (for ranking) and maxScores (for display)
+ */
+function aggregateChunkResults(
+results: SemanticResult[],
+slugToDocIndex: Map<FullSlug, number>,
+): { rrfScores: Map<number, number>; maxScores: Map<number, number> } {
+// Group chunks by parent document
+const docChunks = new Map<string, Array<{ score: number }>>()
+
+results.forEach(({ id, score }) => {
+// id is an index into manifestIds (the chunk IDs from embeddings)
+const chunkSlug = manifestIds[id]
+if (!chunkSlug) return
+
+// Get parent document slug
+const parentSlug = getParentSlug(chunkSlug)
+
+if (!docChunks.has(parentSlug)) {
+docChunks.set(parentSlug, [])
+}
+
+docChunks.get(parentSlug)!.push({ score })
+})
+
+// Apply RRF for ranking and track max similarity for display
+const rrfScores = new Map<number, number>()
+const maxScores = new Map<number, number>()
+const RRF_K = 60
+
+for (const [parentSlug, chunks] of docChunks) {
+const docIdx = slugToDocIndex.get(parentSlug as FullSlug)
+if (typeof docIdx !== "number") continue
+
+// Sort chunks by score descending to assign per-document ranks
+chunks.sort((a, b) => b.score - a.score)
+
+// RRF formula: sum(1 / (k + rank)) across all chunks, using per-document ranks
+const rrfScore = chunks.reduce((sum, _, rank) => sum + 1.0 / (RRF_K + rank), 0)
+
+// Max similarity score for display (original 0-1 range)
+const maxScore = chunks[0].score
+
+rrfScores.set(docIdx, rrfScore)
+maxScores.set(docIdx, maxScore)
+}
+
+return { rrfScores, maxScores }
+}
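A worked example of the RRF ranking above: with k = 60 and per-document ranks starting at 0, a document matched by three chunks scores 1/60 + 1/61 + 1/62 ≈ 0.0492 regardless of how strong each chunk match is, while a single-chunk document scores 1/60 ≈ 0.0167, so breadth of chunk matches dominates the ordering and the raw cosine score is kept only for display:

```typescript
// Reproduces the formula in aggregateChunkResults for n chunks of one document.
const RRF_K = 60
const rrfForChunkCount = (n: number) =>
  Array.from({ length: n }, (_, rank) => 1 / (RRF_K + rank)).reduce((a, b) => a + b, 0)

console.log(rrfForChunkCount(1).toFixed(4)) // "0.0167"
console.log(rrfForChunkCount(3).toFixed(4)) // "0.0492"
```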
+// Initialize the FlexSearch Document instance with the appropriate configuration
+const index = new FlexSearch.Document<Item>({
+tokenize: "forward",
+encode: encoder,
+document: {
+id: "id",
+tag: "tags",
+index: [
+{
+field: "title",
+tokenize: "forward",
+},
+{
+field: "content",
+tokenize: "forward",
+},
+{
+field: "tags",
+tokenize: "forward",
+},
+],
+},
+})
+
+const p = new DOMParser()
+const fetchContentCache: Map<FullSlug, Element[]> = new Map()
+const numSearchResults = 10
+const numTagResults = 10
 function highlightHTML(searchTerm: string, el: HTMLElement) {
 const p = new DOMParser()
 const tokenizedTerms = tokenizeTerm(searchTerm)
@@ -149,7 +249,11 @@ function highlightHTML(searchTerm: string, el: HTMLElement) {
 return html.body
 }

-async function setupSearch(searchElement: Element, currentSlug: FullSlug, data: ContentIndex) {
+async function setupSearch(
+searchElement: HTMLDivElement,
+currentSlug: FullSlug,
+data: ContentIndex,
+) {
 const container = searchElement.querySelector(".search-container") as HTMLElement
 if (!container) return

@@ -164,12 +268,183 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
 const searchLayout = searchElement.querySelector(".search-layout") as HTMLElement
 if (!searchLayout) return

+const searchSpace = searchElement?.querySelector(".search-space") as HTMLFormElement
+if (!searchSpace) return
+
+// Create semantic search progress bar
+const progressBar = document.createElement("div")
+progressBar.className = "semantic-search-progress"
+progressBar.style.cssText = `
+position: absolute;
+bottom: 0;
+left: 0;
+height: 2px;
+width: 0;
+background: var(--secondary);
+transition: width 0.3s ease, opacity 0.3s ease;
+opacity: 0;
+z-index: 9999;
+`
+searchBar.parentElement?.appendChild(progressBar)
+
+const startSemanticProgress = () => {
+progressBar.style.opacity = "1"
+progressBar.style.width = "0"
+setTimeout(() => {
+progressBar.style.width = "100%"
+}, 10)
+}
+
+const completeSemanticProgress = () => {
+progressBar.style.opacity = "0"
+setTimeout(() => {
+progressBar.style.width = "0"
+}, 300)
+}
+
+const resetProgressBar = () => {
+progressBar.style.opacity = "0"
+progressBar.style.width = "0"
+}
+
+const idDataMap = Object.keys(data) as FullSlug[]
+const slugToIndex = new Map<FullSlug, number>()
+idDataMap.forEach((slug, idx) => slugToIndex.set(slug, idx))
+const modeToggle = searchSpace.querySelector(".search-mode-toggle") as HTMLDivElement | null
+const modeButtons = modeToggle
+? Array.from(modeToggle.querySelectorAll<HTMLButtonElement>(".mode-option"))
+: []

 const appendLayout = (el: HTMLElement) => {
 searchLayout.appendChild(el)
 }

 const enablePreview = searchLayout.dataset.preview === "true"
+if (!semantic && !semanticInitFailed) {
+const client = new SemanticClient(semanticCfg)
+try {
+await client.ensureReady()
+semantic = client
+semanticReady = true
+
+// Load chunk metadata and IDs from manifest
+try {
+const manifestUrl = "/embeddings/manifest.json"
+const res = await fetch(manifestUrl)
+if (res.ok) {
+const manifest = await res.json()
+chunkMetadata = manifest.chunkMetadata || {}
+manifestIds = manifest.ids || []
+console.debug(
+`[Search] Loaded manifest: ${manifestIds.length} chunks, ${Object.keys(chunkMetadata).length} chunked documents`,
+)
+}
+} catch (err) {
+console.warn("[Search] failed to load chunk metadata:", err)
+chunkMetadata = {}
+manifestIds = []
+}
+} catch (err) {
+console.warn("[SemanticClient] initialization failed:", err)
+client.dispose()
+semantic = null
+semanticReady = false
+semanticInitFailed = true
+}
+} else if (semantic && !semanticReady) {
+try {
+await semantic.ensureReady()
+semanticReady = true
+} catch (err) {
+console.warn("[SemanticClient] became unavailable:", err)
+semantic.dispose()
+semantic = null
+semanticReady = false
+semanticInitFailed = true
+}
+}
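For orientation, the manifest shape this block expects, reconstructed from the two fields it actually reads (manifest.ids and manifest.chunkMetadata) and from the chunkMetadata type declared earlier; the chunk-ID format itself is a guess:

```typescript
// Sketch of docs/embeddings/manifest.json as consumed above.
interface EmbeddingManifest {
  ids: string[] // chunk IDs, index-aligned with the stored vectors
  chunkMetadata: Record<string, { parentSlug: string; chunkId: number }>
}

const exampleManifest: EmbeddingManifest = {
  ids: ["notes/graph#0", "notes/graph#1"], // hypothetical chunk-ID format
  chunkMetadata: {
    "notes/graph#0": { parentSlug: "notes/graph", chunkId: 0 },
    "notes/graph#1": { parentSlug: "notes/graph", chunkId: 1 },
  },
}
```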
const storedMode = loadStoredSearchMode()
|
||||
if (storedMode === "semantic") {
|
||||
if (semanticReady) {
|
||||
searchMode = storedMode
|
||||
}
|
||||
} else if (storedMode === "lexical") {
|
||||
searchMode = storedMode
|
||||
}
|
||||
if (!semanticReady && searchMode === "semantic") {
|
||||
searchMode = "lexical"
|
||||
}
|
||||
let searchSeq = 0
|
||||
let runSearchTimer: number | null = null
|
||||
let lastInputAt = 0
|
||||
searchLayout.dataset.mode = searchMode
|
||||
|
||||
const updateModeUI = (mode: SearchMode) => {
|
||||
modeButtons.forEach((button) => {
|
||||
const btnMode = (button.dataset.mode as SearchMode) ?? "lexical"
|
||||
const isActive = btnMode === mode
|
||||
button.classList.toggle("active", isActive)
|
||||
button.setAttribute("aria-pressed", String(isActive))
|
||||
})
|
||||
if (modeToggle) {
|
||||
modeToggle.dataset.mode = mode
|
||||
}
|
||||
searchLayout.dataset.mode = mode
|
||||
}
|
||||
|
||||
const computeDebounceDelay = (term: string): number => {
|
||||
const trimmed = term.trim()
|
||||
const lastTerm = currentSearchTerm
|
||||
const isExtension =
|
||||
lastTerm.length > 0 && trimmed.length > lastTerm.length && trimmed.startsWith(lastTerm)
|
||||
const isRetraction = lastTerm.length > trimmed.length
|
||||
const isReplacement =
|
||||
lastTerm.length > 0 && !trimmed.startsWith(lastTerm) && !lastTerm.startsWith(trimmed)
|
||||
const baseFullQueryDelay = 200
|
||||
const semanticPenalty = searchMode === "semantic" ? 60 : 0
|
||||
|
||||
if (isExtension && trimmed.length > 2) {
|
||||
return baseFullQueryDelay + semanticPenalty
|
||||
}
|
||||
|
||||
if (isReplacement && trimmed.length > 3) {
|
||||
return Math.max(90, baseFullQueryDelay - 80)
|
||||
}
|
||||
|
||||
if (isRetraction) {
|
||||
return 90
|
||||
}
|
||||
|
||||
return baseFullQueryDelay + (searchMode === "semantic" ? 40 : 0)
|
||||
}
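
// Worked example (illustrative): with currentSearchTerm = "graph", typing
// "graph l" is an extension and waits 200ms (plus 60ms in semantic mode);
// deleting back to "grap" is a retraction and waits only 90ms; pasting
// "hnsw" is a replacement and waits Math.max(90, 200 - 80) = 120ms.
// Extensions wait longest because another keystroke usually follows.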

const triggerSearchWithMode = (mode: SearchMode) => {
  if (mode === "semantic" && !semanticReady) {
    return
  }
  if (searchMode === mode) return
  searchMode = mode
  updateModeUI(mode)
  persistSearchMode(searchMode)
  if (rawSearchTerm.trim() !== "") {
    searchLayout.classList.add("display-results")
    const token = ++searchSeq
    void runSearch(rawSearchTerm, token)
  }
}

updateModeUI(searchMode)

modeButtons.forEach((button) => {
  const btnMode = (button.dataset.mode as SearchMode) ?? "lexical"
  if (btnMode === "semantic") {
    button.disabled = !semanticReady
    button.setAttribute("aria-disabled", String(!semanticReady))
  }
  const handler = () => triggerSearchWithMode(btnMode)
  button.addEventListener("click", handler)
  window.addCleanup(() => button.removeEventListener("click", handler))
})
let preview: HTMLDivElement | undefined = undefined
let previewInner: HTMLDivElement | undefined = undefined
const results = document.createElement("div")
@@ -191,20 +466,23 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
    removeAllChildren(preview)
  }
  searchLayout.classList.remove("display-results")
  searchType = "basic" // reset search type after closing
  searchButton.focus()
  resetProgressBar()
}

function showSearch(searchTypeNew: SearchType) {
  searchType = searchTypeNew
  if (sidebar) sidebar.style.zIndex = "1"
function showSearch(type: SearchType) {
  container.classList.add("active")
  if (type === "tags") {
    searchBar.value = "#"
    rawSearchTerm = "#"
  }
  searchBar.focus()
}

let currentHover: HTMLInputElement | null = null

async function shortcutHandler(e: HTMLElementEventMap["keydown"]) {
  if (e.key === "k" && (e.ctrlKey || e.metaKey) && !e.shiftKey) {
  if ((e.key === "/" || e.key === "k") && (e.ctrlKey || e.metaKey) && !e.shiftKey) {
    e.preventDefault()
    const searchBarOpen = container.classList.contains("active")
    searchBarOpen ? hideSearch() : showSearch("basic")
@@ -214,9 +492,6 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
    e.preventDefault()
    const searchBarOpen = container.classList.contains("active")
    searchBarOpen ? hideSearch() : showSearch("tags")

    // add "#" prefix for tag search
    searchBar.value = "#"
    return
  }

@@ -226,20 +501,29 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:

  // If search is active, then we will render the first result and display accordingly
  if (!container.classList.contains("active")) return
  if (e.key === "Enter" && !e.isComposing) {
  if (e.key === "Enter") {
    // If result has focus, navigate to that one, otherwise pick first result
    let anchor: HTMLAnchorElement | undefined
    if (results.contains(document.activeElement)) {
      const active = document.activeElement as HTMLInputElement
      if (active.classList.contains("no-match")) return
      await displayPreview(active)
      active.click()
      anchor = document.activeElement as HTMLAnchorElement
      if (anchor.classList.contains("no-match")) return
      await displayPreview(anchor)
      e.preventDefault()
      anchor.click()
    } else {
      const anchor = document.getElementsByClassName("result-card")[0] as HTMLInputElement | null
      anchor = document.getElementsByClassName("result-card")[0] as HTMLAnchorElement
      if (!anchor || anchor.classList.contains("no-match")) return
      await displayPreview(anchor)
      e.preventDefault()
      anchor.click()
    }
  } else if (e.key === "ArrowUp" || (e.shiftKey && e.key === "Tab")) {
    if (anchor !== undefined)
      window.spaNavigate(new URL(new URL(anchor.href).pathname, window.location.toString()))
  } else if (
    e.key === "ArrowUp" ||
    (e.shiftKey && e.key === "Tab") ||
    (e.ctrlKey && e.key === "p")
  ) {
    e.preventDefault()
    if (results.contains(document.activeElement)) {
      // If an element in results-container already has focus, focus previous one
@@ -252,7 +536,7 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
      if (prevResult) currentHover = prevResult
      await displayPreview(prevResult)
    }
  } else if (e.key === "ArrowDown" || e.key === "Tab") {
  } else if (e.key === "ArrowDown" || e.key === "Tab" || (e.ctrlKey && e.key === "n")) {
    e.preventDefault()
    // The results should already be focused, so we need to find the next one.
    // The activeElement is the search bar, so we need to find the first result and focus it.
@@ -269,25 +553,33 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
  }
}

const formatForDisplay = (term: string, id: number) => {
const formatForDisplay = (term: string, id: number, renderType: SearchType) => {
  const slug = idDataMap[id]

  // Check if query contains title words (for boosting exact matches)
  const queryTokens = tokenizeTerm(term)
  const titleTokens = tokenizeTerm(data[slug].title ?? "")
  const titleMatch = titleTokens.some((t) => queryTokens.includes(t))

  return {
    id,
    slug,
    title: searchType === "tags" ? data[slug].title : highlight(term, data[slug].title ?? ""),
    title: renderType === "tags" ? data[slug].title : highlight(term, data[slug].title ?? ""),
    content: highlight(term, data[slug].content ?? "", true),
    tags: highlightTags(term.substring(1), data[slug].tags),
    tags: highlightTags(term, data[slug].tags, renderType),
    titleMatch, // Add title match flag for boosting
  }
}

function highlightTags(term: string, tags: string[]) {
  if (!tags || searchType !== "tags") {
function highlightTags(term: string, tags: string[], renderType: SearchType) {
  if (!tags || renderType !== "tags") {
    return []
  }

  const tagTerm = term.toLowerCase()
  return tags
    .map((tag) => {
      if (tag.toLowerCase().includes(term.toLowerCase())) {
      if (tag.toLowerCase().includes(tagTerm)) {
        return `<li><p class="match-tag">#${tag}</p></li>`
      } else {
        return `<li><p>#${tag}</p></li>`
@@ -300,24 +592,40 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
  return new URL(resolveRelative(currentSlug, slug), location.toString())
}

const resultToHTML = ({ slug, title, content, tags }: Item) => {
const resultToHTML = ({ item, percent }: { item: Item; percent: number | null }) => {
  const { slug, title, content, tags, target } = item
  const htmlTags = tags.length > 0 ? `<ul class="tags">${tags.join("")}</ul>` : ``
  const itemTile = document.createElement("a")
  const titleContent = target ? highlight(currentSearchTerm, target) : title
  const subscript = target ? `<b>${slug}</b>` : ``
  let percentLabel = "—"
  let percentAttr = ""
  if (percent !== null && Number.isFinite(percent)) {
    const bounded = Math.max(0, Math.min(100, percent))
    percentLabel = `${bounded.toFixed(1)}%`
    percentAttr = bounded.toFixed(3)
  }
  itemTile.classList.add("result-card")
  itemTile.id = slug
  itemTile.href = resolveUrl(slug).toString()
  itemTile.innerHTML = `
    <h3 class="card-title">${title}</h3>
    ${htmlTags}
    <p class="card-description">${content}</p>
  `
  itemTile.addEventListener("click", (event) => {
    if (event.altKey || event.ctrlKey || event.metaKey || event.shiftKey) return
    hideSearch()
  })
  itemTile.innerHTML = `<hgroup>
    <h3>${titleContent}</h3>
    ${subscript}${htmlTags}
    ${searchMode === "semantic" ? `<span class="result-likelihood" title="match likelihood"> ${percentLabel}</span>` : ""}
    ${enablePreview && window.innerWidth > 600 ? "" : `<p>${content}</p>`}
  </hgroup>`
  if (percentAttr) itemTile.dataset.scorePercent = percentAttr
  else delete itemTile.dataset.scorePercent

  const handler = (event: MouseEvent) => {
    if (event.altKey || event.ctrlKey || event.metaKey || event.shiftKey) return
  const handler = (evt: MouseEvent) => {
    if (evt.altKey || evt.ctrlKey || evt.metaKey || evt.shiftKey) return
    const anchor = evt.currentTarget as HTMLAnchorElement | null
    if (!anchor) return
    evt.preventDefault()
    const href = anchor.getAttribute("href")
    if (!href) return
    const url = new URL(href, window.location.toString())
    window.spaNavigate(url)
    hideSearch()
  }

@@ -335,15 +643,22 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
  return itemTile
}

async function displayResults(finalResults: Item[]) {
async function displayResults(finalResults: SimilarityResult[]) {
  removeAllChildren(results)
  if (finalResults.length === 0) {
    results.innerHTML = `<a class="result-card no-match">
      <h3>No results.</h3>
      <p>Try another search term?</p>
    </a>`
    currentHover = null
  } else {
    results.append(...finalResults.map(resultToHTML))
    const decorated = finalResults.map(({ item, similarity }) => {
      if (!Number.isFinite(similarity)) return { item, percent: null }
      const bounded = Math.max(-1, Math.min(1, similarity))
      const percent = ((bounded + 1) / 2) * 100
      return { item, percent }
    })
    results.append(...decorated.map(resultToHTML))
  }
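
  // Illustrative mapping: cosine similarity is clamped to [-1, 1] and scaled
  // into [0, 100], so a similarity of 0.62 renders as ((0.62 + 1) / 2) * 100 = 81.0%,
  // while a lexical-only hit (similarity NaN) keeps the "—" placeholder label.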

  if (finalResults.length === 0 && preview) {
@@ -363,8 +678,8 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
    return fetchContentCache.get(slug) as Element[]
  }

  const targetUrl = resolveUrl(slug).toString()
  const contents = await fetch(targetUrl)
  const targetUrl = resolveUrl(slug)
  const contents = await fetchCanonical(targetUrl)
    .then((res) => res.text())
    .then((contents) => {
      if (contents === undefined) {
@@ -394,73 +709,296 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
  const highlights = [...preview.getElementsByClassName("highlight")].sort(
    (a, b) => b.innerHTML.length - a.innerHTML.length,
  )
  highlights[0]?.scrollIntoView({ block: "start" })
  if (highlights.length > 0) {
    const highlight = highlights[0]
    const container = preview
    if (container && highlight) {
      // Get the relative positions
      const containerRect = container.getBoundingClientRect()
      const highlightRect = highlight.getBoundingClientRect()
      // Calculate the scroll position relative to the container
      const relativeTop = highlightRect.top - containerRect.top + container.scrollTop - 20 // 20px buffer
      // Smoothly scroll the container
      container.scrollTo({
        top: relativeTop,
        behavior: "smooth",
      })
    }
  }
}

async function onType(e: HTMLElementEventMap["input"]) {
async function runSearch(rawTerm: string, token: number) {
  if (!searchLayout || !index) return
  currentSearchTerm = (e.target as HTMLInputElement).value
  searchLayout.classList.toggle("display-results", currentSearchTerm !== "")
  searchType = currentSearchTerm.startsWith("#") ? "tags" : "basic"
  const trimmed = rawTerm.trim()
  if (trimmed === "") {
    removeAllChildren(results)
    if (preview) {
      removeAllChildren(preview)
    }
    currentHover = null
    searchLayout.classList.remove("display-results")
    resetProgressBar()
    return
  }

  let searchResults: DefaultDocumentSearchResults<Item>
  if (searchType === "tags") {
    currentSearchTerm = currentSearchTerm.substring(1).trim()
    const separatorIndex = currentSearchTerm.indexOf(" ")
    if (separatorIndex != -1) {
      // search by title and content index and then filter by tag (implemented in flexsearch)
      const tag = currentSearchTerm.substring(0, separatorIndex)
      const query = currentSearchTerm.substring(separatorIndex + 1).trim()
      searchResults = await index.searchAsync({
        query: query,
        // return at least 10000 documents, so it is enough to filter them by tag (implemented in flexsearch)
  const modeForRanking: SearchMode = searchMode
  const initialType: SearchType = trimmed.startsWith("#") ? "tags" : "basic"
  let workingType: SearchType = initialType
  let highlightTerm = trimmed
  let tagTerm = ""
  let searchResults: DefaultDocumentSearchResults<Item> = []

  if (initialType === "tags") {
    tagTerm = trimmed.substring(1).trim()
    const separatorIndex = tagTerm.indexOf(" ")
    if (separatorIndex !== -1) {
      const tag = tagTerm.substring(0, separatorIndex).trim()
      const query = tagTerm.substring(separatorIndex + 1).trim()
      const results = await index.searchAsync({
        query,
        limit: Math.max(numSearchResults, 10000),
        index: ["title", "content"],
        tag: { tags: tag },
      })
      for (let searchResult of searchResults) {
        searchResult.result = searchResult.result.slice(0, numSearchResults)
      }
      // set search type to basic and remove the tag from the term for proper highlighting and scrolling
searchType = "basic"
|
||||
currentSearchTerm = query
|
||||
if (token !== searchSeq) return
|
||||
searchResults = Object.values(results)
|
||||
workingType = "basic"
|
||||
highlightTerm = query
|
||||
} else {
|
||||
// default search by tags index
|
||||
searchResults = await index.searchAsync({
|
||||
query: currentSearchTerm,
|
||||
const results = await index.searchAsync({
|
||||
query: tagTerm,
|
||||
limit: numSearchResults,
|
||||
index: ["tags"],
|
||||
})
|
||||
if (token !== searchSeq) return
|
||||
searchResults = Object.values(results)
|
||||
highlightTerm = tagTerm
|
||||
}
|
||||
} else if (searchType === "basic") {
|
||||
searchResults = await index.searchAsync({
|
||||
query: currentSearchTerm,
|
||||
} else {
|
||||
const results = await index.searchAsync({
|
||||
query: highlightTerm,
|
||||
limit: numSearchResults,
|
||||
index: ["title", "content"],
|
||||
})
|
||||
if (token !== searchSeq) return
|
||||
searchResults = Object.values(results)
|
||||
}
|
||||
|
||||
const coerceIds = (hit?: DefaultDocumentSearchResults<Item>[number]): number[] => {
|
||||
if (!hit) return []
|
||||
return hit.result
|
||||
.map((value: Id) => {
|
||||
if (typeof value === "number") {
|
||||
return value
|
||||
}
|
||||
const parsed = Number.parseInt(String(value), 10)
|
||||
return Number.isNaN(parsed) ? null : parsed
|
||||
})
|
||||
.filter((value): value is number => value !== null)
|
||||
}
|
||||
|
||||
const getByField = (field: string): number[] => {
|
||||
const results = searchResults.filter((x) => x.field === field)
|
||||
return results.length === 0 ? [] : ([...results[0].result] as number[])
|
||||
const hit = searchResults.find((x) => x.field === field)
|
||||
return coerceIds(hit)
|
||||
}
|
||||
|
||||
// order titles ahead of content
|
||||
const allIds: Set<number> = new Set([
|
||||
...getByField("title"),
|
||||
...getByField("content"),
|
||||
...getByField("tags"),
|
||||
])
|
||||
const finalResults = [...allIds].map((id) => formatForDisplay(currentSearchTerm, id))
|
||||
await displayResults(finalResults)
|
||||
|
||||
currentSearchTerm = highlightTerm
|
||||
|
||||
const candidateItems = new Map<string, Item>()
|
||||
const ensureItem = (id: number): Item | null => {
|
||||
const slug = idDataMap[id]
|
||||
if (!slug) return null
|
||||
const cached = candidateItems.get(slug)
|
||||
if (cached) return cached
|
||||
const item = formatForDisplay(highlightTerm, id, workingType)
|
||||
if (item) {
|
||||
candidateItems.set(slug, item)
|
||||
return item
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
const baseIndices: number[] = []
|
||||
for (const id of allIds) {
|
||||
const item = ensureItem(id)
|
||||
if (!item) continue
|
||||
const idx = slugToIndex.get(item.slug)
|
||||
if (typeof idx === "number") {
|
||||
baseIndices.push(idx)
|
||||
}
|
||||
}
|
||||
|
||||
let semanticIds: number[] = []
|
||||
const semanticSimilarity = new Map<number, number>()
|
||||
|
||||
const integrateIds = (ids: number[]) => {
|
||||
ids.forEach((docId) => {
|
||||
ensureItem(docId)
|
||||
})
|
||||
}
|
||||
|
||||
const orchestrator = semanticReady && semantic ? semantic : null
|
||||
|
||||
const resolveSimilarity = (item: Item): number => {
|
||||
const semanticHit = semanticSimilarity.get(item.id)
|
||||
return semanticHit ?? Number.NaN
|
||||
}
|
||||
|
||||
const render = async () => {
|
||||
if (token !== searchSeq) return
|
||||
const useSemantic = semanticReady && semanticIds.length > 0
|
||||
const weights =
|
||||
modeForRanking === "semantic" && useSemantic
|
||||
? { base: 0.3, semantic: 1.0 }
|
||||
: { base: 1.0, semantic: useSemantic ? 0.3 : 0 }
|
||||
const rrf = new Map<string, number>()
|
||||
const push = (ids: number[], weight: number, applyTitleBoost: boolean = false) => {
|
||||
if (!ids.length || weight <= 0) return
|
||||
ids.forEach((docId, rank) => {
|
||||
const slug = idDataMap[docId]
|
||||
if (!slug) return
|
||||
const item = ensureItem(docId)
|
||||
if (!item) return
|
||||
|
||||
// Apply title boost for FlexSearch results (1.5x boost for exact title matches)
|
||||
let effectiveWeight = weight
|
||||
if (applyTitleBoost && item.titleMatch) {
|
||||
effectiveWeight *= 1.5
|
||||
}
|
||||
|
||||
const prev = rrf.get(slug) ?? 0
|
||||
rrf.set(slug, prev + effectiveWeight / (1 + rank))
|
||||
})
|
||||
}
|
||||
|
||||
push(baseIndices, weights.base, true) // FlexSearch with title boost
|
||||
push(semanticIds, weights.semantic, false) // Semantic without boost
|
||||
|
||||
const rankedEntries = Array.from(candidateItems.values())
|
||||
.map((item) => ({ item, score: rrf.get(item.slug) ?? 0 }))
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, numSearchResults)
|
||||
|
||||
const displayEntries: SimilarityResult[] = []
|
||||
for (const entry of rankedEntries) {
|
||||
const similarity = resolveSimilarity(entry.item)
|
||||
displayEntries.push({ item: entry.item, similarity })
|
||||
}
|
||||
|
||||
await displayResults(displayEntries)
|
||||
}
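
  // Worked example (illustrative) of the weighted RRF above: in semantic mode,
  // a doc ranked #1 by FlexSearch with a title match and #3 semantically scores
  // 0.3 * 1.5 / (1 + 0) + 1.0 / (1 + 2) ≈ 0.78, while a doc ranked #1
  // semantically alone scores 1.0 / (1 + 0) = 1.0 and outranks it.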

  await render()

  if (workingType === "tags" || !orchestrator || !semanticReady || highlightTerm.length < 2) {
    return
  }

  const showProgress = modeForRanking === "semantic"
  if (showProgress) {
    startSemanticProgress()
  }

  try {
    const { semantic: semRes } = await orchestrator.search(
      highlightTerm,
      numSearchResults * 3, // Request more chunks to ensure good document coverage
    )
    if (token !== searchSeq) {
      if (showProgress) completeSemanticProgress()
      return
    }

    // Aggregate chunk results to document level using RRF
    const { rrfScores: semRrfScores, maxScores: semMaxScores } = aggregateChunkResults(
      semRes,
      slugToIndex,
    )

    // Use RRF scores for ranking
    semanticIds = Array.from(semRrfScores.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, numSearchResults)
      .map(([docIdx]) => docIdx)

    // Use max chunk similarity for display (0-1 range)
    semanticSimilarity.clear()
    semMaxScores.forEach((score, docIdx) => {
      semanticSimilarity.set(docIdx, score)
    })

    integrateIds(semanticIds)
    if (showProgress) completeSemanticProgress()
  } catch (err) {
    console.warn("[SemanticClient] search failed:", err)
    if (showProgress) completeSemanticProgress()
    orchestrator.dispose()
    semantic = null
    semanticReady = false
    semanticInitFailed = true
    if (searchMode === "semantic") {
      searchMode = "lexical"
      updateModeUI(searchMode)
    }
    modeButtons.forEach((button) => {
      if ((button.dataset.mode as SearchMode) === "semantic") {
        button.disabled = true
        button.setAttribute("aria-disabled", "true")
      }
    })
  }

  await render()
}

function onType(e: HTMLElementEventMap["input"]) {
  if (!searchLayout || !index) return
  rawSearchTerm = (e.target as HTMLInputElement).value
  const hasQuery = rawSearchTerm.trim() !== ""
  searchLayout.classList.toggle("display-results", hasQuery)
  const term = rawSearchTerm
  const token = ++searchSeq
  if (runSearchTimer !== null) {
    window.clearTimeout(runSearchTimer)
    runSearchTimer = null
  }
  if (!hasQuery) {
    void runSearch("", token)
    return
  }
  const now = performance.now()
  lastInputAt = now
  const delay = computeDebounceDelay(term)
  const scheduledAt = lastInputAt
  runSearchTimer = window.setTimeout(() => {
    if (scheduledAt !== lastInputAt) {
      return
    }
    runSearchTimer = null
    void runSearch(term, token)
  }, delay)
}
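
// Two staleness guards (illustrative summary): runSearch checks its captured
// token against searchSeq before touching the DOM, and the timer callback
// compares scheduledAt with lastInputAt so a timer that fires after newer
// input has rescheduled the search returns without running it.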

document.addEventListener("keydown", shortcutHandler)
window.addCleanup(() => document.removeEventListener("keydown", shortcutHandler))
searchButton.addEventListener("click", () => showSearch("basic"))
window.addCleanup(() => searchButton.removeEventListener("click", () => showSearch("basic")))
const openHandler = () => showSearch("basic")
searchButton.addEventListener("click", openHandler)
window.addCleanup(() => searchButton.removeEventListener("click", openHandler))
searchBar.addEventListener("input", onType)
window.addCleanup(() => searchBar.removeEventListener("input", onType))
window.addCleanup(() => {
  if (runSearchTimer !== null) {
    window.clearTimeout(runSearchTimer)
    runSearchTimer = null
  }
  resetProgressBar()
})

registerEscapeHandler(container, hideSearch)
await fillDocument(data)
@@ -468,17 +1006,17 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:

/**
 * Fills flexsearch document with data
 * @param index index to fill
 * @param data data to fill index with
 */
let indexPopulated = false
async function fillDocument(data: ContentIndex) {
  if (indexPopulated) return
  let id = 0
  const promises: Array<Promise<unknown>> = []
  const promises = []
  for (const [slug, fileData] of Object.entries<ContentDetails>(data)) {
    promises.push(
      index.addAsync(id++, {
      //@ts-ignore
      index.addAsync({
        id,
        slug: slug as FullSlug,
        title: fileData.title,
@@ -486,6 +1024,7 @@ async function fillDocument(data: ContentIndex) {
        tags: fileData.tags,
      }),
    )
    id++
  }

  await Promise.all(promises)
@@ -495,7 +1034,9 @@ async function fillDocument(data: ContentIndex) {
document.addEventListener("nav", async (e: CustomEventMap["nav"]) => {
  const currentSlug = e.detail.url
  const data = await fetchData
  const searchElement = document.getElementsByClassName("search")
  const searchElement = document.getElementsByClassName(
    "search",
  ) as HTMLCollectionOf<HTMLDivElement>
  for (const element of searchElement) {
    await setupSearch(element, currentSlug, data)
  }
182
quartz/components/scripts/semantic.inline.ts
Normal file
@@ -0,0 +1,182 @@
export type SemanticResult = { id: number; score: number }

type ProgressMessage = {
  type: "progress"
  loadedRows: number
  totalRows: number
}

type ReadyMessage = { type: "ready" }

type ResultMessage = {
  type: "search-result"
  seq: number
  semantic: SemanticResult[]
}

type ErrorMessage = { type: "error"; seq?: number; message: string }

type SearchPayload = {
  semantic: SemanticResult[]
}

type PendingResolver = {
  resolve: (payload: SearchPayload) => void
  reject: (err: Error) => void
}

export class SemanticClient {
  private ready: Promise<void>
  private resolveReady!: () => void
  private worker: Worker | null = null
  private pending = new Map<number, PendingResolver>()
  private seq = 0
  private disposed = false
  private readySettled = false
  private configured = false
  private lastError: Error | null = null

  constructor(private cfg?: any) {
    this.ready = new Promise((resolve) => {
      this.resolveReady = () => {
        if (this.readySettled) return
        this.readySettled = true
        resolve()
      }
    })

    if (this.cfg?.enable === false) {
      this.lastError = new Error("semantic search disabled by configuration")
      this.resolveReady()
      return
    }

    this.boot()
  }

  private boot() {
    try {
      this.worker = new Worker("/semantic.worker.js", { type: "module" })
    } catch (err) {
      this.handleFatal(err)
      return
    }
    this.setupWorker()
    this.startInit()
  }

  private setupWorker() {
    if (!this.worker) return
    this.worker.onmessage = (
      event: MessageEvent<ProgressMessage | ReadyMessage | ResultMessage | ErrorMessage>,
    ) => {
      const msg = event.data
      if (msg.type === "progress") {
        // Progress updates during initialization - can be logged if needed
        return
      }
      if (msg.type === "ready") {
        this.configured = true
        this.lastError = null
        this.resolveReady()
        return
      }
      if (msg.type === "search-result") {
        const pending = this.pending.get(msg.seq)
        if (pending) {
          this.pending.delete(msg.seq)
          pending.resolve({ semantic: msg.semantic ?? [] })
        }
        return
      }
      if (msg.type === "error") {
        if (typeof msg.seq === "number") {
          const pending = this.pending.get(msg.seq)
          if (pending) {
            this.pending.delete(msg.seq)
            pending.reject(new Error(msg.message))
          }
        } else {
          this.handleFatal(msg.message)
        }
      }
    }
  }

  private startInit() {
    if (!this.worker) return
    const manifestUrl =
      typeof this.cfg?.manifestUrl === "string" && this.cfg.manifestUrl.length > 0
        ? this.cfg.manifestUrl
        : "/embeddings/manifest.json"
    const disableCache = Boolean(this.cfg?.disableCache)
    const baseUrl =
      typeof this.cfg?.manifestBaseUrl === "string" ? this.cfg.manifestBaseUrl : undefined
    this.worker.postMessage({
      type: "init",
      cfg: this.cfg,
      manifestUrl,
      baseUrl,
      disableCache,
    })
  }

  private rejectAll(err: Error, fatal = false) {
    for (const [id, pending] of this.pending.entries()) {
      pending.reject(err)
      this.pending.delete(id)
    }
    if (fatal) {
      this.lastError = err
      this.configured = false
      if (!this.readySettled) {
        this.resolveReady()
      }
    }
  }

  private handleFatal(err: unknown) {
    const error = err instanceof Error ? err : new Error(String(err))
    console.error("[SemanticClient] initialization failure:", error)
    this.rejectAll(error, true)
    if (this.worker) {
      this.worker.postMessage({ type: "reset" })
      this.worker.terminate()
      this.worker = null
    }
  }

  async ensureReady() {
    await this.ready
    if (!this.configured) {
      throw this.lastError ?? new Error("semantic search unavailable")
    }
  }

  async search(text: string, k: number): Promise<SearchPayload> {
    if (this.disposed) {
      throw new Error("semantic client has been disposed")
    }
    await this.ensureReady()
    if (!this.worker || !this.configured) {
      throw this.lastError ?? new Error("worker unavailable")
    }
    return new Promise<SearchPayload>((resolve, reject) => {
      const seq = ++this.seq
      this.pending.set(seq, { resolve, reject })
      this.worker?.postMessage({ type: "search", text, k, seq })
    })
  }

  dispose() {
    if (this.disposed) return
    this.disposed = true
    this.rejectAll(new Error("semantic client disposed"))
    if (this.worker) {
      this.worker.postMessage({ type: "reset" })
      this.worker.terminate()
    }
    this.worker = null
    this.configured = false
  }
}
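
A minimal usage sketch for the client above, mirroring how search.inline.ts drives it (error handling trimmed):

const client = new SemanticClient(semanticCfg)
try {
  await client.ensureReady()
  // k = number of chunk hits requested; ids index rows of the embeddings manifest
  const { semantic } = await client.search("graph layout", 24)
  console.debug(semantic.slice(0, 3)) // [{ id, score }, ...]
} catch (err) {
  client.dispose() // terminates the worker and rejects any in-flight searches
}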

@@ -77,16 +77,97 @@
  margin-bottom: 2em;
}

& > input {
& > .input-container {
  align-items: center;
  gap: 0.5rem;
  display: flex;
  flex-wrap: wrap;
  position: relative;
  box-sizing: border-box;
  padding: 0.5em 1em;
  font-family: var(--bodyFont);
  color: var(--dark);
  font-size: 1.1em;
  border: 1px solid var(--lightgray);

  &:focus {
    outline: none;
  .search-bar {
    flex: 1 1 auto;
    min-width: 0;
    box-sizing: border-box;
    padding: 0.5em 1em;
    font-family: var(--bodyFont);
    color: var(--dark);
    font-size: 1.1em;
    border: none;
    background: transparent;

    &:focus {
      outline: none;
    }
  }

  .semantic-search-progress {
    position: absolute;
    bottom: 0;
    left: 0;
    right: 0;
    height: 2px;
    background-color: var(--secondary);
    width: 0;
    opacity: 0;
    transition:
      width 0.3s ease,
      opacity 0.2s ease;
    pointer-events: none;
  }

  .search-mode-toggle {
    display: inline-flex;
    align-items: center;
    border-radius: 9999px;
    height: 1.4rem;
    background-color: color-mix(in srgb, var(--darkgray) 12%, transparent);
    margin-right: 1rem;

    .mode-option {
      border: none;
      background: transparent;
      font: inherit;
      color: var(--gray);
      border-radius: 9999px;
      cursor: pointer;
      transition:
        background-color 0.2s ease,
        color 0.2s ease;
      display: inline-flex;
      align-items: center;
      justify-content: center;
      width: 1.5rem;
      height: 1.5rem;
      position: relative;

      &:focus-visible {
        outline: 2px solid var(--tertiary);
        outline-offset: 2px;
      }

      &.active {
        background-color: var(--secondary);
        color: var(--light);
      }

      svg {
        width: 18px;
        height: 18px;
      }

      .sr-only {
        position: absolute;
        width: 1px;
        height: 1px;
        padding: 0;
        margin: -1px;
        overflow: hidden;
        clip: rect(0, 0, 0, 0);
        white-space: nowrap;
        border: 0;
      }
    }
  }
}

542
quartz/embed_build.py
Normal file
@@ -0,0 +1,542 @@
# /// script
# requires-python = ">=3.11"
# dependencies = [
#   "langchain-text-splitters",
#   "numpy",
#   "openai",
#   "sentence-transformers",
#   "tiktoken",
# ]
# ///

from __future__ import annotations

import os, json, argparse, hashlib, math, random, logging

from pathlib import Path
from functools import lru_cache
from collections.abc import Iterable
from concurrent.futures import ThreadPoolExecutor, as_completed

import tiktoken, numpy as np

from openai import OpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter


logger = logging.getLogger(__name__)
DEFAULT_VLLM_URL = os.environ.get("VLLM_URL") or os.environ.get("VLLM_EMBED_URL") or "http://127.0.0.1:8000/v1"


def resolve_vllm_base_url(url: str) -> str:
    if not url:
        raise ValueError("vLLM URL must be non-empty")

    trimmed = url.rstrip("/")
    if trimmed.endswith("/v1/embeddings"):
        trimmed = trimmed[: -len("/embeddings")]
    elif trimmed.endswith("/embeddings"):
        trimmed = trimmed[: trimmed.rfind("/")]

    if not trimmed.endswith("/v1"):
        trimmed = f"{trimmed}/v1"

    return trimmed
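
# Examples (illustrative) of the normalization above:
#   "http://host:8000"               -> "http://host:8000/v1"
#   "http://host:8000/v1/"           -> "http://host:8000/v1"
#   "http://host:8000/v1/embeddings" -> "http://host:8000/v1"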


def load_jsonl(fp: str) -> Iterable[dict]:
    with open(fp, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            yield json.loads(line)


def l2_normalize_rows(x: np.ndarray) -> np.ndarray:
    # x: [N, D]
    norms = np.linalg.norm(x, ord=2, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    return x / norms


@lru_cache(maxsize=1)
def get_tiktoken_encoder():
    # Get the o200k_base tokenizer (GPT-4o) with caching;
    # change this if you want something else.
    return tiktoken.get_encoding("o200k_base")


def count_tokens(text: str) -> int:
    # Count tokens using the o200k_base encoding
    encoder = get_tiktoken_encoder()
    return len(encoder.encode(text))


def get_text_splitter(chunk_size: int, overlap: int):
    encoder = get_tiktoken_encoder()
    return RecursiveCharacterTextSplitter(
        # sizes are in tokens, since length_function below measures token counts
        chunk_size=chunk_size,
        chunk_overlap=overlap,
        separators=["\n\n", "\n", ". ", " ", ""],
        length_function=lambda t: len(encoder.encode(t)),
        is_separator_regex=False,
    )


def chunk_document(
    doc: dict, max_tokens: int = 512, overlap_tokens: int = 128, min_chunk_size: int = 100
) -> list[dict]:
    """
    Chunk a document if it exceeds max_tokens

    Args:
        doc: {'slug': str, 'title': str, 'text': str}
        max_tokens: Maximum tokens per chunk
        overlap_tokens: Overlap between chunks
        min_chunk_size: Minimum chunk size (avoid tiny chunks)

    Returns:
        List of chunk dicts with metadata
    """
    text = doc["text"]
    token_count = count_tokens(text)

    # No chunking needed
    if token_count <= max_tokens:
        return [
            {
                "slug": doc["slug"],
                "title": doc.get("title", doc["slug"]),
                "text": text,
                "chunk_id": 0,
                "parent_slug": doc["slug"],
                "is_chunked": False,
            }
        ]

    # Apply chunking
    splitter = get_text_splitter(max_tokens, overlap_tokens)
    raw_chunks = splitter.split_text(text)

    # Filter out tiny chunks
    valid_chunks = [c for c in raw_chunks if count_tokens(c) >= min_chunk_size]

    return [
        {
            "slug": f"{doc['slug']}#chunk{i}",
            "title": doc.get("title", doc["slug"]),
            "text": chunk,
            "chunk_id": i,
            "parent_slug": doc["slug"],
            "is_chunked": True,
        }
        for i, chunk in enumerate(valid_chunks)
    ]
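
# Illustrative: a page of roughly 1300 tokens splits into overlapping chunks
# whose slugs extend the parent slug (slug below is hypothetical), e.g.
#   chunk_document({"slug": "notes/hnsw", "title": "HNSW", "text": long_text})
#   -> [{"slug": "notes/hnsw#chunk0", "chunk_id": 0, "parent_slug": "notes/hnsw", ...},
#       {"slug": "notes/hnsw#chunk1", "chunk_id": 1, "parent_slug": "notes/hnsw", ...}]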


def write_shards(vectors: np.ndarray, shard_size: int, dtype: str, out_dir: Path) -> list[dict]:
    out_dir.mkdir(parents=True, exist_ok=True)
    rows, dims = vectors.shape
    shards_meta: list[dict] = []
    np_dtype = np.float16 if dtype == "fp16" else np.float32
    bytes_per_value = np.dtype(np_dtype).itemsize
    row_offset = 0
    for si, start in enumerate(range(0, rows, shard_size)):
        end = min(start + shard_size, rows)
        shard = vectors[start:end]  # [n, dims]
        bin_path = out_dir / f"vectors-{si:03d}.bin"
        payload = shard.astype(np_dtype, copy=False).tobytes(order="C")
        digest = hashlib.sha256(payload).hexdigest()
        with open(bin_path, "wb") as f:
            f.write(payload)
        shard_rows = int(shard.shape[0])
        shards_meta.append(
            {
                "path": f"/embeddings/{bin_path.name}",
                "rows": shard_rows,
                "rowOffset": row_offset,
                "byteLength": len(payload),
                "sha256": digest,
                "byteStride": dims * bytes_per_value,
            },
        )
        row_offset += shard_rows
    return shards_meta
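
# One entry of the returned metadata looks like this (values illustrative):
#   {"path": "/embeddings/vectors-000.bin", "rows": 1024, "rowOffset": 0,
#    "byteLength": 4194304, "sha256": "...", "byteStride": 4096}
# byteStride = dims * itemsize, e.g. 1024 fp32 dims -> 4096 bytes per row.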


def write_hnsw_graph(levels: list[list[list[int]]], rows: int, out_path: Path) -> tuple[list[dict], str]:
    out_path.parent.mkdir(parents=True, exist_ok=True)
    offset = 0
    meta: list[dict] = []
    digest = hashlib.sha256()
    with open(out_path, "wb") as f:
        for lvl in levels:
            indptr = np.zeros(rows + 1, dtype=np.uint32)
            edge_accum: list[int] = []
            for idx in range(rows):
                neighbors = lvl[idx] if idx < len(lvl) else []
                indptr[idx + 1] = indptr[idx] + len(neighbors)
                edge_accum.extend(neighbors)
            indptr_bytes = indptr.tobytes(order="C")
            indptr_offset = offset
            f.write(indptr_bytes)
            digest.update(indptr_bytes)
            offset += len(indptr_bytes)

            if edge_accum:
                indices = np.asarray(edge_accum, dtype=np.uint32)
                indices_bytes = indices.tobytes(order="C")
            else:
                indices = np.zeros(0, dtype=np.uint32)
                indices_bytes = indices.tobytes(order="C")
            indices_offset = offset
            f.write(indices_bytes)
            digest.update(indices_bytes)
            offset += len(indices_bytes)

            meta.append(
                {
                    "level": len(meta),
                    "indptr": {
                        "offset": indptr_offset,
                        "elements": int(indptr.shape[0]),
                        "byteLength": len(indptr_bytes),
                    },
                    "indices": {
                        "offset": indices_offset,
                        "elements": int(indices.shape[0]),
                        "byteLength": len(indices_bytes),
                    },
                },
            )
    return meta, digest.hexdigest()


def embed_vllm(
    texts: list[str],
    model_id: str,
    vllm_url: str,
    batch_size: int = 64,
    concurrency: int = 8,
) -> np.ndarray:
    base_url = resolve_vllm_base_url(vllm_url)
    api_key = os.environ.get("VLLM_API_KEY") or os.environ.get("OPENAI_API_KEY") or "not-set"
    client = OpenAI(base_url=base_url, api_key=api_key, timeout=300)

    def list_available_models() -> list[str]:
        models: list[str] = []
        page = client.models.list()
        models.extend(model.id for model in page.data)
        while getattr(page, "has_more", False) and page.data:
            cursor = page.data[-1].id
            page = client.models.list(after=cursor)
            models.extend(model.id for model in page.data)
        return models

    try:
        available_models = list_available_models()
    except Exception as exc:
        raise RuntimeError(f"failed to query {base_url}/models: {exc}") from exc

    if model_id not in available_models:
        suggestions = ", ".join(sorted(available_models)) if available_models else "<none>"
        logger.warning(
            "model '%s' not served by vLLM at %s. Available models: %s. Falling back to the first available model; results may differ at semantic-search time (ignore this if your weights are an ONNX checkpoint of the same model).",
            model_id,
            base_url,
            suggestions,
        )
        model_id = available_models[0]

    # Apply model-specific prefixes for documents (asymmetric search)
    model_lower = model_id.lower()
    if "e5" in model_lower:
        # E5 models: use "passage:" prefix for documents
        prefixed = [f"passage: {t}" for t in texts]
    elif "qwen" in model_lower and "embedding" in model_lower:
        # Qwen3-Embedding: documents use plain text (no prefix)
        prefixed = texts
    elif "embeddinggemma" in model_lower:
        # embeddinggemma: use "title: none | text:" prefix for documents
        prefixed = [f"title: none | text: {t}" for t in texts]
    else:
        # Default: no prefix for unknown models
        prefixed = texts
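    # Note: these are document-side prefixes only; the query encoder at search
    # time must apply the matching query prefix (E5, for instance, expects
    # "query: ..."), or asymmetric retrieval quality degrades. How the browser
    # worker does this is an assumption here; it is not part of this diff.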

    print(
        "Embedding"
        f" {len(prefixed)} texts with vLLM"
        f" (model={model_id}, batch_size={batch_size}, concurrency={concurrency})",
    )

    # Create batches
    batches = []
    for i in range(0, len(prefixed), batch_size):
        batch = prefixed[i : i + batch_size]
        batches.append((i, batch))

    # Function to send a single batch request
    def send_batch(batch_info: tuple[int, list[str]]) -> tuple[int, list[np.ndarray]]:
        idx, batch = batch_info
        response = client.embeddings.create(model=model_id, input=batch)
        embeddings = [np.asarray(item.embedding, dtype=np.float32) for item in response.data]
        return (idx, embeddings)

    # Send batches concurrently (or sequentially if only 1 batch)
    results: dict[int, list[np.ndarray]] = {}
    if len(batches) == 1:
        # Single batch - no need for threading
        idx, embeddings = send_batch(batches[0])
        results[idx] = embeddings
    else:
        # Multiple batches - use concurrent requests
        with ThreadPoolExecutor(max_workers=concurrency) as executor:
            futures = {executor.submit(send_batch, batch_info): batch_info[0] for batch_info in batches}
            completed = 0
            for future in as_completed(futures):
                idx, embeddings = future.result()
                results[idx] = embeddings
                completed += 1
                if completed % max(1, len(batches) // 10) == 0 or completed == len(batches):
                    print(f"  Completed {completed}/{len(batches)} batches ({completed * 100 // len(batches)}%)")

    # Reconstruct in order
    out: list[np.ndarray] = []
    for i in sorted(results.keys()):
        out.extend(results[i])

    return np.stack(out, axis=0)


def embed_hf(texts: list[str], model_id: str, device: str) -> np.ndarray:
    # Prefer sentence-transformers for E5 and similar embed models
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer(model_id, device=device)

    # Apply model-specific prefixes for documents (asymmetric search)
    model_lower = model_id.lower()
    if "e5" in model_lower:
        # E5 models: use "passage:" prefix for documents
        prefixed = [f"passage: {t}" for t in texts]
    elif "qwen" in model_lower and "embedding" in model_lower:
        # Qwen3-Embedding: documents use plain text (no prefix)
        prefixed = texts
    elif "embeddinggemma" in model_lower:
        # embeddinggemma: use "title: none | text:" prefix for documents
        prefixed = [f"title: none | text: {t}" for t in texts]
    else:
        # Default: no prefix for unknown models
        prefixed = texts

    vecs = model.encode(
        prefixed,
        batch_size=64,
        normalize_embeddings=True,
        convert_to_numpy=True,
        show_progress_bar=True,
    )
    return vecs.astype(np.float32, copy=False)

def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("--jsonl", default="public/embeddings-text.jsonl")
    ap.add_argument("--model", default=os.environ.get("SEM_MODEL", "intfloat/multilingual-e5-large"))
    ap.add_argument("--dims", type=int, default=int(os.environ.get("SEM_DIMS", "1024")))
    ap.add_argument("--dtype", choices=["fp16", "fp32"], default=os.environ.get("SEM_DTYPE", "fp32"))
    ap.add_argument("--shard-size", type=int, default=int(os.environ.get("SEM_SHARD", "1024")))
    ap.add_argument("--out", default="public/embeddings")
    ap.add_argument("--use-vllm", action="store_true", default=bool(os.environ.get("USE_VLLM", "")))
    ap.add_argument(
        "--vllm-url",
        default=DEFAULT_VLLM_URL,
        help="Base URL for the vLLM OpenAI-compatible server (accepts either /v1 or /v1/embeddings)",
    )
    ap.add_argument("--chunk-size", type=int, default=512, help="Max tokens per chunk")
    ap.add_argument("--chunk-overlap", type=int, default=128, help="Overlap tokens between chunks")
    ap.add_argument("--no-chunking", action="store_true", help="Disable chunking (embed full docs)")
    ap.add_argument(
        "--concurrency",
        type=int,
        default=int(os.environ.get("VLLM_CONCURRENCY", "8")),
        help="Number of concurrent requests to vLLM (default: 8)",
    )
    ap.add_argument(
        "--batch-size",
        type=int,
        default=int(os.environ.get("VLLM_BATCH_SIZE", "64")),
        help="Batch size for vLLM requests (default: 64)",
    )
    args = ap.parse_args()

    recs = list(load_jsonl(args.jsonl))
    if not recs:
        print("No input found in public/embeddings-text.jsonl; run the site build first to emit the JSONL.")
        return

    # Apply chunking
    if args.no_chunking:
        chunks = recs
        chunk_metadata = {}
        print(f"Chunking disabled. Processing {len(chunks)} full documents")
    else:
        chunks = []
        chunk_metadata = {}
        for rec in recs:
            doc_chunks = chunk_document(rec, max_tokens=args.chunk_size, overlap_tokens=args.chunk_overlap)
            chunks.extend(doc_chunks)
            # Build chunk metadata map
            for chunk in doc_chunks:
                if chunk["is_chunked"]:
                    chunk_metadata[chunk["slug"]] = {
                        "parentSlug": chunk["parent_slug"],
                        "chunkId": chunk["chunk_id"],
                    }
        chunked_count = sum(1 for c in chunks if c.get("is_chunked", False))
        print(f"Chunked {len(recs)} documents into {len(chunks)} chunks ({chunked_count} chunked, {len(chunks) - chunked_count} unchanged)")
        print(f"  Chunk size: {args.chunk_size} tokens, overlap: {args.chunk_overlap} tokens")

    ids = [c["slug"] for c in chunks]
    titles = [c.get("title", c["slug"]) for c in chunks]
    texts = [c["text"] for c in chunks]

    if args.use_vllm:
        vecs = embed_vllm(
            texts,
            args.model,
            args.vllm_url,
            batch_size=args.batch_size,
            concurrency=args.concurrency,
        )
    else:
        device = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu"
        vecs = embed_hf(texts, args.model, device)

    # Coerce dims and re-normalize
    if vecs.shape[1] != args.dims:
        if vecs.shape[1] > args.dims:
            vecs = vecs[:, : args.dims]
        else:
            vecs = np.pad(vecs, ((0, 0), (0, args.dims - vecs.shape[1])))
    vecs = l2_normalize_rows(vecs.astype(np.float32, copy=False))

    out_dir = Path(args.out)
    shards = write_shards(vecs, args.shard_size, args.dtype, out_dir)

    # Build a lightweight HNSW graph and store it in a compact binary layout
    def hnsw_build(data: np.ndarray, M: int = 16, efC: int = 200, seed: int = 0) -> dict:
        rng = random.Random(seed)
        N, D = data.shape
        levels: list[list[list[int]]] = []  # levels[L][i] = neighbors of node i at level L

        # random level assignment using 1/e distribution
        node_levels = []
        for _ in range(N):
            lvl = 0
            while rng.random() < 1 / math.e:
                lvl += 1
            node_levels.append(lvl)
        max_level = max(node_levels) if N > 0 else 0
        for _ in range(max_level + 1):
            levels.append([[] for _ in range(N)])

        def sim(i: int, j: int) -> float:
            return float((data[i] * data[j]).sum())

        entry = 0 if N > 0 else -1

        def search_layer(q: int, ep: int, ef: int, L: int) -> list[int]:
            if ep < 0:
                return []
            visited = set()
            cand: list[tuple[float, int]] = []
            top: list[tuple[float, int]] = []

            def push(node: int):
                if node in visited:
                    return
                visited.add(node)
                cand.append((sim(q, node), node))

            push(ep)
            while cand:
                cand.sort(reverse=True)
                s, v = cand.pop(0)
                if len(top) >= ef and s <= top[-1][0]:
                    break
                top.append((s, v))
                for u in levels[L][v]:
                    push(u)
            top.sort(reverse=True)
            return [n for _, n in top]

        for i in range(N):
            if i == 0:
                continue
            lvl = node_levels[i]
            ep = entry
            for L in range(max_level, lvl, -1):
                c = search_layer(i, ep, 1, L)
                if c:
                    ep = c[0]
            for L in range(min(max_level, lvl), -1, -1):
                W = search_layer(i, ep, efC, L)
                # Select top M by similarity
                neigh = sorted(((sim(i, j), j) for j in W if j != i), reverse=True)[:M]
                for _, e in neigh:
                    if e not in levels[L][i]:
                        levels[L][i].append(e)
                    if i not in levels[L][e]:
                        levels[L][e].append(i)

        # trim neighbors to M
        for L in range(len(levels)):
            for i in range(N):
                if len(levels[L][i]) > M:
                    # keep top M by sim
                    nb = levels[L][i]
                    nb = sorted(nb, key=lambda j: sim(i, j), reverse=True)[:M]
                    levels[L][i] = nb

        return {
            "M": M,
            "efConstruction": efC,
            "entryPoint": entry,
            "maxLevel": max_level,
            "levels": levels,
        }
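
    # Level assignment note: P(level >= k) = e^-k (geometric with continuation
    # probability 1/e), so the expected maximum level across N nodes grows
    # like ln N (roughly 7 for N = 1000 rows).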
    hnsw = hnsw_build(vecs, M=16, efC=200)
    hnsw_meta, hnsw_sha = write_hnsw_graph(hnsw["levels"], int(vecs.shape[0]), out_dir / "hnsw.bin")

    manifest = {
        "version": 2,
        "dims": args.dims,
        "dtype": args.dtype,
        "normalized": True,
        "rows": int(vecs.shape[0]),
        "shardSizeRows": args.shard_size,
        "vectors": {
            "dtype": args.dtype,
            "rows": int(vecs.shape[0]),
            "dims": args.dims,
            "shards": shards,
        },
        "ids": ids,
        "titles": titles,
        "chunkMetadata": chunk_metadata,
        "hnsw": {
            "M": hnsw["M"],
            "efConstruction": hnsw["efConstruction"],
            "entryPoint": hnsw["entryPoint"],
            "maxLevel": hnsw["maxLevel"],
            "graph": {
                "path": "/embeddings/hnsw.bin",
                "sha256": hnsw_sha,
                "levels": hnsw_meta,
            },
        },
    }
    (out_dir / "manifest.json").write_text(json.dumps(manifest, ensure_ascii=False), encoding="utf-8")
    print(f"Wrote {len(shards)} vector shard(s), HNSW graph, and manifest to {out_dir}")


if __name__ == "__main__":
    main()
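
The SemanticIndex emitter later in this diff runs this script via uv. The same invocation works by hand; the flags are the ones defined above (server URL illustrative):

  uv run quartz/embed_build.py --jsonl public/embeddings-text.jsonl \
    --model intfloat/multilingual-e5-large --dims 1024 --dtype fp32 \
    --use-vllm --vllm-url http://127.0.0.1:8000/v1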

@@ -40,7 +40,7 @@ export const NotFoundPage: QuartzEmitterPlugin = () => {
    description: notFound,
    frontmatter: { title: notFound, tags: [] },
  })
  const externalResources = pageResources(path, resources)
  const externalResources = pageResources(path, resources, ctx.cfg.configuration)
  const componentData: QuartzComponentProps = {
    ctx,
    fileData: vfile.data,

@@ -1,5 +1,8 @@
import { FullSlug, joinSegments } from "../../util/path"
import { QuartzEmitterPlugin } from "../types"
import path from "path"
import fs from "node:fs/promises"
import { globby } from "globby"

// @ts-ignore
import spaRouterScript from "../../components/scripts/spa.inline"
@@ -16,7 +19,7 @@ import {
  processGoogleFonts,
} from "../../util/theme"
import { Features, transform } from "lightningcss"
import { transform as transpile } from "esbuild"
import { transform as transpile, build as bundle } from "esbuild"
import { write } from "./helpers"

type ComponentResources = {
@@ -357,7 +360,47 @@ export const ComponentResources: QuartzEmitterPlugin = () => {
      ext: ".js",
      content: postscript,
    })

    // Bundle all worker files
    const workerFiles = await globby(["quartz/**/*.worker.ts"])
    for (const src of workerFiles) {
      const result = await bundle({
        entryPoints: [src],
        bundle: true,
        minify: true,
        platform: "browser",
        format: "esm",
        write: false,
      })
      const code = result.outputFiles[0].text
      const name = path.basename(src).replace(/\.ts$/, "")
      yield write({ ctx, slug: name as FullSlug, ext: ".js", content: code })
    }
  },
  async *partialEmit(ctx, _content, _resources, changeEvents) {
    // Handle worker file changes in incremental builds
    for (const changeEvent of changeEvents) {
      if (!/\.worker\.ts$/.test(changeEvent.path)) continue
      if (changeEvent.type === "delete") {
        const name = path.basename(changeEvent.path).replace(/\.ts$/, "")
        const dest = joinSegments(ctx.argv.output, `${name}.js`)
        try {
          await fs.unlink(dest)
        } catch {}
        continue
      }
      const result = await bundle({
        entryPoints: [changeEvent.path],
        bundle: true,
        minify: true,
        platform: "browser",
        format: "esm",
        write: false,
      })
      const code = result.outputFiles[0].text
      const name = path.basename(changeEvent.path).replace(/\.ts$/, "")
      yield write({ ctx, slug: name as FullSlug, ext: ".js", content: code })
    }
  },
  async *partialEmit() {},
  }
}

@@ -25,7 +25,7 @@ async function processContent(
) {
  const slug = fileData.slug!
  const cfg = ctx.cfg.configuration
  const externalResources = pageResources(pathToRoot(slug), resources)
  const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
  const componentData: QuartzComponentProps = {
    ctx,
    fileData,

@@ -38,7 +38,7 @@ async function* processFolderInfo(
  const slug = joinSegments(folder, "index") as FullSlug
  const [tree, file] = folderContent
  const cfg = ctx.cfg.configuration
  const externalResources = pageResources(pathToRoot(slug), resources)
  const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
  const componentData: QuartzComponentProps = {
    ctx,
    fileData: file.data,

@@ -1,7 +1,7 @@
export { ContentPage } from "./contentPage"
export { TagPage } from "./tagPage"
export { FolderPage } from "./folderPage"
export { ContentIndex as ContentIndex } from "./contentIndex"
export { ContentIndex } from "./contentIndex"
export { AliasRedirects } from "./aliases"
export { Assets } from "./assets"
export { Static } from "./static"
@@ -10,3 +10,4 @@ export { ComponentResources } from "./componentResources"
export { NotFoundPage } from "./404"
export { CNAME } from "./cname"
export { CustomOgImages } from "./ogImage"
export { SemanticIndex } from "./semantic"

235
quartz/plugins/emitters/semantic.ts
Normal file
@@ -0,0 +1,235 @@
import { write } from "./helpers"
import { QuartzEmitterPlugin } from "../types"
import { FilePath, FullSlug, joinSegments, QUARTZ } from "../../util/path"
import { ReadTimeResults } from "reading-time"
import { GlobalConfiguration } from "../../cfg"
import { spawn } from "child_process"

const DEFAULT_MODEL_ID = "onnx-community/Qwen3-Embedding-0.6B-ONNX"

const defaults: GlobalConfiguration["semanticSearch"] = {
  enable: true,
  model: DEFAULT_MODEL_ID,
  aot: false,
  dims: 1024,
  dtype: "fp32",
  shardSizeRows: 1024,
  hnsw: { M: 16, efConstruction: 200 },
  chunking: {
    chunkSize: 512,
    chunkOverlap: 128,
    noChunking: false,
  },
  vllm: {
    enable: false,
    vllmUrl:
      process.env.VLLM_URL || process.env.VLLM_EMBED_URL || "http://127.0.0.1:8000/v1/embeddings",
    concurrency: parseInt(process.env.VLLM_CONCURRENCY || "8", 10),
    batchSize: parseInt(process.env.VLLM_BATCH_SIZE || "64", 10),
  },
}
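Because the emitter takes a Partial of this config (see the plugin factory below), a site only needs to state what differs from these defaults. A minimal sketch of the corresponding quartz.config.ts entry (the Plugin namespace and surrounding emitter list follow the usual Quartz config layout; values are illustrative):

// quartz.config.ts (sketch): enable semantic search with mostly-default settings
emitters: [
  // ...existing emitters...
  Plugin.SemanticIndex({
    aot: false, // build embeddings during `quartz build` (requires uv)
    chunking: { chunkSize: 256, chunkOverlap: 64, noChunking: false },
    hnsw: { M: 16, efConstruction: 200, efSearch: 96 },
  }),
],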

type ContentDetails = {
  slug: string
  title: string
  filePath: FilePath
  content: string
  readingTime?: Partial<ReadTimeResults>
}

/**
 * Check if uv is installed
 */
function checkUvInstalled(): Promise<boolean> {
  return new Promise((resolve) => {
    const proc = spawn("uv", ["--version"], { shell: true })
    proc.on("error", () => resolve(false))
    proc.on("close", (code) => resolve(code === 0))
  })
}

/**
 * Run the Python embedding build script using uv.
 * The script uses PEP 723 inline metadata for dependency management.
 */
function runEmbedBuild(
  jsonlPath: string,
  outDir: string,
  opts: {
    model: string
    dtype: string
    dims: number
    shardSizeRows: number
    chunking: { chunkSize: number; chunkOverlap: number; noChunking: boolean }
    vllm: { enable: boolean; vllmUrl?: string; concurrency: number; batchSize: number }
  },
): Promise<void> {
  return new Promise((resolve, reject) => {
    const scriptPath = joinSegments(QUARTZ, "embed_build.py")
    const args = [
      "run",
      scriptPath,
      "--jsonl",
      jsonlPath,
      "--model",
      opts.model,
      "--out",
      outDir,
      "--dtype",
      opts.dtype,
      "--dims",
      String(opts.dims),
      "--shard-size",
      String(opts.shardSizeRows),
      "--chunk-size",
      String(opts.chunking.chunkSize),
      "--chunk-overlap",
      String(opts.chunking.chunkOverlap),
    ]

    if (opts.chunking.noChunking) {
      args.push("--no-chunking")
    }

    if (opts.vllm.enable) {
      args.push("--use-vllm")
      if (opts.vllm.vllmUrl) {
        args.push("--vllm-url", opts.vllm.vllmUrl)
      }
      args.push("--concurrency", String(opts.vllm.concurrency))
      args.push("--batch-size", String(opts.vllm.batchSize))
    }

    console.log("\nRunning embedding generation:")
    console.log(`  uv ${args.join(" ")}`)

    const env = { ...process.env }
    if (opts.vllm.enable && !env.USE_VLLM) {
      env.USE_VLLM = "1"
    }

    const proc = spawn("uv", args, {
      stdio: "inherit",
      shell: true,
      env,
    })

    proc.on("error", (err) => {
      reject(new Error(`Failed to spawn uv: ${err.message}`))
    })

    proc.on("close", (code) => {
      if (code === 0) {
        console.log("Embedding generation completed successfully")
        resolve()
      } else {
        reject(new Error(`embed_build.py exited with code ${code}`))
      }
    })
  })
}
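With the defaults above and the standard public/ output directory, the command the emitter logs comes out roughly as follows (line-wrapped here for readability; QUARTZ is assumed to resolve to the quartz/ source directory):

uv run quartz/embed_build.py --jsonl public/embeddings-text.jsonl \
  --model onnx-community/Qwen3-Embedding-0.6B-ONNX --out public/embeddings \
  --dtype fp32 --dims 1024 --shard-size 1024 --chunk-size 512 --chunk-overlap 128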

export const SemanticIndex: QuartzEmitterPlugin<Partial<GlobalConfiguration["semanticSearch"]>> = (
  opts,
) => {
  const merged = { ...defaults, ...opts }
  const o = {
    enable: merged.enable!,
    model: merged.model!,
    aot: merged.aot!,
    dims: merged.dims!,
    dtype: merged.dtype!,
    shardSizeRows: merged.shardSizeRows!,
    hnsw: {
      M: merged.hnsw?.M ?? defaults.hnsw!.M!,
      efConstruction: merged.hnsw?.efConstruction ?? defaults.hnsw!.efConstruction!,
      efSearch: merged.hnsw?.efSearch,
    },
    chunking: {
      chunkSize: merged.chunking?.chunkSize ?? defaults.chunking!.chunkSize!,
      chunkOverlap: merged.chunking?.chunkOverlap ?? defaults.chunking!.chunkOverlap!,
      noChunking: merged.chunking?.noChunking ?? defaults.chunking!.noChunking!,
    },
    vllm: {
      enable: merged.vllm?.enable ?? defaults.vllm!.enable!,
      vllmUrl: merged.vllm?.vllmUrl ?? defaults.vllm!.vllmUrl,
      concurrency: merged.vllm?.concurrency ?? defaults.vllm!.concurrency!,
      batchSize: merged.vllm?.batchSize ?? defaults.vllm!.batchSize!,
    },
  }

  if (!o.model) {
    throw new Error("Semantic search requires a model identifier")
  }

  return {
    name: "SemanticIndex",
    getQuartzComponents() {
      return []
    },
    async *partialEmit() {},
    async *emit(ctx, content, _resources) {
      if (!o.enable) return

      const docs: ContentDetails[] = []
      for (const [_, file] of content) {
        const slug = file.data.slug!
        const title = file.data.frontmatter?.title ?? slug
        const text = file.data.text
        if (text) {
          docs.push({
            slug,
            title,
            filePath: file.data.filePath!,
            content: text,
            readingTime: file.data.readingTime,
          })
        }
      }

      // Emit JSONL with the exact text used for embeddings
      const jsonl = docs
        .map((d) => ({ slug: d.slug, title: d.title, text: d.content }))
        .map((row) => JSON.stringify(row))
        .join("\n")

      const jsonlSlug = "embeddings-text" as FullSlug
      yield write({
        ctx,
        slug: jsonlSlug,
        ext: ".jsonl",
        content: jsonl,
      })
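Each line of embeddings-text.jsonl is one self-contained JSON object per document, matching the mapping above (illustrative values):

{"slug":"notes/semantic-search","title":"Semantic search","text":"Quartz can build a local embedding index..."}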

      // If aot is false, run the embedding generation script
      if (!o.aot) {
        console.log("\nGenerating embeddings (aot=false)...")

        // Check for uv
        const hasUv = await checkUvInstalled()
        if (!hasUv) {
          throw new Error(
            "uv is required for embedding generation. Install it from https://docs.astral.sh/uv/",
          )
        }

        const jsonlPath = joinSegments(ctx.argv.output, "embeddings-text.jsonl")
        const outDir = joinSegments(ctx.argv.output, "embeddings")

        try {
          await runEmbedBuild(jsonlPath, outDir, o)
        } catch (err) {
          const message = err instanceof Error ? err.message : String(err)
          throw new Error(`Embedding generation failed: ${message}`)
        }
      } else {
        console.log(
          "\nSkipping embedding generation (aot=true). Expecting pre-generated embeddings in public/embeddings/",
        )
      }
    },
    externalResources(_ctx) {
      return {}
    },
  }
}

@@ -73,7 +73,7 @@ async function processTagPage(
   const slug = joinSegments("tags", tag) as FullSlug
   const [tree, file] = tagContent
   const cfg = ctx.cfg.configuration
-  const externalResources = pageResources(pathToRoot(slug), resources)
+  const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
   const componentData: QuartzComponentProps = {
     ctx,
     fileData: file.data,

548
quartz/workers/semantic.worker.ts
Normal file

@@ -0,0 +1,548 @@
// Unified semantic search worker: handles data loading and query execution
import { env, pipeline } from "@huggingface/transformers"
import "onnxruntime-web/webgpu"
import "onnxruntime-web/wasm"

export {}

type VectorShardMeta = {
  path: string
  rows: number
  rowOffset: number
  byteLength: number
  sha256?: string
  byteStride: number
}

type LevelSection = {
  level: number
  indptr: { offset: number; elements: number; byteLength: number }
  indices: { offset: number; elements: number; byteLength: number }
}

type ChunkMetadata = {
  parentSlug: string
  chunkId: number
}

type Manifest = {
  version: number
  dims: number
  dtype: string
  normalized: boolean
  rows: number
  shardSizeRows: number
  vectors: {
    dtype: string
    rows: number
    dims: number
    shards: VectorShardMeta[]
  }
  ids: string[]
  titles?: string[]
  chunkMetadata?: Record<string, ChunkMetadata>
  hnsw: {
    M: number
    efConstruction: number
    entryPoint: number
    maxLevel: number
    graph: {
      path: string
      sha256?: string
      levels: LevelSection[]
    }
  }
}
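Concretely, a manifest for a two-shard index might look like this (illustrative values; shapes follow the Manifest type above, with byteLength = rows × dims × 4 and byteStride = dims × 4 for fp32):

{
  "version": 1,
  "dims": 1024,
  "dtype": "fp32",
  "normalized": true,
  "rows": 2048,
  "shardSizeRows": 1024,
  "vectors": {
    "dtype": "fp32",
    "rows": 2048,
    "dims": 1024,
    "shards": [
      { "path": "embeddings/vectors-000.bin", "rows": 1024, "rowOffset": 0, "byteLength": 4194304, "byteStride": 4096 },
      { "path": "embeddings/vectors-001.bin", "rows": 1024, "rowOffset": 1024, "byteLength": 4194304, "byteStride": 4096 }
    ]
  },
  "ids": ["notes/example#0", "notes/example#1"],
  "hnsw": {
    "M": 16,
    "efConstruction": 200,
    "entryPoint": 123,
    "maxLevel": 1,
    "graph": {
      "path": "embeddings/hnsw.bin",
      "levels": [
        { "level": 0, "indptr": { "offset": 0, "elements": 2049, "byteLength": 8196 }, "indices": { "offset": 8196, "elements": 65536, "byteLength": 262144 } },
        { "level": 1, "indptr": { "offset": 270340, "elements": 2049, "byteLength": 8196 }, "indices": { "offset": 278536, "elements": 4096, "byteLength": 16384 } }
      ]
    }
  }
}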

type InitMessage = {
  type: "init"
  cfg: any
  manifestUrl: string
  baseUrl?: string
  disableCache?: boolean
}

type SearchMessage = { type: "search"; text: string; k: number; seq: number }
type ResetMessage = { type: "reset" }

type WorkerMessage = InitMessage | SearchMessage | ResetMessage

type ReadyMessage = { type: "ready" }

type ProgressMessage = {
  type: "progress"
  loadedRows: number
  totalRows: number
}

type SearchHit = { id: number; score: number }

type SearchResultMessage = {
  type: "search-result"
  seq: number
  semantic: SearchHit[]
}

type ErrorMessage = { type: "error"; seq?: number; message: string }

type WorkerState = "idle" | "loading" | "ready" | "error"
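From the main thread the protocol is: post an init message, wait for ready (with progress updates while vector shards stream in), then post search messages tagged with a monotonically increasing seq so stale responses can be discarded. A minimal client-side sketch, assuming the worker bundle is served as /semantic.worker.js (per the worker emitter earlier in this diff) and that semanticCfg is the globally declared semantic-search config:

// main-thread sketch; message shapes follow the worker types above
const worker = new Worker("/semantic.worker.js", { type: "module" })
let seq = 0

worker.onmessage = (ev: MessageEvent) => {
  const msg = ev.data
  if (msg.type === "ready") console.log("semantic index ready")
  else if (msg.type === "progress") console.log(`loaded ${msg.loadedRows}/${msg.totalRows} rows`)
  else if (msg.type === "search-result") console.log("hits for seq", msg.seq, msg.semantic)
  else if (msg.type === "error") console.error("worker error:", msg.message)
}

worker.postMessage({ type: "init", cfg: semanticCfg, manifestUrl: "/embeddings/manifest.json" })
// later, per query:
worker.postMessage({ type: "search", text: "how does the graph view work?", k: 8, seq: ++seq })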

// IndexedDB configuration
const DB_NAME = "semantic-search-cache"
const STORE_NAME = "assets"
const DB_VERSION = 1
const hasIndexedDB = typeof indexedDB !== "undefined"
const supportsSharedArrayBuffer = typeof SharedArrayBuffer !== "undefined"

// State
let state: WorkerState = "idle"
let manifest: Manifest | null = null
let cfg: any = null
let vectorsView: Float32Array | null = null
let dims = 0
let rows = 0
let classifier: any = null
let envConfigured = false
let entryPoint = -1
let maxLevel = 0
let efDefault = 128
let levelGraph: { indptr: Uint32Array; indices: Uint32Array }[] = []
let abortController: AbortController | null = null
let dbPromise: Promise<IDBDatabase> | null = null

// IndexedDB helpers
function openDatabase(): Promise<IDBDatabase> {
  if (!hasIndexedDB) {
    return Promise.reject(new Error("indexedDB unavailable"))
  }
  if (!dbPromise) {
    dbPromise = new Promise((resolve, reject) => {
      const req = indexedDB.open(DB_NAME, DB_VERSION)
      req.onupgradeneeded = () => {
        const db = req.result
        if (!db.objectStoreNames.contains(STORE_NAME)) {
          db.createObjectStore(STORE_NAME)
        }
      }
      req.onsuccess = () => resolve(req.result)
      req.onerror = () => reject(req.error ?? new Error("failed to open cache store"))
    })
  }
  return dbPromise
}

async function readAsset(hash: string): Promise<ArrayBuffer | null> {
  if (!hasIndexedDB) {
    return null
  }
  const db = await openDatabase()
  return new Promise((resolve, reject) => {
    const tx = db.transaction(STORE_NAME, "readonly")
    const store = tx.objectStore(STORE_NAME)
    const req = store.get(hash)
    req.onsuccess = () => {
      const value = req.result
      if (value instanceof ArrayBuffer) {
        resolve(value)
      } else if (value && value.buffer instanceof ArrayBuffer) {
        resolve(value.buffer as ArrayBuffer)
      } else {
        resolve(null)
      }
    }
    req.onerror = () => reject(req.error ?? new Error("failed to read cached asset"))
  })
}

async function writeAsset(hash: string, buffer: ArrayBuffer): Promise<void> {
  if (!hasIndexedDB) {
    return
  }
  const db = await openDatabase()
  await new Promise<void>((resolve, reject) => {
    const tx = db.transaction(STORE_NAME, "readwrite")
    const store = tx.objectStore(STORE_NAME)
    const req = store.put(buffer, hash)
    req.onsuccess = () => resolve()
    req.onerror = () => reject(req.error ?? new Error("failed to cache asset"))
  })
}

function toAbsolute(path: string, baseUrl?: string): string {
  if (path.startsWith("http://") || path.startsWith("https://")) {
    return path
  }
  const base = baseUrl ?? self.location.origin
  return new URL(path, base).toString()
}

async function fetchBinary(
  path: string,
  disableCache: boolean,
  sha?: string,
): Promise<ArrayBuffer> {
  if (!disableCache && sha && hasIndexedDB) {
    try {
      const cached = await readAsset(sha)
      if (cached) {
        return cached
      }
    } catch {
      // fall through to network fetch on cache errors
    }
  }
  const res = await fetch(path, { signal: abortController?.signal ?? undefined })
  if (!res.ok) {
    throw new Error(`failed to fetch ${path}: ${res.status} ${res.statusText}`)
  }
  const payload = await res.arrayBuffer()
  if (!disableCache && sha && hasIndexedDB) {
    try {
      await writeAsset(sha, payload)
    } catch {
      // ignore cache write failures
    }
  }
  return payload
}

async function populateVectors(
  manifest: Manifest,
  baseUrl: string | undefined,
  disableCache: boolean | undefined,
): Promise<{ buffer: Float32Array; rowsLoaded: number }> {
  if (manifest.vectors.dtype !== "fp32") {
    throw new Error(`unsupported embedding dtype '${manifest.vectors.dtype}', regenerate with fp32`)
  }
  const rows = manifest.rows
  const dims = manifest.dims
  const totalBytes = rows * dims * Float32Array.BYTES_PER_ELEMENT
  const buffer = supportsSharedArrayBuffer
    ? new Float32Array(new SharedArrayBuffer(totalBytes))
    : new Float32Array(rows * dims) // Float32Array(n) takes an element count, not a byte count
  let loadedRows = 0
  for (const shard of manifest.vectors.shards) {
    const absolute = toAbsolute(shard.path, baseUrl)
    const payload = await fetchBinary(absolute, Boolean(disableCache), shard.sha256)
    const view = new Float32Array(payload)
    if (view.length !== shard.rows * dims) {
      throw new Error(
        `shard ${shard.path} has mismatched length (expected ${shard.rows * dims}, got ${view.length})`,
      )
    }
    buffer.set(view, shard.rowOffset * dims)
    loadedRows = Math.min(rows, shard.rowOffset + shard.rows)
    const progress: ProgressMessage = {
      type: "progress",
      loadedRows,
      totalRows: rows,
    }
    self.postMessage(progress)
  }
  return { buffer, rowsLoaded: loadedRows }
}

async function populateGraph(
  manifest: Manifest,
  baseUrl: string | undefined,
  disableCache: boolean | undefined,
): Promise<ArrayBuffer> {
  const graphMeta = manifest.hnsw.graph
  const absolute = toAbsolute(graphMeta.path, baseUrl)
  return await fetchBinary(absolute, Boolean(disableCache), graphMeta.sha256)
}

function configureRuntimeEnv() {
  if (envConfigured) return
  env.allowLocalModels = false
  env.allowRemoteModels = true
  const wasmBackend = env.backends?.onnx?.wasm
  if (!wasmBackend) {
    throw new Error("transformers.js ONNX runtime backend unavailable")
  }
  const cdnBase = `https://cdn.jsdelivr.net/npm/@huggingface/transformers@${env.version}/dist/`
  wasmBackend.wasmPaths = cdnBase
  envConfigured = true
}

async function ensureEncoder() {
  if (classifier) return
  if (!cfg?.model) {
    throw new Error("semantic worker missing model identifier")
  }
  configureRuntimeEnv()
  const dtype = typeof cfg?.dtype === "string" && cfg.dtype.length > 0 ? cfg.dtype : "fp32"
  const pipelineOpts: Record<string, unknown> = {
    device: "wasm",
    dtype,
    local_files_only: false,
  }
  classifier = await pipeline("feature-extraction", cfg.model, pipelineOpts)
  cfg.dtype = dtype
}

function vectorSlice(id: number): Float32Array {
  if (!vectorsView) {
    throw new Error("vector buffer not configured")
  }
  const start = id * dims
  const end = start + dims
  return vectorsView.subarray(start, end)
}

function dot(a: Float32Array, b: Float32Array): number {
  let s = 0
  for (let i = 0; i < dims; i++) {
    s += a[i] * b[i]
  }
  return s
}

function neighborsFor(level: number, node: number): Uint32Array {
  const meta = levelGraph[level]
  if (!meta) return new Uint32Array()
  const { indptr, indices } = meta
  if (node < 0 || node + 1 >= indptr.length) return new Uint32Array()
  const start = indptr[node]
  const end = indptr[node + 1]
  return indices.subarray(start, end)
}

function insertSortedDescending(arr: SearchHit[], item: SearchHit) {
  let idx = arr.length
  while (idx > 0 && arr[idx - 1].score < item.score) {
    idx -= 1
  }
  arr.splice(idx, 0, item)
}

function bruteForceSearch(query: Float32Array, k: number): SearchHit[] {
  if (!vectorsView) return []
  const hits: SearchHit[] = []
  for (let id = 0; id < rows; id++) {
    const score = dot(query, vectorSlice(id))
    if (hits.length < k) {
      insertSortedDescending(hits, { id, score })
    } else if (score > hits[hits.length - 1].score) {
      insertSortedDescending(hits, { id, score })
      hits.length = k
    }
  }
  return hits
}
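Since embed (below) requests normalize: true and the manifest marks vectors as normalized, every stored vector is unit length, so the raw dot product here is exactly cosine similarity; both the brute-force path and the HNSW path therefore rank by cosine. A toy check (3 dims for brevity; invented values):

// dot of unit vectors equals the cosine of the angle between them
const a = [1, 0, 0]
const b = [Math.SQRT1_2, Math.SQRT1_2, 0]
const cos = a.reduce((s, v, i) => s + v * b[i], 0) // 0.7071... = cos(45°)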

function hnswSearch(query: Float32Array, k: number): SearchHit[] {
  if (!manifest || !vectorsView || entryPoint < 0 || levelGraph.length === 0) {
    return bruteForceSearch(query, k)
  }
  const ef = Math.max(efDefault, k * 10)
  let ep = entryPoint
  let epScore = dot(query, vectorSlice(ep))
  // greedy descent through the upper layers to a good layer-0 entry point
  for (let level = maxLevel; level > 0; level--) {
    let changed = true
    while (changed) {
      changed = false
      const neigh = neighborsFor(level, ep)
      for (let i = 0; i < neigh.length; i++) {
        const candidate = neigh[i]
        if (candidate >= rows) continue
        const score = dot(query, vectorSlice(candidate))
        if (score > epScore) {
          epScore = score
          ep = candidate
          changed = true
        }
      }
    }
  }

  // best-first beam search on layer 0, keeping up to ef candidates
  const visited = new Set<number>()
  const candidateQueue: SearchHit[] = []
  const best: SearchHit[] = []
  insertSortedDescending(candidateQueue, { id: ep, score: epScore })
  insertSortedDescending(best, { id: ep, score: epScore })
  visited.add(ep)

  while (candidateQueue.length > 0) {
    const current = candidateQueue.shift()!
    const worstBest = best.length >= ef ? best[best.length - 1].score : -Infinity
    if (current.score < worstBest && best.length >= ef) {
      break
    }
    const neigh = neighborsFor(0, current.id)
    for (let i = 0; i < neigh.length; i++) {
      const candidate = neigh[i]
      if (candidate >= rows || visited.has(candidate)) continue
      visited.add(candidate)
      const score = dot(query, vectorSlice(candidate))
      const hit = { id: candidate, score }
      insertSortedDescending(candidateQueue, hit)
      if (best.length < ef || score > best[best.length - 1].score) {
        insertSortedDescending(best, hit)
        if (best.length > ef) {
          best.pop()
        }
      }
    }
  }

  best.sort((a, b) => b.score - a.score)
  return best.slice(0, k)
}
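Note how ef is sized: with the default M = 16 the init handler sets efDefault = max(64, 16 × 4) = 64, so a k = 5 query runs the layer-0 beam at ef = max(64, 50) = 64, while k = 10 widens it to max(64, 100) = 100. Larger ef explores more of the graph per query, trading latency for recall:

// ef sizing under the defaults above (M = 16)
const efDefault = Math.max(64, 16 * 4) // 64
const efForK5 = Math.max(efDefault, 5 * 10) // 64
const efForK10 = Math.max(efDefault, 10 * 10) // 100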

async function embed(text: string, isQuery: boolean = false): Promise<Float32Array> {
  await ensureEncoder()
  // Apply model-specific prefixes for asymmetric search
  let prefixedText = text
  if (cfg?.model) {
    const modelName = cfg.model.toLowerCase()
    switch (true) {
      case modelName.includes("e5"): {
        // E5 models require a query: or passage: prefix
        prefixedText = isQuery ? `query: ${text}` : `passage: ${text}`
        break
      }
      case modelName.includes("qwen") && modelName.includes("embedding"): {
        // Qwen3-Embedding requires a task instruction for queries only
        if (isQuery) {
          const task = "Given a web search query, retrieve relevant passages that answer the query"
          prefixedText = `Instruct: ${task}\nQuery: ${text}`
        }
        // Documents use plain text (no prefix)
        break
      }
      case modelName.includes("embeddinggemma"): {
        // embeddinggemma requires specific prefixes
        prefixedText = isQuery
          ? `task: search result | query: ${text}`
          : `title: none | text: ${text}`
        break
      }
      default:
        break
    }
  }
  const out = await classifier(prefixedText, { pooling: "mean", normalize: true })
  const data = Array.from(out?.data ?? out) as number[]
  const vec = new Float32Array(dims)
  for (let i = 0; i < dims; i++) vec[i] = data[i] ?? 0
  return vec
}
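Under the default model id (onnx-community/Qwen3-Embedding-0.6B-ONNX) the lowercased name contains both "qwen" and "embedding", so a call like embed("quartz graph view", true) actually encodes the instruction-wrapped string below, while document text is embedded verbatim:

// what the Qwen3 branch above sends to the encoder for a query
const prefixed =
  "Instruct: Given a web search query, retrieve relevant passages that answer the query\n" +
  "Query: quartz graph view"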

async function handleInit(msg: InitMessage) {
  if (state === "loading" || state === "ready") {
    throw new Error("worker already initialized or loading")
  }

  state = "loading"
  abortController?.abort()
  abortController = new AbortController()

  try {
    cfg = msg.cfg

    const manifestUrl = toAbsolute(msg.manifestUrl, msg.baseUrl)
    const response = await fetch(manifestUrl, { signal: abortController.signal })
    if (!response.ok) {
      throw new Error(
        `failed to fetch manifest ${manifestUrl}: ${response.status} ${response.statusText}`,
      )
    }
    manifest = (await response.json()) as Manifest

    if (manifest.vectors.dtype !== "fp32") {
      throw new Error(
        `unsupported embedding dtype '${manifest.vectors.dtype}', regenerate with fp32`,
      )
    }

    dims = manifest.dims
    rows = manifest.rows

    const { buffer: vectorBuffer } = await populateVectors(manifest, msg.baseUrl, msg.disableCache)
    vectorsView = vectorBuffer

    const graphBuffer = await populateGraph(manifest, msg.baseUrl, msg.disableCache)

    entryPoint = manifest.hnsw.entryPoint
    maxLevel = manifest.hnsw.maxLevel
    efDefault = Math.max(64, manifest.hnsw.M * 4)
    levelGraph = manifest.hnsw.graph.levels.map((level) => {
      const indptr = new Uint32Array(graphBuffer, level.indptr.offset, level.indptr.elements)
      const indices = new Uint32Array(graphBuffer, level.indices.offset, level.indices.elements)
      return { indptr, indices }
    })

    state = "ready"
    const ready: ReadyMessage = { type: "ready" }
    self.postMessage(ready)
  } catch (err) {
    state = "error"
    throw err
  }
}

async function handleSearch(msg: SearchMessage) {
  if (state !== "ready") {
    throw new Error("worker not ready for search")
  }
  if (!manifest || !vectorsView) {
    throw new Error("semantic worker not configured")
  }

  const queryVec = await embed(msg.text, true)
  const semanticHits = hnswSearch(queryVec, Math.max(1, msg.k))
  const message: SearchResultMessage = {
    type: "search-result",
    seq: msg.seq,
    semantic: semanticHits,
  }
  self.postMessage(message)
}

function handleReset() {
  abortController?.abort()
  abortController = null
  state = "idle"
  manifest = null
  cfg = null
  vectorsView = null
  dims = 0
  rows = 0
  classifier = null
  envConfigured = false
  levelGraph = []
  entryPoint = -1
  maxLevel = 0
}

self.onmessage = (event: MessageEvent<WorkerMessage>) => {
  const data = event.data

  if (data.type === "reset") {
    handleReset()
    return
  }

  if (data.type === "init") {
    void handleInit(data).catch((err: unknown) => {
      const message: ErrorMessage = {
        type: "error",
        message: err instanceof Error ? err.message : String(err),
      }
      self.postMessage(message)
    })
    return
  }

  if (data.type === "search") {
    void handleSearch(data).catch((err: unknown) => {
      const message: ErrorMessage = {
        type: "error",
        seq: data.seq,
        message: err instanceof Error ? err.message : String(err),
      }
      self.postMessage(message)
    })
  }
}