Hi Everyone, I'm trying to setup typesense search ...
# community-help
f
Hi everyone, I'm trying to set up Typesense search with Docusaurus. I have already deployed Typesense server 28.0 and the Typesense scraper 0.11.0. I'm using
"docusaurus-theme-search-typesense": "0.24.0"
in my
"@docusaurus/core": "3.7.0"
The search bar at the top of the page works as expected and I can see the results; however, on the search page it does not work. It throws this error:
Copy code
Error: 400 - Could not find a filter field named `docusaurus_tag` in the schema.
Can someone help me with this issue?
1
My frontend typesenseSearchParameters
Copy code
typesenseSearchParameters: {
        query_by:
          "hierarchy.lvl0,hierarchy.lvl1,hierarchy.lvl2,hierarchy.lvl3,hierarchy.lvl4,hierarchy.lvl5,hierarchy.lvl6,content",
        query_by_weights: "8,6,5,4,3,2,1,1", // Boost headings higher than body content
        highlight_full_fields: "hierarchy.lvl0,hierarchy.lvl1,hierarchy.lvl2",
        highlight_affix_num_tokens: 10,
        snippet_threshold: 30,
        sort_by: "_text_match:desc",
        typo_tokens_threshold: 1,
        drop_tokens_threshold: 1,
        num_typos: 2,
        exhaustive_search: false,
        prefix: "true,true,true,true,true,true,true,true",
        filter_by: "", // Leave empty unless you want to restrict based on version, tags, etc.
        per_page: 20,
        prioritize_exact_match: true,
        prioritize_token_position: true,
        enable_overrides: true,
        enable_highlight_v1: true,
      },
      contextualSearch: true,
My Config File:
Copy code
{
  "index_name": "devx_docs_search_index",
  "start_urls": [
    {
      "url": "MY_URL",
      "selectors_key": "docusaurus"
    }
  ],
  "sitemap_urls": [
    "MY_URL/sitemap.xml"
  ],
  "sitemap_alternate_links": true,
  "stop_urls": [
    "/changelog",
    "/test",
    "/tests"
  ],
  "render_js": true,
  "selectors": {
    "docusaurus": {
      "lvl0": {
        "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]",
        "type": "xpath",
        "global": true,
        "default_value": "Documentation"
      },
      "lvl1": "article h1",
      "lvl2": "article h2",
      "lvl3": "article h3",
      "lvl4": "article h4",
      "lvl5": "article h5, article td:first-child",
      "lvl6": "article h6",
      "text": "article p, article li, article td:last-child, article code, article blockquote"
    }
  },
  "strip_chars": " .,;:#",
  "custom_settings": {
    "separatorsToIndex": "_",
    "attributesForFaceting": [
      "language",
      "version",
      "type",
      "tags"
    ],
    "attributesToRetrieve": [
      "hierarchy",
      "content",
      "anchor",
      "url",
      "url_without_anchor",
      "type"
    ],
    "searchableAttributes": [
      "unordered(hierarchy.lvl0)",
      "unordered(hierarchy.lvl1)",
      "unordered(hierarchy.lvl2)",
      "unordered(hierarchy.lvl3)",
      "unordered(hierarchy.lvl4)",
      "unordered(hierarchy.lvl5)",
      "unordered(hierarchy.lvl6)",
      "content"
    ],
    "ranking": [
      "desc(item_priority)",
      "typo",
      "geo",
      "words",
      "proximity",
      "attribute",
      "exact",
      "custom"
    ],
    "typoTolerance": "min",
    "removeStopWords": true,
    "ignorePlurals": true,
    "highlightPreTag": "<mark>",
    "highlightPostTag": "</mark>",
    "minWordSizefor1Typo": 4,
    "minWordSizefor2Typos": 8,
    "field_definitions": [
      {
        "name": "anchor",
        "type": "string",
        "optional": true
      },
      {
        "name": "content",
        "type": "string",
        "optional": true
      },
      {
        "name": "url",
        "type": "string",
        "facet": true
      },
      {
        "name": "url_without_anchor",
        "type": "string",
        "facet": true,
        "optional": true
      },
      {
        "name": "version",
        "type": "string[]",
        "facet": true,
        "optional": true
      },
      {
        "name": "hierarchy.lvl0",
        "type": "string",
        "facet": true,
        "optional": true
      },
      {
        "name": "hierarchy.lvl1",
        "type": "string",
        "facet": true,
        "optional": true
      },
      {
        "name": "hierarchy.lvl2",
        "type": "string",
        "facet": true,
        "optional": true
      },
      {
        "name": "hierarchy.lvl3",
        "type": "string",
        "facet": true,
        "optional": true
      },
      {
        "name": "hierarchy.lvl4",
        "type": "string",
        "facet": true,
        "optional": true
      },
      {
        "name": "hierarchy.lvl5",
        "type": "string",
        "facet": true,
        "optional": true
      },
      {
        "name": "hierarchy.lvl6",
        "type": "string",
        "facet": true,
        "optional": true
      },
      {
        "name": "type",
        "type": "string",
        "facet": true,
        "optional": true
      },
      {
        "name": "language",
        "type": "string",
        "facet": true,
        "optional": true
      },
      {
        "name": "tags",
        "type": "string[]",
        "facet": true,
        "optional": true
      },
      {
        "name": "item_priority",
        "type": "int64"
      },
      {
        "name": "embedding",
        "type": "float[]",
        "embed": {
          "from": [
            "content",
            "hierarchy.lvl0",
            "hierarchy.lvl1",
            "hierarchy.lvl2",
            "hierarchy.lvl3",
            "hierarchy.lvl4",
            "hierarchy.lvl5",
            "hierarchy.lvl6",
            "tags"
          ],
          "model_config": {
            "model_name": "ts/all-MiniLM-L12-v2"
          }
        }
      }
    ]
  }
}
j
In the field definition in the scraper config you want to add
docusaurus_tag
as a new field
Or actually
.*_tag
And then rerun the scraper
f
@Jason Bosco Thanks for answering. Did you mean something like this? If yes, what should the data type for it be?
Copy code
{
  "name": "docusaurus_tag",
  "type": "",
  "facet": true,
  "optional": true
}
j
This is more generic:
Copy code
{"name": ".*_tag", "type": "string", "facet": true, "optional": true},
👍 1
f
Not sure if this is related, but I'm getting this error now after adding the .*_tag field:
Copy code
typesense.exceptions.RequestMalformed: [Errno 400] Property `embed.from` can only refer to string, string array or image (for supported models) fields.
j
Inside the
embed.from
property, regex field names are not supported. So there you have to explicitly mention docusaurus_tag if you want embeddings to be generated from it.
1
f
@Jason Bosco Thanks for the help; it's now working as expected.
👍 1