// crapthings, 11/21/2023, 6:36 AM
const fs = require('fs');
const _ = require('lodash')
const Typesense = require('typesense')
const { load, cut } = require('@node-rs/jieba');
const { log } = require('console');
// Call jieba's `load()` up front, before `cut` is used further down.
load();

// Raw input text, read synchronously as UTF-8.
const txt = fs.readFileSync('./5.txt', 'utf8');

// Name of the Typesense collection this script works with.
const databaseName123 = 'okaytest123';

// Typesense client configured for a single node at http://localhost:8108.
const client = new Typesense.Client({
  nodes: [
    { host: 'localhost', port: '8108', protocol: 'http' },
  ],
  apiKey: 'xyz',
  connectionTimeoutSeconds: 300,
});
// Collection schema:
//   - `title`: string field declared with locale 'zh'
//   - `text`: plain string field
//   - `embedding`: float[] field whose `embed` config names `title` as its
//     source and 'ts/all-MiniLM-L12-v2' as the model
const schema = {
  name: databaseName123,
  fields: [
    {
      name: 'title',
      type: 'string',
      locale: 'zh',
    },
    {
      name: 'text',
      type: 'string',
    },
    {
      name: 'embedding',
      type: 'float[]',
      embed: {
        from: ['title'],
        model_config: { model_name: 'ts/all-MiniLM-L12-v2' },
      },
    },
  ],
};
// Main flow: recreate the collection, index the source text as one document
// (raw text in `title`, jieba-segmented text in `text`), run a sample search
// and print the raw response as JSON.
;(async function () {
  // Drop any collection left over from a previous run.
  try {
    await client.collections(databaseName123).delete();
  } catch (ex) {
    // A missing collection (HTTP 404) is expected on a first run and is
    // ignored; any other failure is reported instead of silently hidden.
    if (!ex || ex.httpStatus !== 404) {
      console.log(ex);
    }
  }

  try {
    await client.collections().create(schema);
  } catch (ex) {
    console.log(ex);
  }

  // Segment the text with jieba and join the tokens with single spaces;
  // the segmented form is also written to ./cut.txt for inspection.
  const segmentedText = cut(txt).join(' ');
  fs.writeFileSync('./cut.txt', segmentedText);

  try {
    await client.collections(databaseName123).documents().import([
      { id: '1', title: txt, text: segmentedText },
    ]);
  } catch (ex) {
    console.log(ex);
  }

  // Search the segmented `text` field. `pre_segmented_query: true` is sent
  // because the indexed text was already split into space-separated tokens.
  // Alternatives tried earlier: query_by 'embedding,title' or 'title';
  // drop_tokens_threshold: 0, typo_tokens_threshold: 0, prefix: false;
  // sort_by '_vector_distance:desc'.
  let resp;
  try {
    resp = await client.collections(databaseName123).documents().search({
      q: '孔明',
      query_by: 'text',
      pre_segmented_query: true,
    });
  } catch (ex) {
    // On failure `resp` stays undefined and is printed as such below.
    console.log(ex);
  }

  console.log(JSON.stringify(resp, null, 2));
}()).catch((ex) => {
  // Errors thrown outside the try blocks above (segmentation, file write)
  // end up here rather than as an unhandled promise rejection.
  console.log(ex);
  process.exitCode = 1;
});