diff --git a/README.md b/README.md index e698d77..fb4ad21 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,13 @@ # elasticsearch-config +这是 OI wiki 的搜索服务器后端,同时能够支持将 build 出来的静态页面发布。 + +`/home/ubuntu/OI-wiki` 是项目源文件储存的地方,用来更新索引;`/var/www/OI-wiki` 是 build 出来的静态文件,用于发布网站。 + +`webhook` 文件夹内的代码会监听 GitHub 上的更新,然后实时更新上面两个文件夹内的仓库,同时更新索引。第一次启动 `webhook` 会清空之前的索引并新建一个。 + +`api` 文件夹内的代码是一个搜索服务器,会在 `localhost:8000` 下启动,api 为 `/?s=manach`。 + 部署需要有 es 环境并安装好 ik 与 pinyin 插件,并将 `/plugins/ik/config/stopword.dic` 与 `/plugins/ik/config/extra_stopword.dic` 内容清空(禁用停用词): ``` @@ -7,8 +15,13 @@ npm install git clone https://github.com/OI-wiki/OI-wiki.git /home/ubuntu/OI-wiki cd /home/ubuntu/OI-wiki git remote add gh https://github.com/OI-wiki/OI-wiki.git +git clone https://github.com/OI-wiki/OI-wiki.git /var/www/OI-wiki -b gh-pages ``` +直接修改代码或创建环境变量 `GITHUB_PATH` 与 `GITHUB_SECRET` 表示 webhook 的地址与密钥。GitHub 上的 Content type 配置为 `application/json`。 + +同时也需要修改代码或创建环境变量 `SEARCH_SECRET` 表示搜索服务器的密钥,使用这个密钥可以获取 es 的状态。 + 初始化并启动 webhook: ``` diff --git a/api/main.js b/api/main.js index 0fe92c7..559b482 100644 --- a/api/main.js +++ b/api/main.js @@ -4,11 +4,12 @@ var client = new elasticsearch.Client({ }); const express = require('express'); const app = express(); +const SEARCH_SECRET = process.env.SEARCH_SECRET || 'SEARCH_SECRET'; app.set('port', process.env.PORT || 8000); app.get('/status', function(req, res) { - if (req.query.s == MY_SECRET_KEY) { + if (req.query.s == SEARCH_SECRET) { client.ping({ requestTimeout: 1000, }, function (error) { @@ -24,11 +25,6 @@ app.get('/status', function(req, res) { }); app.get('/', function(req, res) { - // if (!req.headers.referer || req.headers.referer.indexOf('oi-wiki.org') < 0) { - // res.send([]); - //return; - //} - // console.log(req.headers); if (!req.query.s) { res.send([]); return; @@ -37,7 +33,6 @@ app.get('/', function(req, res) { console.log(keyword); client.search({ index: "oiwiki", - type: "article", from: 0, size: 12, body: { @@ -49,7 +44,7 @@ 
app.get('/', function(req, res) { title: { query: keyword, minimum_should_match: "75%", - boost: 4 + boost: 3 } } }, @@ -58,7 +53,7 @@ app.get('/', function(req, res) { h2: { query: keyword, minimum_should_match: "75%", - boost: 3 + boost: 2 } } }, @@ -79,6 +74,24 @@ app.get('/', function(req, res) { boost: 3 } } + }, + { + match: { + bold: { + query: keyword, + minimum_should_match: "75%", + boost: 2 + } + } + }, + { + match: { + standard_content: { + query: keyword, + minimum_should_match: "75%", + boost: 4 + } + } } ], tie_breaker: 0.3 diff --git a/build.sh b/build.sh deleted file mode 100644 index 4dd045b..0000000 --- a/build.sh +++ /dev/null @@ -1,24 +0,0 @@ -curl -X DELETE "http://localhost:9200/oiwiki" -curl -H'Content-Type: application/json' -XPUT "http://localhost:9200/oiwiki" -d' -{ - "settings": { - "analysis": { - "analyzer": { - "default": { - "tokenizer": "ik_max_word", - "filter": "custom_pinyin" - }, - "default_search": { - "tokenizer": "ik_max_word" - } - }, - "filter": { - "custom_pinyin": { - "type": "pinyin", - "keep_original": true, - "limit_first_letter_length": 16 - } - } - } - } -}' \ No newline at end of file diff --git a/update.sh b/update.sh deleted file mode 100644 index ccf91cf..0000000 --- a/update.sh +++ /dev/null @@ -1,9 +0,0 @@ -cd /var/www/OI-wiki -# sleep 50s -git fetch origin gh-pages -git reset origin/gh-pages --hard -echo $USER - -cd /home/ubuntu/OI-wiki -git fetch gh master -git reset gh/master --hard \ No newline at end of file diff --git a/webhook/index.js b/webhook/index.js index 8244e56..e76a3d8 100644 --- a/webhook/index.js +++ b/webhook/index.js @@ -1,24 +1,31 @@ -var http = require('http'); -var YAML = require('yaml'); -var createHandler = require('github-webhook-handler'); -var handler = createHandler({ path: '/MY_SECRET_PATH', secret: 'MY_SECRET_KEY' }); +const http = require('http'); +const YAML = require('yaml'); +const createHandler = require('github-webhook-handler'); +const simpleGit = require('simple-git'); 
const elasticsearch = require('elasticsearch'); +const fs = require('fs'); +const path = require('path'); +const remarkLib = require('remark'); +const math = require('remark-math'); +const strip = require('strip-markdown-math'); + + +const REPO_DIR = '/home/ubuntu/OI-wiki'; +const WEB_DIR = '/var/www/OI-wiki'; +const GITHUB_PATH = process.env.GITHUB_PATH || '/GITHUB_PATH'; +const GITHUB_SECRET = process.env.GITHUB_SECRET || 'GITHUB_SECRET'; + +const gitRepo = simpleGit({ baseDir: REPO_DIR }); +const gitWeb = simpleGit({ baseDir: WEB_DIR }); + var client = new elasticsearch.Client({ host: 'localhost:9200', }); - -let remark = require('remark'); -let strip = require('strip-markdown-math'); -const math = require("remark-math"); -remark = remark() +const remark = remarkLib() .use(math) .use(strip); -const fs = require('fs'); -const { exec } = require('child_process'); - - /** * Traversal all articles. * @@ -46,15 +53,15 @@ function traversalArticle(data, callback) { function getContent(filename, data) { let file; try { - file = String(fs.readFileSync(`/home/ubuntu/OI-wiki/docs/` + filename)); + file = String(fs.readFileSync(path.join(REPO_DIR, 'docs', filename))); } catch (e) { console.error(`Error reading file ${filename}:`, e); - return ['', '', '']; + return ['', '', '', '']; } - const h1reg = /^# .+$/gm, h2reg = /^## .+$/gm, authorreg = /author:[^\n]*/gm; + const h1reg = /^# .+$/gm, h2reg = /^## .+$/gm, authorreg = /author:[^\n]*/gm, boldreg = /\*\*(.*?)\*\*|__(.*?)__/g; const lines = file.split('\n').filter(e => !e.match(authorreg)); - let others = lines.filter((e) => !e.match(h1reg) && !e.match(h2reg)); + let content = lines.filter((e) => !e.match(h1reg) && !e.match(h2reg)); let title = lines[0] && lines[0].match(h1reg) ? 
lines[0].replace('# ', '') : ''; traversalArticle(data['nav'], (key, value) => { @@ -62,21 +69,28 @@ function getContent(filename, data) { }); const h2 = lines.filter(e => e.match(h2reg)).map(e => e.replace(/^## /, '')); - others = others.map(e => e.replace(/^##+ /, '')); + let bold = []; + let match; + while ((match = boldreg.exec(file)) !== null) { + // match[1] is the content in **bold**, match[2] is the content in __bold__ + bold.push(match[1] || match[2]); + } + + content = content.map(e => e.replace(/^##+ /, '')); - remark.process(others.join('\n'), (err, file) => { + remark.process(content.join('\n'), (err, file) => { if (err) { console.error('Remark processing error:', err); return; } - others = String(file) + content = String(file) .replace('"', "") .replace("\\n\\n", "\\n"); }); - others.replace() + content.replace() - return [title, others, h2.join('\n')]; + return [title, content, h2.join('\n'), bold.join('\n')]; } /** @@ -85,34 +99,37 @@ function getContent(filename, data) { * @param modified - Array of added/modified files * @param removed - Array of removed files */ -function updateContent(modified, removed) { - const file = String(fs.readFileSync(`/home/ubuntu/OI-wiki/mkdocs.yml`)); +async function updateContent(modified, removed) { + const file = String(fs.readFileSync(path.join(REPO_DIR, 'mkdocs.yml'))); const data = YAML.parse(file.replaceAll('!!python/name:', '')); let ops = []; modified.forEach((filename) => { - ops.push({ index: { _index: 'oiwiki', _type: 'article', _id: filename } }); - let [title, article, h2] = getContent(filename, data); - ops.push({ - title: title, - content: article, - url: '/' + filename.replace('/index.md', '/').replace('.md', '/'), - h2: h2, - }); + let [title, article, h2, bold] = getContent(filename, data); + if (title != '') { + ops.push({ index: { _index: 'oiwiki', _id: filename } }); + ops.push({ + title: title, + content: article, + url: '/' + filename.replace('/index.md', '/').replace('.md', '/'), + h2: h2, + 
bold: bold, + standard_content: article, + }); + } }); removed.forEach((filename) => { - ops.push({ delete: { _index: 'oiwiki', _type: 'article', _id: filename } }); - }); - client.bulk({ body: ops, refresh: 'true' }, function (err, res) { - if (err) { - console.error('Failed Bulk opoeration', err); - res.statusCode = 504; - res.end('elasticsearch bulk op failed'); - return; - } - console.debug('Elasticsearch bulk op success'); + ops.push({ delete: { _index: 'oiwiki', _id: filename } }); }); + const res = await client.bulk({ body: ops, refresh: 'true' }); + if (res.errors) { + console.error('Bulk operation had errors:', res); + } else { + console.log('Bulk operation succeeded.'); + } } +const handler = createHandler({ path: GITHUB_PATH, secret: GITHUB_SECRET }); + http.createServer((req, res) => { handler(req, res, (err) => { res.statusCode = 404; @@ -125,49 +142,101 @@ handler.on('error', (err) => { console.error('Handler error:', err.message); }); -handler.on('push', (event) => { +handler.on('push', async (event) => { if (event.payload.ref !== 'refs/heads/master') return; console.debug('Received a push event for %s to %s', event.payload.repository.name, event.payload.ref); - exec('bash update.sh', (err) => { - if (err) { - console.error('Shell command execution error:', err); - return; - } - let removed = new Set([]), modified = new Set([]); - event.payload.commits.forEach((e) => { - e.added.forEach(w => { - modified.add(w); - removed.delete(w); - }); - e.modified.forEach(w => modified.add(w)); - e.removed.forEach(w => { - removed.add(w); - modified.delete(w); - }); + await gitWeb.fetch('origin', 'gh-pages'); + await gitWeb.reset(['origin/gh-pages', '--hard']); + console.log('update website'); + + await gitRepo.fetch('gh', 'master'); + await gitRepo.reset(['gh/master', '--hard']); + console.log('update repo'); + + let removed = new Set([]), modified = new Set([]); + event.payload.commits.forEach((e) => { + e.added.forEach(w => { + modified.add(w); + 
removed.delete(w); + }); + e.modified.forEach(w => modified.add(w)); + e.removed.forEach(w => { + removed.add(w); + modified.delete(w); }); + }); - removed = [...removed].filter(e => e.startsWith('docs') && e.endsWith('md')); - modified = [...modified].filter(e => e.startsWith('docs') && e.endsWith('md')); + removed = [...removed].filter(e => e.startsWith('docs') && e.endsWith('md')); + modified = [...modified].filter(e => e.startsWith('docs') && e.endsWith('md')); - removed.forEach((file, index) => removed[index] = file.replace('docs/', '')); - modified.forEach((file, index) => modified[index] = file.replace('docs/', '')); + removed.forEach((file, index) => removed[index] = file.replace('docs/', '')); + modified.forEach((file, index) => modified[index] = file.replace('docs/', '')); - updateContent(modified, removed); - }); + await updateContent(modified, removed); + console.log('index update'); }); -function init() { - exec(`bash build.sh`, () => { - let modified = []; - const file = String(fs.readFileSync(`/home/ubuntu/OI-wiki/mkdocs.yml`)); - const data = YAML.parse(file.replaceAll('!!python/name:', '')); - traversalArticle(data['nav'], (key, value) => modified.push(value)); - updateContent(modified, []); - }); +/** + * recreate index (delete if exists, then create) + */ +async function recreateIndex() { + const exists = await client.indices.exists({ index: 'oiwiki' }); + if (exists) { + await client.indices.delete({ index: 'oiwiki' }); + console.log('Deleted existing index'); + } + + const body = { + settings: { + analysis: { + analyzer: { + pinyin_analyzer: { + type: 'custom', + tokenizer: 'ik_max_word', + filter: ['pinyin_filter'] + } + }, + filter: { + pinyin_filter: { + type: 'pinyin', + keep_separate_first_letter: true, + keep_full_pinyin: true, + keep_original: true, + first_letter: 'prefix', + limit_first_letter_length: 16, + lowercase: true, + remove_duplicated_term: true + } + } + } + }, + mappings: { + properties: { + content: { type: 'text', 
analyzer: 'pinyin_analyzer' }, + h2: { type: 'text', analyzer: 'pinyin_analyzer' }, + title: { type: 'text', analyzer: 'pinyin_analyzer' }, + url: { type: 'text' }, + bold: { type: 'text', analyzer: 'pinyin_analyzer' }, + standard_content: { type: 'text', analyzer: 'simple' } + } + } + }; + + await client.indices.create({ index: 'oiwiki', body }); + console.log('Created index'); +} + +async function init() { + await recreateIndex(); + const file = String(fs.readFileSync(path.join(REPO_DIR, 'mkdocs.yml'))); + const data = YAML.parse(file.replaceAll('!!python/name:', '')); + let modified = []; + traversalArticle(data['nav'], (key, value) => modified.push(value)); + await updateContent(modified, []); } init(); diff --git a/webhook/test.js b/webhook/test.js index feab134..4c11b23 100644 --- a/webhook/test.js +++ b/webhook/test.js @@ -6,7 +6,6 @@ var client = new elasticsearch.Client({ let keyword = "线段"; const response = client.search({ index: "oiwiki", - type: "article", from: 0, size: 10, body: { @@ -48,6 +47,24 @@ const response = client.search({ boost: 3 } } + }, + { + match: { + bold: { + query: keyword, + minimum_should_match: "75%", + boost: 3 + } + } + }, + { + match: { + standard_content: { + query: keyword, + minimum_should_match: "75%", + boost: 2 + } + } } ], tie_breaker: 0.3