Skip to content
This repository was archived by the owner on Nov 8, 2022. It is now read-only.

Commit fb002fc

Browse files
authored
feat(block-task): cite task (#396)
* chore: basic thought * refactor(article): add thread to meta when create * refactor(cited-article): wip * refactor(cited-article): wip * refactor(cited-article): wip * refactor(cited-article): wip * refactor(cited-article): wip * refactor(cited-article): doc & naming adjust * refactor(cited-article): more test * refactor(cited-article): fix default meta && clean up
1 parent a6128c5 commit fb002fc

File tree

25 files changed

+686
-15
lines changed

25 files changed

+686
-15
lines changed

config/config.exs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ config :pre_commit, commands: ["format"], verbose: false
3131
# of this file so it overrides the configuration defined above.
3232

3333
config :groupher_server, :general,
34+
site_host: "https://coderplanets.com",
3435
page_size: 30,
3536
inner_page_size: 5,
3637
# today is not include

lib/groupher_server/cms/delegates/article_curd.ex

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ defmodule GroupherServer.CMS.Delegate.ArticleCURD do
66

77
import GroupherServer.CMS.Helper.Matcher
88

9-
import Helper.Utils, only: [done: 1, pick_by: 2, module_to_atom: 1, get_config: 2, ensure: 2]
9+
import Helper.Utils,
10+
only: [done: 1, pick_by: 2, module_to_atom: 1, get_config: 2, ensure: 2, module_to_upcase: 1]
1011

1112
import GroupherServer.CMS.Delegate.Helper, only: [mark_viewer_emotion_states: 2]
1213
import Helper.ErrorCode
@@ -17,7 +18,7 @@ defmodule GroupherServer.CMS.Delegate.ArticleCURD do
1718

1819
alias Accounts.Model.User
1920
alias CMS.Model.{Author, Community, PinnedArticle, Embeds}
20-
alias CMS.Delegate.{ArticleCommunity, ArticleComment, ArticleTag, CommunityCURD}
21+
alias CMS.Delegate.{ArticleCommunity, ArticleComment, ArticleTag, CommunityCURD, CiteTasks}
2122

2223
alias Ecto.Multi
2324

@@ -163,6 +164,9 @@ defmodule GroupherServer.CMS.Delegate.ArticleCURD do
163164
|> Multi.run(:update_user_published_meta, fn _, _ ->
164165
Accounts.update_published_states(uid, thread)
165166
end)
167+
|> Multi.run(:block_tasks, fn _, %{create_article: article} ->
168+
Later.run({CiteTasks, :handle, [article]})
169+
end)
166170
# TODO: run mini tasks
167171
|> Multi.run(:mention_users, fn _, %{create_article: article} ->
168172
# article.body |> Jason.decode!() |> all kinds of small tasks
@@ -384,6 +388,7 @@ defmodule GroupherServer.CMS.Delegate.ArticleCURD do
384388
defp do_create_article(model, attrs, %Author{id: author_id}, %Community{id: community_id}) do
385389
# special article like Repo do not have :body, assign it with default-empty rich text
386390
body = Map.get(attrs, :body, Converter.Article.default_rich_text())
391+
meta = @default_article_meta |> Map.merge(%{thread: module_to_upcase(model)})
387392
attrs = attrs |> Map.merge(%{body: body})
388393

389394
with {:ok, attrs} <- add_rich_text_attrs(attrs) do
@@ -392,7 +397,7 @@ defmodule GroupherServer.CMS.Delegate.ArticleCURD do
392397
|> Ecto.Changeset.put_change(:emotions, @default_emotions)
393398
|> Ecto.Changeset.put_change(:author_id, author_id)
394399
|> Ecto.Changeset.put_change(:original_community_id, community_id)
395-
|> Ecto.Changeset.put_embed(:meta, @default_article_meta)
400+
|> Ecto.Changeset.put_embed(:meta, meta)
396401
|> Repo.insert()
397402
end
398403
end
@@ -444,7 +449,7 @@ defmodule GroupherServer.CMS.Delegate.ArticleCURD do
444449

445450
# create done
446451
defp result({:ok, %{set_active_at_timestamp: result}}) do
447-
Later.exec({__MODULE__, :notify_admin_new_article, [result]})
452+
Later.run({__MODULE__, :notify_admin_new_article, [result]})
448453
{:ok, result}
449454
end
450455

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
defmodule GroupherServer.CMS.Delegate.CiteTasks do
2+
@moduledoc """
3+
Run tasks on every block of an article when needed.
4+
5+
current task: "cite link" and "mention"
6+
7+
## cite link
8+
9+
Which on-site articles or comments have cited me is worth paying attention to.
10+
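Whom I cite matters less, since the links in the post already show that. This differs from the two-way links of GitHub issues, because here one usually does not need to track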
11+
whether the cited post has been resolved, merged, and so on.
12+
13+
Basic structure:
14+
15+
cited_thread, cited_article_id, [xxx_article]_id, [block_id, block2_id],
16+
17+
POST post_333 -> cited_article_333, [block_id, block2_id]]
18+
19+
cited_type, cited_content_id, [contents]_id, [block_id, cited_block_id],
20+
21+
cited_type: thread or comment
22+
content: article or comment
23+
# cited_article_comment_id, [xxx_article]_id, [block_id, block2_id, ...],
24+
"""
25+
26+
import Ecto.Query, warn: false
27+
import Helper.Utils, only: [get_config: 2, thread_of_article: 1, done: 1]
28+
import GroupherServer.CMS.Helper.Matcher
29+
import Helper.ErrorCode
30+
31+
alias GroupherServer.{CMS, Repo}
32+
alias CMS.Model.CitedContent
33+
alias Helper.ORM
34+
35+
alias Ecto.Multi
36+
37+
@site_host get_config(:general, :site_host)
38+
@article_threads get_config(:article, :threads)
39+
@valid_article_prefix Enum.map(@article_threads, &"#{@site_host}/#{&1}/")
40+
41+
def handle(%{body: body} = article) do
42+
with {:ok, %{"blocks" => blocks}} <- Jason.decode(body),
43+
article <- Repo.preload(article, author: :user) do
44+
Multi.new()
45+
|> Multi.run(:delete_all_cited_contents, fn _, _ ->
46+
delete_all_cited_contents(article)
47+
end)
48+
|> Multi.run(:update_cited_info, fn _, _ ->
49+
blocks
50+
|> Enum.reduce([], &(&2 ++ parse_cited_info_per_block(article, &1)))
51+
|> merge_same_cited_article_block
52+
|> update_cited_info
53+
end)
54+
|> Repo.transaction()
55+
|> result()
56+
end
57+
end
58+
59+
# delete all records before insert_all, this will dynamiclly update
60+
# those cited info when update article
61+
# 插入引用记录之前先全部清除,这样可以在更新文章的时候自动计算引用信息
62+
defp delete_all_cited_contents(article) do
63+
with {:ok, thread} <- thread_of_article(article),
64+
{:ok, info} <- match(thread) do
65+
query = from(c in CitedContent, where: field(c, ^info.foreign_key) == ^article.id)
66+
67+
ORM.delete_all(query, :if_exist)
68+
end
69+
end
70+
71+
# defp batch_done
72+
73+
defp update_cited_info(cited_contents) do
74+
clean_cited_contents = Enum.map(cited_contents, &Map.delete(&1, :cited_article))
75+
# IO.inspect(clean_cited_contents, label: "clean_cited_contents")
76+
with true <- {0, nil} !== Repo.insert_all(CitedContent, clean_cited_contents) do
77+
update_citing_count(cited_contents)
78+
else
79+
_ -> {:error, "insert cited content error"}
80+
end
81+
end
82+
83+
defp update_citing_count(cited_contents) do
84+
Enum.all?(cited_contents, fn content ->
85+
count_query = from(c in CitedContent, where: c.cited_by_id == ^content.cited_by_id)
86+
count = Repo.aggregate(count_query, :count)
87+
88+
cited_article = content.cited_article
89+
meta = Map.merge(cited_article.meta, %{citing_count: count})
90+
91+
case cited_article |> ORM.update_meta(meta) do
92+
{:ok, _} -> true
93+
{:error, _} -> false
94+
end
95+
end)
96+
|> done
97+
end
98+
99+
@doc """
100+
merge same cited article in different blocks
101+
e.g:
102+
[
103+
%{
104+
block_linker: ["block-zByQI"],
105+
cited_by_id: 190058,
106+
cited_by_type: "POST",
107+
post_id: 190059,
108+
user_id: 1413053
109+
},
110+
%{
111+
block_linker: ["block-zByQI", "block-ZgKJs"],
112+
cited_by_id: 190057,
113+
cited_by_type: "POST",
114+
post_id: 190059,
115+
user_id: 1413053
116+
},
117+
]
118+
"""
119+
defp merge_same_cited_article_block(cited_contents) do
120+
cited_contents
121+
|> Enum.reduce([], fn content, acc ->
122+
case Enum.find_index(acc, &(&1.cited_by_id == content.cited_by_id)) do
123+
nil ->
124+
acc ++ [content]
125+
126+
index ->
127+
List.update_at(
128+
acc,
129+
index,
130+
&Map.merge(&1, %{block_linker: &1.block_linker ++ content.block_linker})
131+
)
132+
end
133+
end)
134+
end
135+
136+
@doc """
137+
return fmt like:
138+
[
139+
%{
140+
block_linker: ["block-ZgKJs"],
141+
cited_by_id: 190057,
142+
cited_by_type: "POST",
143+
cited_article: #loaded,
144+
post_id: 190059,
145+
user_id: 1413053
146+
}
147+
...
148+
]
149+
"""
150+
defp parse_cited_info_per_block(article, %{"id" => block_id, "data" => %{"text" => text}}) do
151+
links_in_block = Floki.find(text, "a[href]")
152+
153+
Enum.reduce(links_in_block, [], fn link, acc ->
154+
with {:ok, cited_article} <- parse_cited_article(link),
155+
# do not cite artilce itself
156+
true <- article.id !== cited_article.id do
157+
List.insert_at(acc, 0, shape_cited_content(article, cited_article, block_id))
158+
else
159+
_ -> acc
160+
end
161+
end)
162+
|> Enum.uniq()
163+
end
164+
165+
defp shape_cited_content(article, cited_article, block_id) do
166+
{:ok, thread} = thread_of_article(article)
167+
{:ok, info} = match(thread)
168+
169+
%{
170+
cited_by_id: cited_article.id,
171+
cited_by_type: cited_article.meta.thread,
172+
# used for updating citing_count, avoid load again
173+
cited_article: cited_article,
174+
block_linker: [block_id],
175+
user_id: article.author.user.id
176+
}
177+
|> Map.put(info.foreign_key, article.id)
178+
end
179+
180+
defp parse_cited_article({"a", attrs, _}) do
181+
with {:ok, link} <- parse_link(attrs),
182+
true <- is_site_article_link?(link) do
183+
load_cited_article_from_url(link)
184+
end
185+
end
186+
187+
@doc """
188+
parse link from Floki parse result
189+
190+
e.g:
191+
[{"href", "https://coderplanets.com/post/190220", "bla", "bla"}] ->
192+
{:ok, "https://coderplanets.com/post/190220"}
193+
"""
194+
defp parse_link(attrs) do
195+
with {"href", link} <- Enum.find(attrs, fn {a, _v} -> a == "href" end) do
196+
{:ok, link}
197+
else
198+
_ -> {:error, "invalid fmt"}
199+
end
200+
end
201+
202+
# 检测是否是站内文章的链接
203+
defp is_site_article_link?(url) do
204+
Enum.any?(@valid_article_prefix, &String.starts_with?(url, &1))
205+
end
206+
207+
# get cited article from url
208+
# e.g: https://coderplanets.com/post/189993 -> ORM.find(Post, 189993)
209+
defp load_cited_article_from_url(url) do
210+
%{path: path} = URI.parse(url)
211+
path_list = path |> String.split("/")
212+
thread = path_list |> Enum.at(1) |> String.downcase() |> String.to_atom()
213+
article_id = path_list |> Enum.at(2)
214+
215+
with {:ok, info} <- match(thread) do
216+
ORM.find(info.model, article_id)
217+
end
218+
end
219+
220+
defp result({:ok, %{update_cited_info: result}}), do: {:ok, result}
221+
222+
defp result({:error, :update_cited_info, _result, _steps}) do
223+
{:error, [message: "cited article", code: ecode(:cite_artilce)]}
224+
end
225+
end

lib/groupher_server/cms/helper/macros.ex

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,6 @@ defmodule GroupherServer.CMS.Helper.Macros do
208208

209209
field(:active_at, :utc_datetime_usec)
210210
# TODO:
211-
# reference_articles
212211
# related_articles
213212
timestamps()
214213
end
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
defmodule GroupherServer.CMS.Model.CitedContent do
2+
@moduledoc false
3+
alias __MODULE__
4+
5+
use Ecto.Schema
6+
use Accessible
7+
8+
import Ecto.Changeset
9+
import GroupherServer.CMS.Helper.Macros
10+
11+
alias GroupherServer.{Accounts, CMS}
12+
alias Accounts.Model.User
13+
14+
alias CMS.Model.ArticleComment
15+
16+
@timestamps_opts [type: :utc_datetime_usec]
17+
18+
@required_fields ~w(cited_by_type cited_by_id user_id)a
19+
@article_cast_fields general_article_fields(:cast)
20+
@optional_fields ~w(article_comment_id block_linker)a ++ @article_cast_fields
21+
22+
@type t :: %CitedContent{}
23+
schema "cited_contents" do
24+
field(:cited_by_type, :string)
25+
field(:cited_by_id, :id)
26+
27+
belongs_to(:author, User, foreign_key: :user_id)
28+
belongs_to(:article_comment, ArticleComment, foreign_key: :article_comment_id)
29+
30+
article_belongs_to_fields()
31+
32+
field(:block_linker, {:array, :string})
33+
# content.block_linker = ["block-eee_block-bbb", "block-eee_block-bbb"]
34+
timestamps()
35+
end
36+
37+
@doc false
38+
def changeset(%CitedContent{} = cited_content, attrs) do
39+
cited_content
40+
|> cast(attrs, @optional_fields ++ @required_fields)
41+
|> validate_required(@required_fields)
42+
end
43+
44+
@doc false
45+
def update_changeset(%CitedContent{} = cited_content, attrs) do
46+
cited_content
47+
|> cast(attrs, @optional_fields ++ @required_fields)
48+
end
49+
end

lib/groupher_server/cms/models/embeds/article_meta.ex

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@ defmodule GroupherServer.CMS.Model.Embeds.ArticleMeta do
66
use Accessible
77
import Ecto.Changeset
88

9-
@optional_fields ~w(is_edited is_comment_locked upvoted_user_ids collected_user_ids viewed_user_ids reported_user_ids reported_count is_sinked can_undo_sink last_active_at)a
9+
@optional_fields ~w(thread is_edited is_comment_locked upvoted_user_ids collected_user_ids viewed_user_ids reported_user_ids reported_count is_sinked can_undo_sink last_active_at)a
1010

1111
@doc "for test usage"
1212
def default_meta() do
1313
%{
14+
thread: "POST",
1415
is_edited: false,
1516
is_comment_locked: false,
1617
upvoted_user_ids: [],
@@ -20,11 +21,13 @@ defmodule GroupherServer.CMS.Model.Embeds.ArticleMeta do
2021
reported_count: 0,
2122
is_sinked: false,
2223
can_undo_sink: true,
23-
last_active_at: nil
24+
last_active_at: nil,
25+
citing_count: 0
2426
}
2527
end
2628

2729
embedded_schema do
30+
field(:thread, :string)
2831
field(:is_edited, :boolean, default: false)
2932
field(:is_comment_locked, :boolean, default: false)
3033
# reaction history
@@ -38,6 +41,7 @@ defmodule GroupherServer.CMS.Model.Embeds.ArticleMeta do
3841
field(:can_undo_sink, :boolean, default: false)
3942
# if undo_sink, can recover last active_at from here
4043
field(:last_active_at, :utc_datetime_usec)
44+
field(:citing_count, :integer, default: 0)
4145
end
4246

4347
def changeset(struct, params) do

0 commit comments

Comments
 (0)