Skip to content

Commit 3c1a427

Browse files
committed
#579 remove rdf search
1 parent 59ac5f0 commit 3c1a427

File tree

8 files changed

+30
-180
lines changed

8 files changed

+30
-180
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ See [STATUS.md](server/STATUS.md) to learn more about which features will remain
1818
- Refactor static file asset hosting #578
1919
- Meta tags server side #577
2020
- Include JSON-AD in initial response, speed up first render #511
21+
- Remove feature to index external RDF files and search them #579
2122

2223
## [v0.34.0] - 2022-10-31
2324

server/README.md

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ https://user-images.githubusercontent.com/2183313/139728539-d69b899f-6f9b-44cb-a
3636
- [Table of contents](#table-of-contents)
3737
- [When should you use this](#when-should-you-use-this)
3838
- [When _not_ to use this](#when-not-to-use-this)
39-
- [Installation & getting started](#installation--getting-started)
39+
- [Installation \& getting started](#installation--getting-started)
4040
- [1. Run using docker](#1-run-using-docker)
4141
- [2. Install desktop build (macOS only)](#2-install-desktop-build-macos-only)
4242
- [3. Run pre-compiled binary](#3-run-pre-compiled-binary)
@@ -45,19 +45,19 @@ https://user-images.githubusercontent.com/2183313/139728539-d69b899f-6f9b-44cb-a
4545
- [Initial setup and configuration](#initial-setup-and-configuration)
4646
- [Running using a tunneling service (easy mode)](#running-using-a-tunneling-service-easy-mode)
4747
- [HTTPS Setup on a VPS (static IP required)](#https-setup-on-a-vps-static-ip-required)
48+
- [HTTPS Setup using external HTTPS proxy](#https-setup-using-external-https-proxy)
4849
- [Usage](#usage)
4950
- [Using Atomic-Server with the browser GUI](#using-atomic-server-with-the-browser-gui)
5051
- [Use `atomic-cli` as client](#use-atomic-cli-as-client)
5152
- [API](#api)
52-
- [FAQ & Troubleshooting](#faq--troubleshooting)
53+
- [FAQ \& Troubleshooting](#faq--troubleshooting)
5354
- [Can / should I create backups?](#can--should-i-create-backups)
5455
- [I lost the key / secret to my Root Agent, and the `/setup` invite is no longer usable! What now?](#i-lost-the-key--secret-to-my-root-agent-and-the-setup-invite-is-no-longer-usable-what-now)
5556
- [How do I migrate my data to a new domain?](#how-do-i-migrate-my-data-to-a-new-domain)
5657
- [How do I reset my database?](#how-do-i-reset-my-database)
5758
- [How do I make my data private, yet available online?](#how-do-i-make-my-data-private-yet-available-online)
5859
- [Items are missing in my Collections / Search results](#items-are-missing-in-my-collections--search-results)
5960
- [I get a `failed to retrieve` error when opening](#i-get-a-failed-to-retrieve-error-when-opening)
60-
- [What is `rdf-search` mode?](#what-is-rdf-search-mode)
6161
- [Can I embed Atomic-Server in another application?](#can-i-embed-atomic-server-in-another-application)
6262
- [Where is my data stored on my machine?](#where-is-my-data-stored-on-my-machine)
6363

@@ -279,11 +279,6 @@ Also, if you can, recreate and describe the indexing issue in the issue tracker,
279279

280280
Try re-initializing atomic server `atomic-server --initialize`.
281281

282-
### What is `rdf-search` mode?
283-
284-
This turns `atomic-server` into a full-text search server that indexes RDF Turtle documents.
285-
Check out [the readme](./rdf-search.md).
286-
287282
### Can I embed Atomic-Server in another application?
288283

289284
Yes. This is what I'm doing with the Tauri desktop distribution of Atomic-Server.

server/example_requests.http

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,6 @@ Accept: application/ld+json
2222
GET http://localhost:9883/search?q=Foo&include=true HTTP/1.1
2323
Accept: application/ld+json
2424

25-
### Index an (RDF) document for search
26-
POST http://localhost:9883/search HTTP/1.1
27-
Content-Type: text/turtle
28-
29-
@prefix schema: <http://schema.org/> .
30-
<http://example.com/foo> a schema:Person ;
31-
schema:name "Foo" .
32-
<http://example.com/bar> a schema:Person ;
33-
schema:name "asdfsajhdfgbasdf" .
34-
3525
### Send a Commit
3626
### The hard part here is setting the correct signature.
3727
### Use a library (@tomic/lib for JS, and atomic_lib for Rust).

server/rdf-search.md

Lines changed: 0 additions & 56 deletions
This file was deleted.

server/src/config.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,6 @@ pub struct Opts {
6969
#[clap(long, env = "ATOMIC_DATA_DIR")]
7070
pub data_dir: Option<PathBuf>,
7171

72-
/// CAUTION: Makes data publicly readable on the `/search` endpoint. When enabled, it allows POSTing to the /search endpoint and returns search results as single triples, without performing authentication checks. See https://github.com/atomicdata-dev/atomic-data-rust/blob/master/server/rdf-search.md
73-
#[clap(long, env = "ATOMIC_RDF_SEARCH")]
74-
pub rdf_search: bool,
75-
7672
/// By default, Atomic-Server keeps previous versions of resources indexed in Search. When enabling this flag, previous versions of resources are removed from the search index when their values are updated.
7773
#[clap(long, env = "ATOMIC_REMOVE_PREVIOUS_SEARCH")]
7874
pub remove_previous_search: bool,

server/src/handlers/search.rs

Lines changed: 23 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ pub async fn search_query(
8888
.search(&query, &TopDocs::with_limit(initial_results_limit))
8989
.map_err(|e| format!("Error with creating search results: {} ", e))?;
9090

91-
let (subjects, _atoms) = docs_to_resources(top_docs, &fields, &searcher)?;
91+
let subjects = docs_to_resources(top_docs, &fields, &searcher)?;
9292

9393
// Create a valid atomic data resource.
9494
// You'd think there would be a simpler way of getting the requested URL...
@@ -101,92 +101,35 @@ pub async fn search_query(
101101
let mut results_resource = atomic_lib::plugins::search::search_endpoint().to_resource(store)?;
102102
results_resource.set_subject(subject.clone());
103103

104-
if appstate.config.opts.rdf_search {
105-
// Always return all subjects in `--rdf-search` mode, don't do authentication
106-
results_resource.set_propval(urls::ENDPOINT_RESULTS.into(), subjects.into(), store)?;
107-
} else {
108-
// Default case: return full resources, do authentication
109-
let mut resources: Vec<Resource> = Vec::new();
110-
111-
// This is a pretty expensive operation. We need to check the rights for the subjects to prevent data leaks.
112-
// But we could probably do some things to speed this up: make it async / parallel, check admin rights.
113-
// https://github.com/atomicdata-dev/atomic-data-rust/issues/279
114-
// https://github.com/atomicdata-dev/atomic-data-rust/issues/280
115-
let for_agent = crate::helpers::get_client_agent(req.headers(), &appstate, subject)?;
116-
for s in subjects {
117-
match store.get_resource_extended(&s, true, for_agent.as_deref()) {
118-
Ok(r) => {
119-
if resources.len() < limit {
120-
resources.push(r);
121-
} else {
122-
break;
123-
}
124-
}
125-
Err(_e) => {
126-
tracing::debug!("Skipping search result: {} : {}", s, _e);
127-
continue;
104+
// Return full resources, do authentication
105+
let mut resources: Vec<Resource> = Vec::new();
106+
107+
// This is a pretty expensive operation. We need to check the rights for the subjects to prevent data leaks.
108+
// But we could probably do some things to speed this up: make it async / parallel, check admin rights.
109+
// https://github.com/atomicdata-dev/atomic-data-rust/issues/279
110+
// https://github.com/atomicdata-dev/atomic-data-rust/issues/280
111+
let for_agent = crate::helpers::get_client_agent(req.headers(), &appstate, subject)?;
112+
for s in subjects {
113+
match store.get_resource_extended(&s, true, for_agent.as_deref()) {
114+
Ok(r) => {
115+
if resources.len() < limit {
116+
resources.push(r);
117+
} else {
118+
break;
128119
}
129120
}
121+
Err(_e) => {
122+
tracing::debug!("Skipping search result: {} : {}", s, _e);
123+
continue;
124+
}
130125
}
131-
results_resource.set_propval(urls::ENDPOINT_RESULTS.into(), resources.into(), store)?;
132126
}
127+
results_resource.set_propval(urls::ENDPOINT_RESULTS.into(), resources.into(), store)?;
133128
let mut builder = HttpResponse::Ok();
134129
// TODO: support other serialization options
135130
Ok(builder.body(results_resource.to_json_ad()?))
136131
}
137132

138-
/// Posts a Turtle RDF document to index its triples in search
139-
#[tracing::instrument(skip(appstate))]
140-
pub async fn search_index_rdf(
141-
appstate: web::Data<AppState>,
142-
body: String,
143-
) -> AtomicServerResult<HttpResponse> {
144-
// Parse Turtle
145-
use rio_api::parser::TriplesParser;
146-
use rio_turtle::{TurtleError, TurtleParser};
147-
148-
let mut writer = appstate.search_state.writer.write()?;
149-
let fields = crate::search::get_schema_fields(&appstate.search_state)?;
150-
151-
TurtleParser::new(body.as_ref(), None)
152-
.parse_all(&mut |t| {
153-
match (
154-
get_inner_value(t.subject.into()),
155-
get_inner_value(t.predicate.into()),
156-
get_inner_value(t.object),
157-
) {
158-
(Some(s), Some(p), Some(o)) => {
159-
crate::search::add_triple(&writer, s, p, o, None, &fields).ok();
160-
}
161-
_ => return Ok(()),
162-
};
163-
Ok(()) as Result<(), TurtleError>
164-
})
165-
.map_err(|e| format!("Error parsing turtle: {}", e))?;
166-
167-
// Store the changes to the writer
168-
writer.commit()?;
169-
let mut builder = HttpResponse::Ok();
170-
Ok(builder.body("Added turtle to store"))
171-
}
172-
173-
// Returns the inner value of a Term in an RDF triple. If it's a blank node or a triple inside a triple, it will return None.
174-
use rio_api::model::Term;
175-
fn get_inner_value(t: Term) -> Option<String> {
176-
match t {
177-
Term::Literal(lit) => match lit {
178-
rio_api::model::Literal::Simple { value } => Some(value.into()),
179-
rio_api::model::Literal::LanguageTaggedString { value, language: _ } => {
180-
Some(value.into())
181-
}
182-
rio_api::model::Literal::Typed { value, datatype: _ } => Some(value.into()),
183-
},
184-
Term::NamedNode(nn) => Some(nn.iri.into()),
185-
Term::BlankNode(_bn) => None,
186-
Term::Triple(_) => None,
187-
}
188-
}
189-
190133
#[derive(Debug, std::hash::Hash, Eq, PartialEq)]
191134
pub struct StringAtom {
192135
pub subject: String,
@@ -286,29 +229,18 @@ fn docs_to_resources(
286229
docs: Vec<(f32, tantivy::DocAddress)>,
287230
fields: &Fields,
288231
searcher: &tantivy::LeasedItem<tantivy::Searcher>,
289-
) -> Result<(Vec<String>, Vec<StringAtom>), AtomicServerError> {
232+
) -> Result<Vec<String>, AtomicServerError> {
290233
let mut subjects: HashSet<String> = HashSet::new();
291-
// These are not used at this moment, but would be quite useful in RDF context.
292-
let mut atoms: HashSet<StringAtom> = HashSet::new();
293234

294235
// convert found documents to resources
295236
for (_score, doc_address) in docs {
296237
let retrieved_doc = searcher.doc(doc_address)?;
297238
let subject_val = retrieved_doc.get_first(fields.subject).ok_or("No 'subject' in search doc found. This is required when indexing. Run with --rebuild-index")?;
298-
let prop_val = retrieved_doc.get_first(fields.property).ok_or("No 'property' in search doc found. This is required when indexing. Run with --rebuild-index")?;
299-
let value_val = retrieved_doc.get_first(fields.value).ok_or("No 'value' in search doc found. This is required when indexing. Run with --rebuild-index")?;
300239

301240
let subject = unpack_value(subject_val, &retrieved_doc, "Subject".to_string())?;
302-
let property = unpack_value(prop_val, &retrieved_doc, "Property".to_string())?;
303-
let value = unpack_value(value_val, &retrieved_doc, "Value".to_string())?;
304241

305242
subjects.insert(subject.clone());
306-
atoms.insert(StringAtom {
307-
subject,
308-
property,
309-
value,
310-
});
311243
}
312244

313-
Ok((subjects.into_iter().collect(), atoms.into_iter().collect()))
245+
Ok(subjects.into_iter().collect())
314246
}

server/src/routes.rs

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Contains routing logic, sends the client to the correct handler.
22
//! We should try to minimize what happens in here, since most logic should be defined in Atomic Data - not in the server itself.
33
4-
use crate::{config::Config, content_types, handlers};
4+
use crate::{content_types, handlers};
55
use actix_web::{guard, http::Method, web};
66
use actix_web_static_files::ResourceFiles;
77

@@ -15,7 +15,7 @@ include!(concat!(env!("OUT_DIR"), "/generated.rs"));
1515
/// Set up the Actix server routes. This defines which paths are used.
1616
// Keep in mind that the order of these matters. An early, greedy route will take
1717
// precedence over a later route.
18-
pub fn config_routes(app: &mut actix_web::web::ServiceConfig, config: &Config) {
18+
pub fn config_routes(app: &mut actix_web::web::ServiceConfig) {
1919
app.service(web::resource("/ws").to(handlers::web_sockets::web_socket_handler))
2020
.service(web::resource("/download/{path:[^{}]+}").to(handlers::download::handle_download))
2121
// This `generate` imports the static files from the `app_assets` folder
@@ -45,14 +45,6 @@ pub fn config_routes(app: &mut actix_web::web::ServiceConfig, config: &Config) {
4545
.guard(guard::Method(Method::GET))
4646
.to(handlers::search::search_query),
4747
);
48-
if config.opts.rdf_search {
49-
tracing::info!("RDF search enabled. You can POST to /search to index RDF documents.");
50-
app.service(
51-
web::resource("/search")
52-
.guard(guard::Method(Method::POST))
53-
.to(handlers::search::search_index_rdf),
54-
);
55-
}
5648
app.service(web::resource(ANY).to(handlers::resource::handle_get_resource))
5749
// Also allow the home resource (not matched by the previous one)
5850
.service(web::resource("/").to(handlers::resource::handle_get_resource));

server/src/serve.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ pub async fn serve(config: crate::config::Config) -> AtomicServerResult<()> {
5252
.wrap(tracing_actix_web::TracingLogger::default())
5353
.wrap(middleware::Compress::default())
5454
// Here are the actual handlers / endpoints
55-
.configure(|app| crate::routes::config_routes(app, &appstate.config))
55+
.configure(crate::routes::config_routes)
5656
.default_service(web::to(|| {
5757
tracing::error!("Wrong route, should not happen with normal requests");
5858
actix_web::HttpResponse::NotFound()

0 commit comments

Comments
 (0)