1
0

Chunking searches (#787)

* new attempt

* revised searching CTEs

* prepare fix

* fix tests

* fixes

* restructured project_item to use queries

* search changes! fmt clippy prepare

* small changes
This commit is contained in:
Wyatt Verchere
2023-12-08 22:14:17 -08:00
committed by GitHub
parent 945e5a2dc3
commit 235f4f10ef
28 changed files with 1784 additions and 1026 deletions

View File

@@ -228,6 +228,20 @@ impl LoaderFieldType {
LoaderFieldType::ArrayEnum(_) => "array_enum",
}
}
pub fn is_array(&self) -> bool {
match self {
LoaderFieldType::ArrayInteger => true,
LoaderFieldType::ArrayText => true,
LoaderFieldType::ArrayBoolean => true,
LoaderFieldType::ArrayEnum(_) => true,
LoaderFieldType::Integer => false,
LoaderFieldType::Text => false,
LoaderFieldType::Boolean => false,
LoaderFieldType::Enum(_) => false,
}
}
}
#[derive(Clone, Serialize, Deserialize, Debug)]
@@ -283,7 +297,7 @@ pub struct QueryVersionField {
pub version_id: VersionId,
pub field_id: LoaderFieldId,
pub int_value: Option<i32>,
pub enum_value: Option<LoaderFieldEnumValue>,
pub enum_value: Option<LoaderFieldEnumValueId>,
pub string_value: Option<String>,
}
@@ -293,7 +307,7 @@ impl QueryVersionField {
self
}
pub fn with_enum_value(mut self, enum_value: LoaderFieldEnumValue) -> Self {
pub fn with_enum_value(mut self, enum_value: LoaderFieldEnumValueId) -> Self {
self.enum_value = Some(enum_value);
self
}
@@ -304,6 +318,27 @@ impl QueryVersionField {
}
}
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct QueryLoaderField {
pub id: LoaderFieldId,
pub field: String,
pub field_type: String,
pub enum_type: Option<LoaderFieldEnumId>,
pub min_val: Option<i32>,
pub max_val: Option<i32>,
pub optional: bool,
}
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct QueryLoaderFieldEnumValue {
pub id: LoaderFieldEnumValueId,
pub enum_id: LoaderFieldEnumId,
pub value: String,
pub ordering: Option<i32>,
pub created: DateTime<Utc>,
pub metadata: Option<serde_json::Value>,
}
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct SideType {
pub id: SideTypeId,
@@ -710,11 +745,11 @@ impl VersionField {
}
}
VersionFieldValue::Enum(_, v) => {
query_version_fields.push(base.clone().with_enum_value(v))
query_version_fields.push(base.clone().with_enum_value(v.id))
}
VersionFieldValue::ArrayEnum(_, v) => {
for ev in v {
query_version_fields.push(base.clone().with_enum_value(ev));
query_version_fields.push(base.clone().with_enum_value(ev.id));
}
}
};
@@ -733,7 +768,7 @@ impl VersionField {
l.field_id.0,
l.version_id.0,
l.int_value,
l.enum_value.as_ref().map(|e| e.id.0),
l.enum_value.as_ref().map(|e| e.0),
l.string_value.clone(),
)
})
@@ -807,106 +842,53 @@ impl VersionField {
}
pub fn from_query_json(
loader_fields: Option<serde_json::Value>,
version_fields: Option<serde_json::Value>,
loader_field_enum_values: Option<serde_json::Value>,
query_version_field_combined: Vec<QueryVersionField>,
query_loader_fields: &[QueryLoaderField],
query_loader_field_enum_values: &[QueryLoaderFieldEnumValue],
allow_many: bool, // If true, will allow multiple values for a single singleton field, returning them as separate VersionFields
// allow_many = true, multiple Bools => two VersionFields of Bool
// allow_many = false, multiple Bools => error
// multiple Arraybools => 1 VersionField of ArrayBool
) -> Vec<VersionField> {
#[derive(Deserialize, Debug)]
struct JsonLoaderField {
version_id: i64,
lf_id: i32,
field: String,
field_type: String,
enum_type: Option<i32>,
min_val: Option<i32>,
max_val: Option<i32>,
optional: bool,
}
#[derive(Deserialize, Debug)]
struct JsonVersionField {
field_id: i32,
int_value: Option<i32>,
enum_value: Option<i32>,
string_value: Option<String>,
}
#[derive(Deserialize, Debug)]
struct JsonLoaderFieldEnumValue {
id: i32,
enum_id: i32,
value: String,
ordering: Option<i32>,
created: DateTime<Utc>,
metadata: Option<serde_json::Value>,
}
let query_loader_fields: Vec<JsonLoaderField> = loader_fields
.and_then(|x| serde_json::from_value(x).ok())
.unwrap_or_default();
let query_version_field_combined: Vec<JsonVersionField> = version_fields
.and_then(|x| serde_json::from_value(x).ok())
.unwrap_or_default();
let query_loader_field_enum_values: Vec<JsonLoaderFieldEnumValue> =
loader_field_enum_values
.and_then(|x| serde_json::from_value(x).ok())
.unwrap_or_default();
query_loader_fields
.into_iter()
.iter()
.flat_map(|q| {
let loader_field_type = match LoaderFieldType::build(&q.field_type, q.enum_type) {
Some(lft) => lft,
None => return vec![],
};
let loader_field_type =
match LoaderFieldType::build(&q.field_type, q.enum_type.map(|l| l.0)) {
Some(lft) => lft,
None => return vec![],
};
let loader_field = LoaderField {
id: LoaderFieldId(q.lf_id),
id: q.id,
field: q.field.clone(),
field_type: loader_field_type,
optional: q.optional,
min_val: q.min_val,
max_val: q.max_val,
};
let version_id = VersionId(q.version_id);
let values = query_version_field_combined
.iter()
.filter_map(|qvf| {
if qvf.field_id == q.lf_id {
let lfev = query_loader_field_enum_values
.iter()
.find(|x| Some(x.id) == qvf.enum_value);
Some(QueryVersionField {
version_id,
field_id: LoaderFieldId(qvf.field_id),
int_value: qvf.int_value,
enum_value: lfev.map(|lfev| LoaderFieldEnumValue {
id: LoaderFieldEnumValueId(lfev.id),
enum_id: LoaderFieldEnumId(lfev.enum_id),
value: lfev.value.clone(),
ordering: lfev.ordering,
created: lfev.created,
metadata: lfev.metadata.clone().unwrap_or_default(),
}),
string_value: qvf.string_value.clone(),
})
} else {
None
}
})
// todo: avoid clone here?
let version_fields = query_version_field_combined
.iter()
.filter(|qvf| qvf.field_id == q.id)
.cloned()
.collect::<Vec<_>>();
if allow_many {
VersionField::build_many(loader_field, version_id, values)
.unwrap_or_default()
.into_iter()
.unique()
.collect_vec()
VersionField::build_many(
loader_field,
version_fields,
query_loader_field_enum_values,
)
.unwrap_or_default()
.into_iter()
.unique()
.collect_vec()
} else {
match VersionField::build(loader_field, version_id, values) {
match VersionField::build(
loader_field,
version_fields,
query_loader_field_enum_values,
) {
Ok(vf) => vec![vf],
Err(_) => vec![],
}
@@ -917,10 +899,14 @@ impl VersionField {
pub fn build(
loader_field: LoaderField,
version_id: VersionId,
query_version_fields: Vec<QueryVersionField>,
query_loader_field_enum_values: &[QueryLoaderFieldEnumValue],
) -> Result<VersionField, DatabaseError> {
let value = VersionFieldValue::build(&loader_field.field_type, query_version_fields)?;
let (version_id, value) = VersionFieldValue::build(
&loader_field.field_type,
query_version_fields,
query_loader_field_enum_values,
)?;
Ok(VersionField {
version_id,
field_id: loader_field.id,
@@ -931,13 +917,17 @@ impl VersionField {
pub fn build_many(
loader_field: LoaderField,
version_id: VersionId,
query_version_fields: Vec<QueryVersionField>,
query_loader_field_enum_values: &[QueryLoaderFieldEnumValue],
) -> Result<Vec<VersionField>, DatabaseError> {
let values = VersionFieldValue::build_many(&loader_field.field_type, query_version_fields)?;
let values = VersionFieldValue::build_many(
&loader_field.field_type,
query_version_fields,
query_loader_field_enum_values,
)?;
Ok(values
.into_iter()
.map(|value| VersionField {
.map(|(version_id, value)| VersionField {
version_id,
field_id: loader_field.id,
field_name: loader_field.field.clone(),
@@ -1030,13 +1020,14 @@ impl VersionFieldValue {
pub fn build(
field_type: &LoaderFieldType,
qvfs: Vec<QueryVersionField>,
) -> Result<VersionFieldValue, DatabaseError> {
qlfev: &[QueryLoaderFieldEnumValue],
) -> Result<(VersionId, VersionFieldValue), DatabaseError> {
match field_type {
LoaderFieldType::Integer
| LoaderFieldType::Text
| LoaderFieldType::Boolean
| LoaderFieldType::Enum(_) => {
let mut fields = Self::build_many(field_type, qvfs)?;
let mut fields = Self::build_many(field_type, qvfs, qlfev)?;
if fields.len() > 1 {
return Err(DatabaseError::SchemaError(format!(
"Multiple fields for field {}",
@@ -1054,7 +1045,7 @@ impl VersionFieldValue {
| LoaderFieldType::ArrayText
| LoaderFieldType::ArrayBoolean
| LoaderFieldType::ArrayEnum(_) => {
let fields = Self::build_many(field_type, qvfs)?;
let fields = Self::build_many(field_type, qvfs, qlfev)?;
Ok(fields.into_iter().next().ok_or_else(|| {
DatabaseError::SchemaError(format!(
"No version fields for field {}",
@@ -1066,14 +1057,15 @@ impl VersionFieldValue {
}
// Build from internal query data
// This encapsulates reundant behavior in db querie -> object conversions
// This encapsulates redundant behavior in db query -> object conversions
// This allows for multiple fields to be built at once. If there are multiple fields,
// but the type only allows for a single field, then multiple VersionFieldValues will be returned
// If there are multiple fields, and the type allows for multiple fields, then a single VersionFieldValue will be returned (array.len == 1)
pub fn build_many(
field_type: &LoaderFieldType,
qvfs: Vec<QueryVersionField>,
) -> Result<Vec<VersionFieldValue>, DatabaseError> {
qlfev: &[QueryLoaderFieldEnumValue],
) -> Result<Vec<(VersionId, VersionFieldValue)>, DatabaseError> {
let field_name = field_type.to_str();
let did_not_exist_error = |field_name: &str, desired_field: &str| {
DatabaseError::SchemaError(format!(
@@ -1082,82 +1074,168 @@ impl VersionFieldValue {
))
};
Ok(match field_type {
// Check errors- version_id must all be the same
let version_id = qvfs
.iter()
.map(|qvf| qvf.version_id)
.unique()
.collect::<Vec<_>>();
// If the field type is a non-array, then the reason for multiple version ids is that there are multiple versions being aggregated, and those version ids are contained within.
// If the field type is an array, then the reason for multiple version ids is that there are multiple values for a single version
// (or a greater aggregation between multiple arrays, in which case the per-field version is lost, so we just take the first one and use it for that)
let version_id = version_id.into_iter().next().unwrap_or(VersionId(0));
let field_id = qvfs
.iter()
.map(|qvf| qvf.field_id)
.unique()
.collect::<Vec<_>>();
if field_id.len() > 1 {
return Err(DatabaseError::SchemaError(format!(
"Multiple field ids for field {}",
field_name
)));
}
let mut value = match field_type {
// Singleton fields
// If there are multiple, we assume multiple versions are being concatenated
LoaderFieldType::Integer => qvfs
.into_iter()
.map(|qvf| {
Ok(VersionFieldValue::Integer(
qvf.int_value
.ok_or(did_not_exist_error(field_name, "int_value"))?,
Ok((
qvf.version_id,
VersionFieldValue::Integer(
qvf.int_value
.ok_or(did_not_exist_error(field_name, "int_value"))?,
),
))
})
.collect::<Result<Vec<VersionFieldValue>, DatabaseError>>()?,
.collect::<Result<Vec<(VersionId, VersionFieldValue)>, DatabaseError>>()?,
LoaderFieldType::Text => qvfs
.into_iter()
.map(|qvf| {
Ok::<VersionFieldValue, DatabaseError>(VersionFieldValue::Text(
qvf.string_value
.ok_or(did_not_exist_error(field_name, "string_value"))?,
Ok((
qvf.version_id,
VersionFieldValue::Text(
qvf.string_value
.ok_or(did_not_exist_error(field_name, "string_value"))?,
),
))
})
.collect::<Result<Vec<VersionFieldValue>, DatabaseError>>()?,
.collect::<Result<Vec<(VersionId, VersionFieldValue)>, DatabaseError>>()?,
LoaderFieldType::Boolean => qvfs
.into_iter()
.map(|qvf| {
Ok::<VersionFieldValue, DatabaseError>(VersionFieldValue::Boolean(
qvf.int_value
.ok_or(did_not_exist_error(field_name, "int_value"))?
!= 0,
))
})
.collect::<Result<Vec<VersionFieldValue>, DatabaseError>>()?,
LoaderFieldType::Enum(id) => qvfs
.into_iter()
.map(|qvf| {
Ok::<VersionFieldValue, DatabaseError>(VersionFieldValue::Enum(
*id,
qvf.enum_value
.ok_or(did_not_exist_error(field_name, "enum_value"))?,
))
})
.collect::<Result<Vec<VersionFieldValue>, DatabaseError>>()?,
LoaderFieldType::ArrayInteger => vec![VersionFieldValue::ArrayInteger(
qvfs.into_iter()
.map(|qvf| {
qvf.int_value
.ok_or(did_not_exist_error(field_name, "int_value"))
})
.collect::<Result<_, _>>()?,
)],
LoaderFieldType::ArrayText => vec![VersionFieldValue::ArrayText(
qvfs.into_iter()
.map(|qvf| {
qvf.string_value
.ok_or(did_not_exist_error(field_name, "string_value"))
})
.collect::<Result<_, _>>()?,
)],
LoaderFieldType::ArrayBoolean => vec![VersionFieldValue::ArrayBoolean(
qvfs.into_iter()
.map(|qvf| {
Ok::<bool, DatabaseError>(
Ok((
qvf.version_id,
VersionFieldValue::Boolean(
qvf.int_value
.ok_or(did_not_exist_error(field_name, "int_value"))?
!= 0,
)
})
.collect::<Result<_, _>>()?,
),
))
})
.collect::<Result<Vec<(VersionId, VersionFieldValue)>, DatabaseError>>()?,
LoaderFieldType::Enum(id) => qvfs
.into_iter()
.map(|qvf| {
Ok((
qvf.version_id,
VersionFieldValue::Enum(*id, {
let enum_id = qvf
.enum_value
.ok_or(did_not_exist_error(field_name, "enum_value"))?;
let lfev = qlfev
.iter()
.find(|x| x.id == enum_id)
.ok_or(did_not_exist_error(field_name, "enum_value"))?;
LoaderFieldEnumValue {
id: lfev.id,
enum_id: lfev.enum_id,
value: lfev.value.clone(),
ordering: lfev.ordering,
created: lfev.created,
metadata: lfev.metadata.clone().unwrap_or_default(),
}
}),
))
})
.collect::<Result<Vec<(VersionId, VersionFieldValue)>, DatabaseError>>()?,
// Array fields
// We concatenate into one array
LoaderFieldType::ArrayInteger => vec![(
version_id,
VersionFieldValue::ArrayInteger(
qvfs.into_iter()
.map(|qvf| {
qvf.int_value
.ok_or(did_not_exist_error(field_name, "int_value"))
})
.collect::<Result<_, _>>()?,
),
)],
LoaderFieldType::ArrayEnum(id) => vec![VersionFieldValue::ArrayEnum(
*id,
qvfs.into_iter()
.map(|qvf| {
qvf.enum_value
.ok_or(did_not_exist_error(field_name, "enum_value"))
})
.collect::<Result<_, _>>()?,
LoaderFieldType::ArrayText => vec![(
version_id,
VersionFieldValue::ArrayText(
qvfs.into_iter()
.map(|qvf| {
qvf.string_value
.ok_or(did_not_exist_error(field_name, "string_value"))
})
.collect::<Result<_, _>>()?,
),
)],
})
LoaderFieldType::ArrayBoolean => vec![(
version_id,
VersionFieldValue::ArrayBoolean(
qvfs.into_iter()
.map(|qvf| {
Ok::<bool, DatabaseError>(
qvf.int_value
.ok_or(did_not_exist_error(field_name, "int_value"))?
!= 0,
)
})
.collect::<Result<_, _>>()?,
),
)],
LoaderFieldType::ArrayEnum(id) => vec![(
version_id,
VersionFieldValue::ArrayEnum(
*id,
qvfs.into_iter()
.map(|qvf| {
let enum_id = qvf
.enum_value
.ok_or(did_not_exist_error(field_name, "enum_value"))?;
let lfev = qlfev
.iter()
.find(|x| x.id == enum_id)
.ok_or(did_not_exist_error(field_name, "enum_value"))?;
Ok::<_, DatabaseError>(LoaderFieldEnumValue {
id: lfev.id,
enum_id: lfev.enum_id,
value: lfev.value.clone(),
ordering: lfev.ordering,
created: lfev.created,
metadata: lfev.metadata.clone().unwrap_or_default(),
})
})
.collect::<Result<_, _>>()?,
),
)],
};
// Sort arrayenums by ordering, then by created
for (_, v) in value.iter_mut() {
if let VersionFieldValue::ArrayEnum(_, v) = v {
v.sort_by(|a, b| a.ordering.cmp(&b.ordering).then(a.created.cmp(&b.created)));
}
}
Ok(value)
}
// Serialize to internal value, such as for converting to user-facing JSON