Grouping

In addition to field selection and computing aggregates, we also need to return the results of any requested grouping operations.

This is done by delegating to the eval_groups function:

    // Grouping is optional: `map` calls `eval_groups` only when the query
    // carries a grouping, and `transpose` turns the resulting
    // `Option<Result<_>>` into a `Result<Option<_>>` so that `?` can
    // propagate an evaluation error.
    let groups = query
        .groups
        .as_ref()
        .map(|grouping| {
            eval_groups(
                collection_relationships,
                variables,
                state,
                grouping,
                &paginated,
            )
        })
        .transpose()?;

eval_groups takes a set of rows, and proceeds largely like execute_query itself.

First, rows are partitioned into groups:

/// Evaluates a grouping over the given rows.
///
/// The rows are partitioned into chunks by their dimension values, the chunks
/// are sorted, each chunk is aggregated and filtered by the grouping's
/// predicate, and the resulting groups are paginated.
///
/// # Errors
///
/// Returns an error if any dimension, aggregate, predicate or ordering fails
/// to evaluate.
fn eval_groups(
    collection_relationships: &BTreeMap<models::RelationshipName, ndc_models::Relationship>,
    variables: &BTreeMap<models::VariableName, serde_json::Value>,
    state: &AppState,
    grouping: &ndc_models::Grouping,
    paginated: &[Row],
) -> Result<Vec<ndc_models::Group>> {
    // Partition the rows into chunks. A dimension that fails to evaluate is
    // reported to the caller with `?` instead of panicking.
    //
    // NOTE(review): a row joins the current chunk only when its dimensions
    // equal those of the row directly before it, so equal rows that are not
    // adjacent end up in separate chunks. This matches the behavior of an
    // itertools-style `chunk_by` on consecutive keys - confirm that the input
    // ordering makes this the intended partition.
    let mut chunks: Vec<Chunk> = vec![];
    for row in paginated {
        let dimensions = eval_dimensions(
            collection_relationships,
            variables,
            state,
            row,
            &grouping.dimensions,
        )?;

        match chunks.last_mut() {
            // Same key as the chunk being built: the row joins that chunk.
            Some(chunk) if chunk.dimensions == dimensions => chunk.rows.push(row.clone()),
            // First row, or the key changed: start a new chunk.
            _ => chunks.push(Chunk {
                dimensions,
                rows: vec![row.clone()],
            }),
        }
    }

The eval_dimensions function computes a vector of dimensions for each row:

/// Computes the dimension values of a single row.
///
/// The result holds one value per entry of `dimensions`, in the same order.
/// Evaluation stops at the first dimension that fails, and that error is
/// returned.
fn eval_dimensions(
    collection_relationships: &BTreeMap<models::RelationshipName, models::Relationship>,
    variables: &BTreeMap<models::VariableName, serde_json::Value>,
    state: &AppState,
    row: &Row,
    dimensions: &[ndc_models::Dimension],
) -> Result<Vec<serde_json::Value>> {
    // Collecting into `Result<Vec<_>>` short-circuits on the first `Err`.
    dimensions
        .iter()
        .map(|dimension| {
            eval_dimension(collection_relationships, variables, state, row, dimension)
        })
        .collect()
}

The only type of dimension we need to handle is a column. First the value of the column is computed by delegating to eval_column_at_path, and then any extraction function is evaluated using the eval_extraction function:

/// Evaluates a single dimension against a row.
///
/// The column value is looked up with `eval_column_at_path` and then passed,
/// together with the dimension's `extraction`, to `eval_extraction`.
///
/// # Errors
///
/// Returns the error from either of those two steps.
fn eval_dimension(
    collection_relationships: &BTreeMap<models::RelationshipName, models::Relationship>,
    variables: &BTreeMap<models::VariableName, serde_json::Value>,
    state: &AppState,
    row: &Row,
    dimension: &models::Dimension,
) -> Result<serde_json::Value> {
    // `Column` is the only dimension variant, so the pattern is irrefutable.
    let models::Dimension::Column {
        column_name,
        arguments,
        field_path,
        path,
        extraction,
    } = dimension;

    let column_value = eval_column_at_path(
        collection_relationships,
        variables,
        state,
        row,
        path,
        column_name,
        arguments,
        field_path.as_deref(),
    )?;

    eval_extraction(extraction, column_value)
}

Next, the partitions are sorted, using the group_sort function which is very similar to its row-based counterpart sort:

    // `chunks` is moved into `group_sort`; the ordering is driven by the
    // grouping's `order_by`.
    let sorted = group_sort(variables, chunks, &grouping.order_by)?;

Next, groups are aggregated and filtered:

    let mut groups: Vec<models::Group> = vec![];

    for chunk in &sorted {
        // Aggregates are evaluated before the predicate, so an aggregate error
        // is reported even for a group the predicate would discard.
        let mut aggregates: IndexMap<models::FieldName, serde_json::Value> = IndexMap::new();
        for (aggregate_name, aggregate) in &grouping.aggregates {
            aggregates.insert(
                aggregate_name.clone(),
                eval_aggregate(variables, aggregate, &chunk.rows)?,
            );
        }

        // Without a predicate every group is kept.
        let keep = match &grouping.predicate {
            Some(predicate) => eval_group_expression(variables, predicate, &chunk.rows)?,
            None => true,
        };

        if keep {
            groups.push(models::Group {
                // Cloned once, and only for groups that survive the filter.
                dimensions: chunk.dimensions.clone(),
                aggregates,
            });
        }
    }

The eval_group_expression function closely mirrors the eval_expression function, which performs the equivalent operation on individual rows.

Finally, the groups are paginated and returned:

    // Pagination is applied to the groups (not to rows), using the grouping's
    // own `limit` and `offset`. This binding shadows the `paginated` rows
    // parameter of the function.
    let paginated: Vec<models::Group> =
        paginate(groups.into_iter(), grouping.limit, grouping.offset);

    Ok(paginated)
}