Skip to content

Commit 1cf304e

Browse files
authored
Add pg_query::summary() (#62)
1 parent 66eb7be commit 1cf304e

File tree

12 files changed

+1851
-1
lines changed

12 files changed

+1851
-1
lines changed

Cargo.lock

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ cc = "1.0.83"
3434
glob = "0.3.1"
3535

3636
[dev-dependencies]
37+
brunch = "0.11"
3738
easy-parallel = "3.2.0"
3839
pretty_assertions = "1.4.0"
3940
regex = "1.6.0"
41+
42+
[[bench]]
43+
name = "parse_vs_summary"
44+
harness = false

benches/parse_vs_summary.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
use brunch::Bench;
2+
use pg_query;
3+
4+
// Benchmarks `pg_query::parse` against `pg_query::summary`, once on their own
// and once combined with truncation to 50.
brunch::benches!(
    Bench::new("parse")
        .run_seeded_with(seed, |sql| pg_query::parse(&sql)),
    Bench::new("summary")
        .run_seeded_with(seed, |sql| pg_query::summary(&sql, -1)),
    // The truncating pair is seeded with the smaller `less_mean_seed` query:
    // the parse + truncate variant crashed on the full-size one.
    Bench::new("parse + truncate").run_seeded_with(less_mean_seed, |sql| {
        pg_query::parse(&sql).unwrap().truncate(50).unwrap()
    }),
    Bench::new("summary + truncate")
        .run_seeded_with(less_mean_seed, |sql| pg_query::summary(&sql, 50)),
);
11+
12+
fn less_mean_seed() -> String {
13+
build_query(30)
14+
}
15+
16+
fn seed() -> String {
17+
build_query(500)
18+
}
19+
20+
/// Builds a synthetic benchmark query.
///
/// Starts from `SELECT * FROM t` and appends one `JOIN t{i} ...` clause for
/// every `i` in `0..table_references`. Each clause carries an `IN` list and
/// an `IN (SELECT ...)` sub-select. A zero or negative `table_references`
/// yields the bare `SELECT * FROM t`.
fn build_query(table_references: i32) -> String {
    use std::fmt::Write as _;

    let mut query = String::from("SELECT * FROM t");
    for i in 0..table_references {
        // Append in place. Rebuilding the string with `format!("{query} ...")`
        // copies everything accumulated so far on every iteration, which is
        // quadratic in the number of joins.
        write!(
            query,
            " JOIN t{i} ON t.id = t{i}.t_id AND t{i}.k IN (1, 2, 3, 4) AND t{i}.f IN (SELECT o FROM p WHERE q = 'foo')"
        )
        .expect("writing to a String cannot fail");
    }
    query
}

build.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
2121
// Copy the relevant source files to the OUT_DIR
2222
let source_paths = vec![
2323
build_path.join("pg_query").with_extension("h"),
24+
build_path.join("postgres_deparse").with_extension("h"),
2425
build_path.join("Makefile"),
2526
build_path.join("src"),
2627
build_path.join("protobuf"),

libpg_query

Submodule libpg_query updated 222 files

src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ mod parse_result;
4848
#[rustfmt::skip]
4949
pub mod protobuf;
5050
mod query;
51+
mod summary;
52+
mod summary_result;
5153
mod truncate;
5254

5355
pub use error::*;
@@ -56,6 +58,8 @@ pub use node_mut::*;
5658
pub use node_ref::*;
5759
pub use parse_result::*;
5860
pub use query::*;
61+
pub use summary::*;
62+
pub use summary_result::*;
5963
pub use truncate::*;
6064

6165
pub use protobuf::Node;

src/protobuf.rs

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4455,6 +4455,121 @@ pub struct ScanToken {
44554455
#[prost(enumeration = "KeywordKind", tag = "5")]
44564456
pub keyword_kind: i32,
44574457
}
4458+
/// protobuf-c doesn't support optional fields, so any optional strings
4459+
/// are just an empty string if it should be the equivalent of None/nil.
4460+
///
4461+
/// These fields have `// optional` at the end of the line.
4462+
///
4463+
/// Upstream issue: <https://github.com/protobuf-c/protobuf-c/issues/476>
4464+
#[derive(serde::Serialize)]
4465+
#[derive(Clone, PartialEq, ::prost::Message)]
4466+
pub struct SummaryResult {
4467+
#[prost(message, repeated, tag = "1")]
4468+
pub tables: ::prost::alloc::vec::Vec<summary_result::Table>,
4469+
/// The value here is the table name (i.e. schema.table or just table).
4470+
#[prost(map = "string, string", tag = "2")]
4471+
pub aliases: ::std::collections::HashMap<
4472+
::prost::alloc::string::String,
4473+
::prost::alloc::string::String,
4474+
>,
4475+
#[prost(string, repeated, tag = "3")]
4476+
pub cte_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
4477+
#[prost(message, repeated, tag = "4")]
4478+
pub functions: ::prost::alloc::vec::Vec<summary_result::Function>,
4479+
#[prost(message, repeated, tag = "5")]
4480+
pub filter_columns: ::prost::alloc::vec::Vec<summary_result::FilterColumn>,
4481+
#[prost(string, repeated, tag = "6")]
4482+
pub statement_types: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
4483+
/// optional, empty if truncation limit is -1
4484+
#[prost(string, tag = "7")]
4485+
pub truncated_query: ::prost::alloc::string::String,
4486+
}
4487+
/// Nested message and enum types in `SummaryResult`.
4488+
pub mod summary_result {
4489+
#[derive(serde::Serialize)]
4490+
#[derive(Clone, PartialEq, ::prost::Message)]
4491+
pub struct Table {
4492+
#[prost(string, tag = "1")]
4493+
pub name: ::prost::alloc::string::String,
4494+
#[prost(string, tag = "2")]
4495+
pub schema_name: ::prost::alloc::string::String,
4496+
#[prost(string, tag = "3")]
4497+
pub table_name: ::prost::alloc::string::String,
4498+
#[prost(enumeration = "Context", tag = "4")]
4499+
pub context: i32,
4500+
}
4501+
#[derive(serde::Serialize)]
4502+
#[derive(Clone, PartialEq, ::prost::Message)]
4503+
pub struct Function {
4504+
#[prost(string, tag = "1")]
4505+
pub name: ::prost::alloc::string::String,
4506+
#[prost(string, tag = "2")]
4507+
pub function_name: ::prost::alloc::string::String,
4508+
/// optional
4509+
#[prost(string, tag = "3")]
4510+
pub schema_name: ::prost::alloc::string::String,
4511+
#[prost(enumeration = "Context", tag = "4")]
4512+
pub context: i32,
4513+
}
4514+
#[derive(serde::Serialize)]
4515+
#[derive(Clone, PartialEq, ::prost::Message)]
4516+
pub struct FilterColumn {
4517+
/// optional
4518+
#[prost(string, tag = "1")]
4519+
pub schema_name: ::prost::alloc::string::String,
4520+
/// optional
4521+
#[prost(string, tag = "2")]
4522+
pub table_name: ::prost::alloc::string::String,
4523+
#[prost(string, tag = "3")]
4524+
pub column: ::prost::alloc::string::String,
4525+
}
4526+
#[derive(serde::Serialize)]
4527+
#[derive(
4528+
Clone,
4529+
Copy,
4530+
Debug,
4531+
PartialEq,
4532+
Eq,
4533+
Hash,
4534+
PartialOrd,
4535+
Ord,
4536+
::prost::Enumeration
4537+
)]
4538+
#[repr(i32)]
4539+
pub enum Context {
4540+
None = 0,
4541+
Select = 1,
4542+
Dml = 2,
4543+
Ddl = 3,
4544+
Call = 4,
4545+
}
4546+
impl Context {
4547+
/// String value of the enum field names used in the ProtoBuf definition.
4548+
///
4549+
/// The values are not transformed in any way and thus are considered stable
4550+
/// (if the ProtoBuf definition does not change) and safe for programmatic use.
4551+
pub fn as_str_name(&self) -> &'static str {
4552+
match self {
4553+
Self::None => "None",
4554+
Self::Select => "Select",
4555+
Self::Dml => "DML",
4556+
Self::Ddl => "DDL",
4557+
Self::Call => "Call",
4558+
}
4559+
}
4560+
/// Creates an enum from field names used in the ProtoBuf definition.
4561+
pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
4562+
match value {
4563+
"None" => Some(Self::None),
4564+
"Select" => Some(Self::Select),
4565+
"DML" => Some(Self::Dml),
4566+
"DDL" => Some(Self::Ddl),
4567+
"Call" => Some(Self::Call),
4568+
_ => None,
4569+
}
4570+
}
4571+
}
4572+
}
44584573
#[derive(serde::Serialize)]
44594574
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
44604575
#[repr(i32)]

src/summary.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
use std::ffi::{CStr, CString};
2+
3+
use prost::Message;
4+
5+
use crate::bindings::*;
6+
use crate::error::*;
7+
use crate::protobuf;
8+
use crate::summary_result::SummaryResult;
9+
10+
/// Parses the given SQL statement and provides a summary of it.
11+
///
12+
/// It is possible to generate the same data using `pg_query::parse` and
13+
/// iterating through the parse tree. However, `pg_query::summary` uses a
14+
/// C implementation to avoid sending as much data over protobuf.
15+
///
16+
/// Avoiding sending the parse tree over protobuf can cause as much as an
17+
/// *order of magnitude* performance improvement. It also prevents some
18+
/// crashes caused by protobuf handling such a large amount of data.
19+
///
20+
/// You can run `cargo bench parse_vs_summary` to run the benchmarks that
21+
/// comparse the two options.
22+
///
23+
/// # Example
24+
///
25+
/// ```rust
26+
/// use pg_query::{Node, NodeEnum, NodeRef};
27+
///
28+
/// let result = pg_query::summary("SELECT * FROM contacts", -1);
29+
/// assert!(result.is_ok());
30+
/// let result = result.unwrap();
31+
/// assert_eq!(result.tables(), vec!["contacts"]);
32+
/// ```
33+
pub fn summary(statement: &str, truncate_limit: i32) -> Result<SummaryResult> {
34+
let input = CString::new(statement)?;
35+
let result = unsafe { pg_query_summary(input.as_ptr(), 0, truncate_limit) };
36+
let parse_result = if !result.error.is_null() {
37+
let message = unsafe { CStr::from_ptr((*result.error).message) }.to_string_lossy().to_string();
38+
Err(Error::Parse(message))
39+
} else {
40+
let data = unsafe { std::slice::from_raw_parts(result.summary.data as *const u8, result.summary.len as usize) };
41+
let stderr = unsafe { CStr::from_ptr(result.stderr_buffer) }.to_string_lossy().to_string();
42+
protobuf::SummaryResult::decode(data).map_err(Error::Decode).map(|result| SummaryResult::new(result, stderr))
43+
};
44+
unsafe { pg_query_free_summary_parse_result(result) };
45+
parse_result
46+
}

0 commit comments

Comments
 (0)