better parser generation

This commit is contained in:
nora 2022-02-12 21:53:09 +01:00
parent c43126af1f
commit 83778ac2c9
10 changed files with 859 additions and 516 deletions

1
Cargo.lock generated
View file

@ -26,7 +26,6 @@ dependencies = [
name = "amqp_codegen" name = "amqp_codegen"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow",
"heck", "heck",
"itertools", "itertools",
"strong-xml", "strong-xml",

View file

@ -6,7 +6,8 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
anyhow = "1.0.53"
heck = "0.4.0" heck = "0.4.0"
itertools = "0.10.3" itertools = "0.10.3"
strong-xml = "0.6.3" strong-xml = "0.6.3"
[features]

3
amqp_codegen/README.md Normal file
View file

@ -0,0 +1,3 @@
Code generator for AMQP method serialization/deserialization.
Run it from this directory using `cargo r > ../amqp_transport/src/classes/generated.rs`.

View file

@ -1,7 +1,6 @@
mod parser; mod parser;
use crate::parser::codegen_parser; use crate::parser::codegen_parser;
use anyhow::Result;
use heck::ToUpperCamelCase; use heck::ToUpperCamelCase;
use std::fs; use std::fs;
use strong_xml::XmlRead; use strong_xml::XmlRead;
@ -76,21 +75,21 @@ struct Field {
asserts: Vec<Assert>, asserts: Vec<Assert>,
} }
fn main() -> Result<()> { fn main() {
let content = fs::read_to_string("./amqp-0-9-1.xml").unwrap(); let content = fs::read_to_string("./amqp-0-9-1.xml").unwrap();
let amqp = Amqp::from_str(&content)?; let amqp = Amqp::from_str(&content).unwrap();
codegen(&amqp) codegen(&amqp);
} }
fn codegen(amqp: &Amqp) -> Result<()> { fn codegen(amqp: &Amqp) {
println!("// This file has been generated by `amqp_codegen`. Do not edit it manually.\n"); println!("// This file has been generated by `amqp_codegen`. Do not edit it manually.\n");
codegen_domain_defs(amqp)?; codegen_domain_defs(amqp);
codegen_class_defs(amqp)?; codegen_class_defs(amqp);
codegen_parser(amqp) codegen_parser(amqp);
} }
fn codegen_domain_defs(amqp: &Amqp) -> Result<()> { fn codegen_domain_defs(amqp: &Amqp) {
for domain in &amqp.domains { for domain in &amqp.domains {
let invariants = invariants(domain.asserts.iter()); let invariants = invariants(domain.asserts.iter());
@ -103,11 +102,10 @@ fn codegen_domain_defs(amqp: &Amqp) -> Result<()> {
amqp_type_to_rust_type(&domain.kind), amqp_type_to_rust_type(&domain.kind),
); );
} }
Ok(())
} }
fn codegen_class_defs(amqp: &Amqp) -> Result<()> { fn codegen_class_defs(amqp: &Amqp) {
println!("#[derive(Debug, Clone, PartialEq)]");
println!("pub enum Class {{"); println!("pub enum Class {{");
for class in &amqp.classes { for class in &amqp.classes {
let class_name = class.name.to_upper_camel_case(); let class_name = class.name.to_upper_camel_case();
@ -118,6 +116,7 @@ fn codegen_class_defs(amqp: &Amqp) -> Result<()> {
for class in &amqp.classes { for class in &amqp.classes {
let enum_name = class.name.to_upper_camel_case(); let enum_name = class.name.to_upper_camel_case();
println!("/// Index {}, handler = {}", class.index, class.handler); println!("/// Index {}, handler = {}", class.index, class.handler);
println!("#[derive(Debug, Clone, PartialEq)]");
println!("pub enum {enum_name} {{"); println!("pub enum {enum_name} {{");
for method in &class.methods { for method in &class.methods {
let method_name = method.name.to_upper_camel_case(); let method_name = method.name.to_upper_camel_case();
@ -127,10 +126,8 @@ fn codegen_class_defs(amqp: &Amqp) -> Result<()> {
println!(" {{"); println!(" {{");
for field in &method.fields { for field in &method.fields {
let field_name = snake_case(&field.name); let field_name = snake_case(&field.name);
let (field_type, field_docs) = resolve_type( let (field_type, field_docs) =
field.domain.as_ref().or(field.kind.as_ref()).unwrap(), get_invariants_with_type(field_type(field), field.asserts.as_ref());
field.asserts.as_ref(),
)?;
if !field_docs.is_empty() { if !field_docs.is_empty() {
println!(" /// {field_docs}"); println!(" /// {field_docs}");
} }
@ -143,8 +140,6 @@ fn codegen_class_defs(amqp: &Amqp) -> Result<()> {
} }
println!("}}"); println!("}}");
} }
Ok(())
} }
fn amqp_type_to_rust_type(amqp_type: &str) -> &'static str { fn amqp_type_to_rust_type(amqp_type: &str) -> &'static str {
@ -161,13 +156,25 @@ fn amqp_type_to_rust_type(amqp_type: &str) -> &'static str {
} }
} }
/// Returns the type name declared for `field`.
///
/// The field's `domain` takes precedence; `kind` is only consulted when no
/// domain is set.
///
/// # Panics
///
/// Panics if the field has neither a `domain` nor a `kind`.
fn field_type(field: &Field) -> &String {
    let declared = match &field.domain {
        Some(domain) => Some(domain),
        None => field.kind.as_ref(),
    };
    declared.unwrap()
}
/// Resolves a domain name to the name of its underlying AMQP type.
///
/// Searches `amqp.domains` for the first entry whose `name` equals `domain`
/// and returns an owned copy of that entry's `kind`.
///
/// # Panics
///
/// Panics if no domain with that name exists. The message names the missing
/// domain so that a broken or incomplete spec can be diagnosed from the
/// codegen output instead of a bare `unwrap` failure.
fn resolve_type_from_domain(amqp: &Amqp, domain: &str) -> String {
    amqp.domains
        .iter()
        .find(|d| d.name == domain)
        .map(|d| d.kind.clone())
        .unwrap_or_else(|| panic!("unknown domain `{domain}`: no matching domain in the spec"))
}
/// returns (type name, invariant docs) /// returns (type name, invariant docs)
fn resolve_type(domain: &str, asserts: &[Assert]) -> Result<(String, String)> { fn get_invariants_with_type(domain: &str, asserts: &[Assert]) -> (String, String) {
let additional_docs = invariants(asserts.iter()); let additional_docs = invariants(asserts.iter());
let type_name = domain.to_upper_camel_case(); let type_name = domain.to_upper_camel_case();
Ok((type_name, additional_docs)) (type_name, additional_docs)
} }
fn snake_case(ident: &str) -> String { fn snake_case(ident: &str) -> String {

View file

@ -1,5 +1,6 @@
use crate::{Amqp, Class, Domain, Method}; use crate::{
use anyhow::Result; field_type, resolve_type_from_domain, snake_case, Amqp, Assert, Class, Domain, Method,
};
use heck::{ToSnakeCase, ToUpperCamelCase}; use heck::{ToSnakeCase, ToUpperCamelCase};
use itertools::Itertools; use itertools::Itertools;
@ -15,7 +16,7 @@ fn domain_function_name(domain_name: &str) -> String {
format!("domain_{domain_name}") format!("domain_{domain_name}")
} }
pub(crate) fn codegen_parser(amqp: &Amqp) -> Result<()> { pub(crate) fn codegen_parser(amqp: &Amqp) {
println!( println!(
"pub mod parse {{ "pub mod parse {{
use super::*; use super::*;
@ -39,7 +40,7 @@ pub type IResult<'a, T> = nom::IResult<&'a [u8], T, TransError>;
); );
for domain in &amqp.domains { for domain in &amqp.domains {
domain_parser(domain)?; domain_parser(domain);
} }
for class in &amqp.classes { for class in &amqp.classes {
@ -56,78 +57,120 @@ pub type IResult<'a, T> = nom::IResult<&'a [u8], T, TransError>;
" let (input, _) = tag([{class_index}])(input)?; " let (input, _) = tag([{class_index}])(input)?;
alt(({all_methods}))(input)" alt(({all_methods}))(input)"
); );
});
Ok(())
})?;
for method in &class.methods { for method in &class.methods {
method_parser(class, method)?; method_parser(amqp, class, method);
} }
} }
println!("\n}}"); println!("\n}}");
Ok(())
} }
fn domain_parser(domain: &Domain) -> Result<()> { fn domain_parser(domain: &Domain) {
let fn_name = domain_function_name(&domain.name); let fn_name = domain_function_name(&domain.name);
let type_name = domain.kind.to_snake_case(); let type_name = domain.kind.to_snake_case();
// don't even bother with bit domains, do them manually at call site
if type_name != "bit" {
function(&fn_name, &domain.name.to_upper_camel_case(), || { function(&fn_name, &domain.name.to_upper_camel_case(), || {
if domain.asserts.is_empty() { if domain.asserts.is_empty() {
if type_name == "bit" {
println!(" todo!() // bit")
} else {
println!(" {type_name}(input)"); println!(" {type_name}(input)");
}
} else { } else {
println!(" let (input, result) = {type_name}(input)?;"); println!(" let (input, result) = {type_name}(input)?;");
for assert in &domain.asserts { for assert in &domain.asserts {
match &*assert.check { assert_check(assert, &type_name, "result");
"notnull" => { /* todo */ }
"regexp" => {
let value = assert.value.as_ref().unwrap();
println!(
r#" static REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"{value}").unwrap());"#
);
println!(" if !REGEX.is_match(&result) {{ fail!() }}");
}
"le" => {} // can't validate this here
"length" => {
let length = assert.value.as_ref().unwrap();
println!(" if result.len() > {length} {{ fail!() }}");
}
_ => unimplemented!(),
}
} }
println!(" Ok((input, result))"); println!(" Ok((input, result))");
} }
Ok(()) });
}) }
} }
fn method_parser(class: &Class, method: &Method) -> Result<()> { fn method_parser(amqp: &Amqp, class: &Class, method: &Method) {
let class_name = class.name.to_snake_case(); let class_name = class.name.to_snake_case();
let function_name = method_function_name(&class_name)(method); let function_name = method_function_name(&class_name)(method);
function(&function_name, "Class", || { function(&function_name, "Class", || {
let method_index = method.index; let method_index = method.index;
println!(" let (input, _) = tag([{method_index}])(input)?;"); println!(" let (input, _) = tag([{method_index}])(input)?;");
println!(" todo!()"); let mut iter = method.fields.iter().peekable();
for _field in &method.fields {} while let Some(field) = iter.next() {
Ok(()) let type_name = resolve_type_from_domain(amqp, field_type(field));
})?;
Ok(()) if type_name == "bit" {
let mut fields_with_bit = vec![field];
loop {
if iter
.peek()
.map(|f| resolve_type_from_domain(amqp, field_type(f)) == "bit")
.unwrap_or(false)
{
fields_with_bit.push(iter.next().unwrap());
} else {
break;
}
}
let amount = fields_with_bit.len();
println!(" let (input, bits) = bit(input, {amount})?;");
for (i, field) in fields_with_bit.iter().enumerate() {
let field_name = snake_case(&field.name);
println!(" let {field_name} = bits[{i}];");
}
} else {
let fn_name = domain_function_name(field_type(field));
let field_name = snake_case(&field.name);
println!(" let (input, {field_name}) = {fn_name}(input)?;");
for assert in &field.asserts {
assert_check(assert, &type_name, &field_name);
}
}
}
let class_name = class_name.to_upper_camel_case();
let method_name = method.name.to_upper_camel_case();
println!(" Ok((input, Class::{class_name}({class_name}::{method_name} {{");
for field in &method.fields {
let field_name = snake_case(&field.name);
println!(" {field_name},");
}
println!(" }})))");
});
} }
fn function<F>(name: &str, ret_ty: &str, body: F) -> Result<()> fn assert_check(assert: &Assert, type_name: &str, var_name: &str) {
match &*assert.check {
"notnull" => match type_name {
"shortstr" | "longstr" => {
println!(" if {var_name}.is_empty() {{ fail!() }}")
}
"short" => println!(" if {var_name} == 0 {{ fail!() }}"),
_ => unimplemented!(),
},
"regexp" => {
let value = assert.value.as_ref().unwrap();
println!(
r#" static REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"{value}").unwrap());"#
);
println!(" if !REGEX.is_match(&{var_name}) {{ fail!() }}");
}
"le" => {} // can't validate this here
"length" => {
let length = assert.value.as_ref().unwrap();
println!(" if {var_name}.len() > {length} {{ fail!() }}");
}
_ => unimplemented!(),
}
}
fn function<F>(name: &str, ret_ty: &str, body: F)
where where
F: FnOnce() -> Result<()>, F: FnOnce(),
{ {
println!("fn {name}(input: &[u8]) -> IResult<{ret_ty}> {{"); println!("fn {name}(input: &[u8]) -> IResult<{ret_ty}> {{");
body()?; body();
println!("}}"); println!("}}");
Ok(())
} }

File diff suppressed because it is too large Load diff

View file

@ -1,3 +1,5 @@
use crate::classes::generated::Class;
use crate::error::{ConException, ProtocolError, TransError};
use std::collections::HashMap; use std::collections::HashMap;
mod generated; mod generated;
@ -5,6 +7,7 @@ mod parse_helper;
pub type Table = HashMap<String, FieldValue>; pub type Table = HashMap<String, FieldValue>;
#[derive(Debug, Clone, PartialEq)]
pub enum FieldValue { pub enum FieldValue {
Boolean(bool), Boolean(bool),
ShortShortInt(i8), ShortShortInt(i8),
@ -25,3 +28,19 @@ pub enum FieldValue {
FieldTable(Table), FieldTable(Table),
Void, Void,
} }
pub use generated::*;
/// Parses the payload of a method frame into the class/method
pub fn parse_method(payload: &[u8]) -> Result<Class, TransError> {
let nom_result = generated::parse::parse_method(payload);
match nom_result {
Ok(([], class)) => Ok(class),
Ok((_, _)) => Err(ProtocolError::ConException(ConException::SyntaxError).into()),
Err(nom::Err::Incomplete(_)) => {
Err(ProtocolError::ConException(ConException::SyntaxError).into())
}
Err(nom::Err::Failure(err) | nom::Err::Error(err)) => Err(err),
}
}

View file

@ -39,6 +39,7 @@ pub fn long(input: &[u8]) -> IResult<Long> {
pub fn longlong(input: &[u8]) -> IResult<Longlong> { pub fn longlong(input: &[u8]) -> IResult<Longlong> {
todo!() todo!()
} }
// todo: doing this using a vec is a bit wasteful, consider not doing that
pub fn bit(input: &[u8], amount: u8) -> IResult<Vec<Bit>> { pub fn bit(input: &[u8], amount: u8) -> IResult<Vec<Bit>> {
todo!() todo!()
} }

View file

@ -1,5 +1,6 @@
use crate::error::{ProtocolError, TransError}; use crate::error::{ProtocolError, TransError};
use crate::frame; use crate::frame;
use crate::frame::FrameType;
use anyhow::Context; use anyhow::Context;
use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::TcpStream; use tokio::net::TcpStream;
@ -27,6 +28,10 @@ impl Connection {
loop { loop {
let frame = frame::read_frame(&mut self.stream, 10000).await?; let frame = frame::read_frame(&mut self.stream, 10000).await?;
debug!(?frame, "received frame"); debug!(?frame, "received frame");
if frame.kind == FrameType::Method {
let class = super::classes::parse_method(&frame.payload)?;
debug!(?class, "was method frame");
}
} }
} }

View file

@ -14,10 +14,10 @@ mod frame_type {
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct Frame { pub struct Frame {
/// The type of the frame including its parsed metadata. /// The type of the frame including its parsed metadata.
kind: FrameType, pub kind: FrameType,
channel: u16, pub channel: u16,
/// Includes the whole payload, also including the metadata from each type. /// Includes the whole payload, also including the metadata from each type.
payload: Vec<u8>, pub payload: Vec<u8>,
} }
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
@ -29,23 +29,6 @@ pub enum FrameType {
Heartbeat = 8, Heartbeat = 8,
} }
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FrameTypeEnum {
/// 1
Method,
/// 2
Header {
class_id: u16,
body_size: u64,
/// Ordered from high to low
property_flags: u16,
},
/// 3
Body,
/// 8
Heartbeat,
}
pub async fn read_frame<R>(r: &mut R, max_frame_size: usize) -> Result<Frame, TransError> pub async fn read_frame<R>(r: &mut R, max_frame_size: usize) -> Result<Frame, TransError>
where where
R: AsyncReadExt + Unpin, R: AsyncReadExt + Unpin,