better parser generation

This commit is contained in:
nora 2022-02-12 21:53:09 +01:00
parent c43126af1f
commit 83778ac2c9
10 changed files with 859 additions and 516 deletions

1
Cargo.lock generated
View file

@ -26,7 +26,6 @@ dependencies = [
name = "amqp_codegen"
version = "0.1.0"
dependencies = [
"anyhow",
"heck",
"itertools",
"strong-xml",

View file

@ -6,7 +6,8 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0.53"
heck = "0.4.0"
itertools = "0.10.3"
strong-xml = "0.6.3"
[features]

3
amqp_codegen/README.md Normal file
View file

@ -0,0 +1,3 @@
Code generator for AMQP method serialization/deserialization.
Run it with `cargo r > ../amqp_transport/src/classes/generated.rs`.

View file

@ -1,7 +1,6 @@
mod parser;
use crate::parser::codegen_parser;
use anyhow::Result;
use heck::ToUpperCamelCase;
use std::fs;
use strong_xml::XmlRead;
@ -76,21 +75,21 @@ struct Field {
asserts: Vec<Assert>,
}
fn main() -> Result<()> {
fn main() {
let content = fs::read_to_string("./amqp-0-9-1.xml").unwrap();
let amqp = Amqp::from_str(&content)?;
codegen(&amqp)
let amqp = Amqp::from_str(&content).unwrap();
codegen(&amqp);
}
fn codegen(amqp: &Amqp) -> Result<()> {
fn codegen(amqp: &Amqp) {
println!("// This file has been generated by `amqp_codegen`. Do not edit it manually.\n");
codegen_domain_defs(amqp)?;
codegen_class_defs(amqp)?;
codegen_parser(amqp)
codegen_domain_defs(amqp);
codegen_class_defs(amqp);
codegen_parser(amqp);
}
fn codegen_domain_defs(amqp: &Amqp) -> Result<()> {
fn codegen_domain_defs(amqp: &Amqp) {
for domain in &amqp.domains {
let invariants = invariants(domain.asserts.iter());
@ -103,11 +102,10 @@ fn codegen_domain_defs(amqp: &Amqp) -> Result<()> {
amqp_type_to_rust_type(&domain.kind),
);
}
Ok(())
}
fn codegen_class_defs(amqp: &Amqp) -> Result<()> {
fn codegen_class_defs(amqp: &Amqp) {
println!("#[derive(Debug, Clone, PartialEq)]");
println!("pub enum Class {{");
for class in &amqp.classes {
let class_name = class.name.to_upper_camel_case();
@ -118,6 +116,7 @@ fn codegen_class_defs(amqp: &Amqp) -> Result<()> {
for class in &amqp.classes {
let enum_name = class.name.to_upper_camel_case();
println!("/// Index {}, handler = {}", class.index, class.handler);
println!("#[derive(Debug, Clone, PartialEq)]");
println!("pub enum {enum_name} {{");
for method in &class.methods {
let method_name = method.name.to_upper_camel_case();
@ -127,10 +126,8 @@ fn codegen_class_defs(amqp: &Amqp) -> Result<()> {
println!(" {{");
for field in &method.fields {
let field_name = snake_case(&field.name);
let (field_type, field_docs) = resolve_type(
field.domain.as_ref().or(field.kind.as_ref()).unwrap(),
field.asserts.as_ref(),
)?;
let (field_type, field_docs) =
get_invariants_with_type(field_type(field), field.asserts.as_ref());
if !field_docs.is_empty() {
println!(" /// {field_docs}");
}
@ -143,8 +140,6 @@ fn codegen_class_defs(amqp: &Amqp) -> Result<()> {
}
println!("}}");
}
Ok(())
}
fn amqp_type_to_rust_type(amqp_type: &str) -> &'static str {
@ -161,13 +156,25 @@ fn amqp_type_to_rust_type(amqp_type: &str) -> &'static str {
}
}
/// Returns the type reference of `field`: its `domain` when present,
/// otherwise its `kind`.
///
/// # Panics
///
/// Panics, naming the offending field, when the field has neither a
/// `domain` nor a `kind`.
fn field_type(field: &Field) -> &String {
    field
        .domain
        .as_ref()
        .or(field.kind.as_ref())
        .unwrap_or_else(|| {
            // Name the field so a broken spec entry can be located quickly.
            panic!(
                "field `{}` has neither a `domain` nor a `kind`",
                field.name
            )
        })
}
/// Looks up the domain called `domain` in `amqp.domains` and returns a copy
/// of its `kind`.
///
/// The first domain whose name matches is used.
///
/// # Panics
///
/// Panics, naming the requested domain, when no domain with that name exists.
fn resolve_type_from_domain(amqp: &Amqp, domain: &str) -> String {
    amqp.domains
        .iter()
        .find(|d| d.name == domain)
        .map(|d| d.kind.clone())
        // Report which domain was missing instead of a bare `unwrap` panic.
        .unwrap_or_else(|| panic!("unknown AMQP domain `{}`", domain))
}
/// returns (type name, invariant docs)
fn resolve_type(domain: &str, asserts: &[Assert]) -> Result<(String, String)> {
fn get_invariants_with_type(domain: &str, asserts: &[Assert]) -> (String, String) {
let additional_docs = invariants(asserts.iter());
let type_name = domain.to_upper_camel_case();
Ok((type_name, additional_docs))
(type_name, additional_docs)
}
fn snake_case(ident: &str) -> String {

View file

@ -1,5 +1,6 @@
use crate::{Amqp, Class, Domain, Method};
use anyhow::Result;
use crate::{
field_type, resolve_type_from_domain, snake_case, Amqp, Assert, Class, Domain, Method,
};
use heck::{ToSnakeCase, ToUpperCamelCase};
use itertools::Itertools;
@ -15,7 +16,7 @@ fn domain_function_name(domain_name: &str) -> String {
format!("domain_{domain_name}")
}
pub(crate) fn codegen_parser(amqp: &Amqp) -> Result<()> {
pub(crate) fn codegen_parser(amqp: &Amqp) {
println!(
"pub mod parse {{
use super::*;
@ -39,7 +40,7 @@ pub type IResult<'a, T> = nom::IResult<&'a [u8], T, TransError>;
);
for domain in &amqp.domains {
domain_parser(domain)?;
domain_parser(domain);
}
for class in &amqp.classes {
@ -56,78 +57,120 @@ pub type IResult<'a, T> = nom::IResult<&'a [u8], T, TransError>;
" let (input, _) = tag([{class_index}])(input)?;
alt(({all_methods}))(input)"
);
Ok(())
})?;
});
for method in &class.methods {
method_parser(class, method)?;
method_parser(amqp, class, method);
}
}
println!("\n}}");
Ok(())
}
fn domain_parser(domain: &Domain) -> Result<()> {
fn domain_parser(domain: &Domain) {
let fn_name = domain_function_name(&domain.name);
let type_name = domain.kind.to_snake_case();
function(&fn_name, &domain.name.to_upper_camel_case(), || {
if domain.asserts.is_empty() {
if type_name == "bit" {
println!(" todo!() // bit")
} else {
// don't even bother with bit domains, do them manually at call site
if type_name != "bit" {
function(&fn_name, &domain.name.to_upper_camel_case(), || {
if domain.asserts.is_empty() {
println!(" {type_name}(input)");
}
} else {
println!(" let (input, result) = {type_name}(input)?;");
} else {
println!(" let (input, result) = {type_name}(input)?;");
for assert in &domain.asserts {
match &*assert.check {
"notnull" => { /* todo */ }
"regexp" => {
let value = assert.value.as_ref().unwrap();
println!(
r#" static REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"{value}").unwrap());"#
);
println!(" if !REGEX.is_match(&result) {{ fail!() }}");
}
"le" => {} // can't validate this here
"length" => {
let length = assert.value.as_ref().unwrap();
println!(" if result.len() > {length} {{ fail!() }}");
}
_ => unimplemented!(),
for assert in &domain.asserts {
assert_check(assert, &type_name, "result");
}
println!(" Ok((input, result))");
}
println!(" Ok((input, result))");
}
Ok(())
})
});
}
}
fn method_parser(class: &Class, method: &Method) -> Result<()> {
fn method_parser(amqp: &Amqp, class: &Class, method: &Method) {
let class_name = class.name.to_snake_case();
let function_name = method_function_name(&class_name)(method);
function(&function_name, "Class", || {
let method_index = method.index;
println!(" let (input, _) = tag([{method_index}])(input)?;");
println!(" todo!()");
for _field in &method.fields {}
Ok(())
})?;
let mut iter = method.fields.iter().peekable();
while let Some(field) = iter.next() {
let type_name = resolve_type_from_domain(amqp, field_type(field));
Ok(())
if type_name == "bit" {
let mut fields_with_bit = vec![field];
loop {
if iter
.peek()
.map(|f| resolve_type_from_domain(amqp, field_type(f)) == "bit")
.unwrap_or(false)
{
fields_with_bit.push(iter.next().unwrap());
} else {
break;
}
}
let amount = fields_with_bit.len();
println!(" let (input, bits) = bit(input, {amount})?;");
for (i, field) in fields_with_bit.iter().enumerate() {
let field_name = snake_case(&field.name);
println!(" let {field_name} = bits[{i}];");
}
} else {
let fn_name = domain_function_name(field_type(field));
let field_name = snake_case(&field.name);
println!(" let (input, {field_name}) = {fn_name}(input)?;");
for assert in &field.asserts {
assert_check(assert, &type_name, &field_name);
}
}
}
let class_name = class_name.to_upper_camel_case();
let method_name = method.name.to_upper_camel_case();
println!(" Ok((input, Class::{class_name}({class_name}::{method_name} {{");
for field in &method.fields {
let field_name = snake_case(&field.name);
println!(" {field_name},");
}
println!(" }})))");
});
}
fn function<F>(name: &str, ret_ty: &str, body: F) -> Result<()>
/// Prints, to stdout, the generated Rust statements that enforce a single
/// spec assert on an already-parsed value.
///
/// * `assert` - the assert to translate; `check` selects the rule and
///   `value` carries its argument where one is needed.
/// * `type_name` - type of the value being checked; only consulted for
///   `notnull`.
/// * `var_name` - name of the variable in the generated code that holds the
///   value.
///
/// Nothing is emitted for `le`.
///
/// # Panics
///
/// Panics with a message naming the check, type or variable when
/// * the check is not one of `notnull`, `regexp`, `le`, `length`,
/// * `notnull` is applied to a type other than `shortstr`, `longstr` or
///   `short`,
/// * `regexp` or `length` has no `value`.
fn assert_check(assert: &Assert, type_name: &str, var_name: &str) {
    match &*assert.check {
        "notnull" => match type_name {
            "shortstr" | "longstr" => {
                println!(" if {var_name}.is_empty() {{ fail!() }}")
            }
            "short" => println!(" if {var_name} == 0 {{ fail!() }}"),
            other => unimplemented!(
                "`notnull` assert on type `{}` (variable `{}`)",
                other,
                var_name
            ),
        },
        "regexp" => {
            let value = assert.value.as_ref().unwrap_or_else(|| {
                panic!("`regexp` assert on `{}` requires a `value`", var_name)
            });
            // NOTE(review): every regexp assert emits a static named `REGEX`;
            // two of them inside one generated function would collide.
            println!(
                r#" static REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"{value}").unwrap());"#
            );
            println!(" if !REGEX.is_match(&{var_name}) {{ fail!() }}");
        }
        "le" => {} // can't validate this here
        "length" => {
            let length = assert.value.as_ref().unwrap_or_else(|| {
                panic!("`length` assert on `{}` requires a `value`", var_name)
            });
            println!(" if {var_name}.len() > {length} {{ fail!() }}");
        }
        other => unimplemented!(
            "unsupported assert check `{}` (variable `{}`)",
            other,
            var_name
        ),
    }
}
fn function<F>(name: &str, ret_ty: &str, body: F)
where
F: FnOnce() -> Result<()>,
F: FnOnce(),
{
println!("fn {name}(input: &[u8]) -> IResult<{ret_ty}> {{");
body()?;
body();
println!("}}");
Ok(())
}

File diff suppressed because it is too large Load diff

View file

@ -1,3 +1,5 @@
use crate::classes::generated::Class;
use crate::error::{ConException, ProtocolError, TransError};
use std::collections::HashMap;
mod generated;
@ -5,6 +7,7 @@ mod parse_helper;
pub type Table = HashMap<String, FieldValue>;
#[derive(Debug, Clone, PartialEq)]
pub enum FieldValue {
Boolean(bool),
ShortShortInt(i8),
@ -25,3 +28,19 @@ pub enum FieldValue {
FieldTable(Table),
Void,
}
pub use generated::*;
/// Parses the payload of a method frame into the class/method
pub fn parse_method(payload: &[u8]) -> Result<Class, TransError> {
let nom_result = generated::parse::parse_method(payload);
match nom_result {
Ok(([], class)) => Ok(class),
Ok((_, _)) => Err(ProtocolError::ConException(ConException::SyntaxError).into()),
Err(nom::Err::Incomplete(_)) => {
Err(ProtocolError::ConException(ConException::SyntaxError).into())
}
Err(nom::Err::Failure(err) | nom::Err::Error(err)) => Err(err),
}
}

View file

@ -39,6 +39,7 @@ pub fn long(input: &[u8]) -> IResult<Long> {
/// Parser for a `Longlong` value.
///
/// Not implemented yet: the body is `todo!()`, so every call panics.
pub fn longlong(input: &[u8]) -> IResult<Longlong> {
todo!()
}
/// Parser for `amount` `Bit` values, returned together as a `Vec`.
///
/// Not implemented yet: the body is `todo!()`, so every call panics.
// TODO: returning a `Vec` here is a bit wasteful; consider a representation
// that avoids it.
pub fn bit(input: &[u8], amount: u8) -> IResult<Vec<Bit>> {
todo!()
}

View file

@ -1,5 +1,6 @@
use crate::error::{ProtocolError, TransError};
use crate::frame;
use crate::frame::FrameType;
use anyhow::Context;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::TcpStream;
@ -27,6 +28,10 @@ impl Connection {
loop {
let frame = frame::read_frame(&mut self.stream, 10000).await?;
debug!(?frame, "received frame");
if frame.kind == FrameType::Method {
let class = super::classes::parse_method(&frame.payload)?;
debug!(?class, "was method frame");
}
}
}

View file

@ -14,10 +14,10 @@ mod frame_type {
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Frame {
/// The type of the frame including its parsed metadata.
kind: FrameType,
channel: u16,
pub kind: FrameType,
pub channel: u16,
/// Includes the whole payload, also including the metadata from each type.
payload: Vec<u8>,
pub payload: Vec<u8>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
@ -29,23 +29,6 @@ pub enum FrameType {
Heartbeat = 8,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FrameTypeEnum {
/// 1
Method,
/// 2
Header {
class_id: u16,
body_size: u64,
/// Ordered from high to low
property_flags: u16,
},
/// 3
Body,
/// 8
Heartbeat,
}
pub async fn read_frame<R>(r: &mut R, max_frame_size: usize) -> Result<Frame, TransError>
where
R: AsyncReadExt + Unpin,