From 865ba45dc73f29257e160d7dd50d8a8b3b16e0f8 Mon Sep 17 00:00:00 2001 From: nils <48135649+Nilstrieb@users.noreply.github.com> Date: Thu, 26 Aug 2021 16:21:54 +0200 Subject: [PATCH 1/4] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c2be276..eb5a785 100644 --- a/README.md +++ b/README.md @@ -8,4 +8,5 @@ my goal is not to make a fully working jvm that will run your spring application it would be amazing if it even managed to run a hello world ## what i have for now: -* (Hopefully) working complete `.class` file parser +* Almost working complete `.class` file parser +* Primitive file info for `.class` files similar to `javap` From 54ca19a45a27388d608abe4511279f03b0bf6e1d Mon Sep 17 00:00:00 2001 From: Nilstrieb Date: Sat, 28 Aug 2021 23:38:37 +0200 Subject: [PATCH 2/4] Field and Method descriptor parsing with tests --- crates/class-struct/src/lib.rs | 153 ++++++++++++++++++++++++++++---- crates/class-struct/src/test.rs | 126 ++++++++++++++++++++++++++ 2 files changed, 263 insertions(+), 16 deletions(-) create mode 100644 crates/class-struct/src/test.rs diff --git a/crates/class-struct/src/lib.rs b/crates/class-struct/src/lib.rs index 05cf7a5..3e82d2a 100644 --- a/crates/class-struct/src/lib.rs +++ b/crates/class-struct/src/lib.rs @@ -1,23 +1,144 @@ #![allow(dead_code)] -struct MethodSignature { - args: Vec, - return_t: Type, +#[cfg(test)] +mod test; + +use std::borrow::Cow; +use std::str::FromStr; + +#[derive(Debug)] +pub struct ParseErr(pub Cow<'static, str>); + +impl ParseErr { + pub fn str(str: &'static str) -> Self { + Self(Cow::Borrowed(str)) + } + pub fn string(str: String) -> Self { + Self(Cow::Owned(str)) + } } -/// A Java type, found in signatures -enum Type { +/// A field descriptor for the type of a field in a class +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub struct FieldDescriptor(pub FieldType); + +/// The type of a field or method parameter +#[derive(Clone, Debug, 
Eq, PartialEq, Hash)] +pub enum FieldType { + /// B + Byte, + /// C + Char, + /// D + Double, + /// F + Float, + /// I + Int, + /// J + Long, + /// L `ClassName` ; + Object(String), + /// S + Short, + /// Z + Boolean, + /// [ + Array(Box), +} + +/// A method descriptor for the type of a method in a class +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub struct MethodDescriptor { + parameters: Vec, + return_: MethodType, +} + +/// The type of a method +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub enum MethodType { + Some(FieldType), /// V Void, - /// B - Boolean, - Byte, - Short, - Int, - Long, - Float, - Double, - Object, - /// [ - Array(Box), +} + +impl FromStr for FieldDescriptor { + type Err = ParseErr; + + fn from_str(s: &str) -> Result { + Ok(Self(FieldType::from_char_iter(&mut s.chars())?)) + } +} + +impl FieldType { + /// Consumes as many chars as needed from the char iterator and tries to parse itself + pub fn from_char_iter(chars: &mut I) -> Result + where + I: Iterator, + { + let first = chars.next().ok_or_else(|| ParseErr::str("Empty string"))?; + Ok(match first { + 'B' => Self::Byte, + 'C' => Self::Char, + 'D' => Self::Double, + 'F' => Self::Float, + 'I' => Self::Int, + 'J' => Self::Long, + 'L' => Self::Object({ + let mut name = String::with_capacity(32); // we can expect ClassNames to be at least this long + loop { + let char = chars + .next() + .ok_or_else(|| ParseErr::str("Expected ; before end of string"))?; + + if char == ';' { + break; + }; + name.push(char); + } + name + }), + 'S' => Self::Short, + 'Z' => Self::Boolean, + '[' => Self::Array(Box::new(Self::from_char_iter(chars)?)), + c => { + return Err(ParseErr::string(format!( + "Invalid char in field descriptor {}", + c + ))) + } + }) + } +} + +impl FromStr for MethodDescriptor { + type Err = ParseErr; + + fn from_str(s: &str) -> Result { + let mut chars = s.chars().peekable(); + if chars.next().ok_or_else(|| ParseErr::str("Empty string"))? 
!= '(' { + return Err(ParseErr::str("Needs to start with '('")); + } + + let mut parameters = Vec::new(); + + loop { + if let Some(')') = chars.peek() { + let _ = chars.next(); // consume the ) + break; + } + parameters.push(FieldType::from_char_iter(&mut chars)?); + } + + let return_ = if let Some('V') = chars.peek() { + MethodType::Void + } else { + MethodType::Some(FieldType::from_char_iter(&mut chars)?) + }; + + Ok(Self { + parameters, + return_, + }) + } } diff --git a/crates/class-struct/src/test.rs b/crates/class-struct/src/test.rs new file mode 100644 index 0000000..d858920 --- /dev/null +++ b/crates/class-struct/src/test.rs @@ -0,0 +1,126 @@ +use super::*; + +#[test] +fn field_descriptor() { + let descriptors = [ + FieldDescriptor::from_str("B").unwrap(), + FieldDescriptor::from_str("C").unwrap(), + FieldDescriptor::from_str("D").unwrap(), + FieldDescriptor::from_str("F").unwrap(), + FieldDescriptor::from_str("I").unwrap(), + FieldDescriptor::from_str("J").unwrap(), + FieldDescriptor::from_str("S").unwrap(), + FieldDescriptor::from_str("Z").unwrap(), + FieldDescriptor::from_str("[B").unwrap(), + FieldDescriptor::from_str("[[Z").unwrap(), + FieldDescriptor::from_str("Ljava/lang/String;").unwrap(), + FieldDescriptor::from_str("[[[Ljava/lang/String;").unwrap(), + ]; + + type FT = FieldType; + + let expected_descriptors = [ + FieldDescriptor(FT::Byte), + FieldDescriptor(FT::Char), + FieldDescriptor(FT::Double), + FieldDescriptor(FT::Float), + FieldDescriptor(FT::Int), + FieldDescriptor(FT::Long), + FieldDescriptor(FT::Short), + FieldDescriptor(FT::Boolean), + FieldDescriptor(FT::Array(Box::new(FT::Byte))), + FieldDescriptor(FT::Array(Box::new(FT::Array(Box::new(FT::Boolean))))), + FieldDescriptor(FT::Object("java/lang/String".to_string())), + FieldDescriptor(FT::Array(Box::new(FT::Array(Box::new(FT::Array( + Box::new(FT::Object("java/lang/String".to_string())), + )))))), + ]; + + let invalid_descriptors = ["", "Q", "[]", "[", "Ljava/lang/String", "L", 
"[[[Ljava"]; + + descriptors + .iter() + .zip(expected_descriptors.iter()) + .for_each(|(a, b)| assert_eq!(a, b)); + + invalid_descriptors + .iter() + .map(|d| FieldDescriptor::from_str(d)) + .for_each(|rs| { + if rs.is_ok() { + panic!("Successfully parsed invalid result, {:?}", rs); + } + }); +} + +#[test] +fn method_descriptor() { + let descriptors = vec![ + MethodDescriptor::from_str("()V").unwrap(), + MethodDescriptor::from_str("(B)V").unwrap(), + MethodDescriptor::from_str("([ZZ)Ljava/lang/Object;").unwrap(), + MethodDescriptor::from_str("(IDLjava/lang/Thread;)Ljava/lang/Object;").unwrap(), + MethodDescriptor::from_str("(BBBBBBBBBB)B").unwrap(), + MethodDescriptor::from_str("()Z").unwrap(), + ]; + + type FT = FieldType; + + let expected_descriptors = [ + MethodDescriptor { + parameters: vec![], + return_: MethodType::Void, + }, + MethodDescriptor { + parameters: vec![FT::Byte], + return_: MethodType::Void, + }, + MethodDescriptor { + parameters: vec![FT::Array(Box::new(FT::Boolean)), FT::Boolean], + return_: MethodType::Some(FT::Object("java/lang/Object".to_string())), + }, + MethodDescriptor { + parameters: vec![ + FT::Int, + FT::Double, + FT::Object("java/lang/Thread".to_string()), + ], + return_: MethodType::Some(FT::Object("java/lang/Object".to_string())), + }, + MethodDescriptor { + parameters: vec![ + FT::Byte, + FT::Byte, + FT::Byte, + FT::Byte, + FT::Byte, + FT::Byte, + FT::Byte, + FT::Byte, + FT::Byte, + FT::Byte, + ], + return_: MethodType::Some(FT::Byte), + }, + MethodDescriptor { + parameters: vec![], + return_: MethodType::Some(FT::Boolean), + }, + ]; + + let invalid_descriptors = ["()", "(V)V", ")V", "(;)Z", "(java/lang/StringZ)", "V"]; + + invalid_descriptors + .iter() + .map(|d| MethodDescriptor::from_str(d)) + .for_each(|rs| { + if rs.is_ok() { + panic!("Successfully parsed invalid result, {:?}", rs); + } + }); + + descriptors + .iter() + .zip(expected_descriptors.iter()) + .for_each(|(a, b)| assert_eq!(a, b)); +} From 
8376d22aa60587392550cdcb3a29f605a49ef43d Mon Sep 17 00:00:00 2001 From: Nilstrieb Date: Sat, 28 Aug 2021 23:51:43 +0200 Subject: [PATCH 3/4] moving to Java SE16 everything should be done except the attributes --- crates/file-parser/src/lib.rs | 20 +++++++++++++++- crates/file-parser/src/model/cp_info.rs | 31 +++++++++++++++++-------- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/crates/file-parser/src/lib.rs b/crates/file-parser/src/lib.rs index 6b7ac86..3ca621f 100644 --- a/crates/file-parser/src/lib.rs +++ b/crates/file-parser/src/lib.rs @@ -165,7 +165,6 @@ impl Parse for ClassFile { impl Parse for CpInfo { fn parse(data: &mut Data, cp: &[CpInfo]) -> Result { let tag = data.u1()?; - dbg!(tag); Ok(match tag { 7 => Self { @@ -261,6 +260,13 @@ impl Parse for CpInfo { descriptor_index: data.cp(cp)?, }), }, + 17 => Self { + tag, + inner: CpInfoInner::Dynamic(cp_info::Dynamic { + bootstrap_method_attr_index: data.u2()?, + name_and_type_index: data.cp(cp)?, + }), + }, 18 => Self { tag, inner: CpInfoInner::InvokeDynamic(cp_info::InvokeDynamic { @@ -268,6 +274,18 @@ impl Parse for CpInfo { name_and_type_index: data.cp(cp)?, }), }, + 19 => Self { + tag, + inner: CpInfoInner::Module(cp_info::Module { + name_index: data.cp(cp)?, + }), + }, + 20 => Self { + tag, + inner: CpInfoInner::Package(cp_info::Package { + name_index: data.cp(cp)?, + }), + }, _ => return Err(ParseErr(format!("Invalid CPInfo tag: {}", tag))), }) } diff --git a/crates/file-parser/src/model/cp_info.rs b/crates/file-parser/src/model/cp_info.rs index 101dd03..8221eae 100644 --- a/crates/file-parser/src/model/cp_info.rs +++ b/crates/file-parser/src/model/cp_info.rs @@ -159,7 +159,6 @@ impl ValidateCpInfo for CpInfoInner { #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub struct Class { - /// Entry must be `Utf8` pub name_index: FromPool, } @@ -167,7 +166,6 @@ pub struct Class { pub struct Fieldref { /// May be a class or interface type pub class_index: FromPool, - /// Entry must be 
`NameAndType` pub name_and_type_index: FromPool, } @@ -175,7 +173,6 @@ pub struct Fieldref { pub struct MethodRef { /// Must be a class type pub class_index: FromPool, - /// Entry must be `NameAndType` pub name_and_type_index: FromPool, } @@ -183,13 +180,11 @@ pub struct MethodRef { pub struct InterfaceMethodref { /// Must be an interface type pub class_index: FromPool, - /// Entry must be `NameAndType` pub name_and_type_index: FromPool, } #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub struct String { - /// Entry must be `Utf8` pub string_index: FromPool, } @@ -226,9 +221,7 @@ pub struct Double { /// Any field or method, without the class it belongs to #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub struct NameAndType { - /// Entry must be `Utf8` pub name_index: FromPool, - /// Entry must be `Utf8` pub descriptor_index: FromPool, } @@ -256,18 +249,33 @@ pub enum MethodHandleIndex { #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub struct MethodType { - /// Entry must be `Utf8` pub descriptor_index: FromPool, } +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub struct Dynamic { + /// Must be a valid index into the `bootstrap_methods` array of the bootstrap method table of this class field + pub bootstrap_method_attr_index: u2, + pub name_and_type_index: FromPool, +} + #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub struct InvokeDynamic { /// Must be a valid index into the `bootstrap_methods` array of the bootstrap method table of this class field pub bootstrap_method_attr_index: u2, - /// Entry must `NameAndType` pub name_and_type_index: FromPool, } +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub struct Module { + pub name_index: FromPool, +} + +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub struct Package { + pub name_index: FromPool, +} + // default implementations impl_try_from_cp!( @@ -283,7 +291,10 @@ impl_try_from_cp!( NameAndType, MethodHandle, MethodType, - InvokeDynamic + Dynamic, + InvokeDynamic, + Module, 
+ Package ); impl ValidateCpInfo for Utf8 { From 98efb3166043453cd4792ea508ddf4fc6ff85a9e Mon Sep 17 00:00:00 2001 From: Nilstrieb Date: Sat, 28 Aug 2021 23:51:56 +0200 Subject: [PATCH 4/4] moving to Java SE16 everything should be done except the attributes --- crates/file-parser/src/model/mod.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/crates/file-parser/src/model/mod.rs b/crates/file-parser/src/model/mod.rs index 73b3c7d..946f8fb 100644 --- a/crates/file-parser/src/model/mod.rs +++ b/crates/file-parser/src/model/mod.rs @@ -2,6 +2,8 @@ //! The models for a .class file //! //! [The .class specs](https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html) +//! +//! todo port to [SE16](https://docs.oracle.com/javase/specs/jvms/se16/html/jvms-4.html) #![allow(dead_code)] /// All of the Constants in the Constant Pool @@ -74,7 +76,10 @@ pub enum CpInfoInner { Utf8(cp_info::Utf8), MethodHandle(cp_info::MethodHandle), MethodType(cp_info::MethodType), + Dynamic(cp_info::Dynamic), InvokeDynamic(cp_info::InvokeDynamic), + Module(cp_info::Module), + Package(cp_info::Package), } /// Information about a field @@ -222,6 +227,14 @@ pub enum AttributeInfoInner { BootstrapMethods { bootstrap_methods: Vec, }, + // todo + MethodParameters, + Module, + ModulePackages, + ModuleMainClass, + NestHost, + NestMembers, + Record, } /// An exception handler in the JVM bytecode array @@ -450,6 +463,8 @@ pub enum ClassAccessFlag { Annotation = 0x2000, /// Declared as an enum type. Enum = 0x4000, + /// Is a module, not a class or interface. + MODULE = 0x8000, } /// Access Flags of a method