restructure structure

This commit is contained in:
nora 2021-12-15 21:36:37 +01:00
parent 1a920ab9af
commit 1186e70e69
22 changed files with 196 additions and 24 deletions

8
cs_parser/Cargo.toml Normal file
View file

@ -0,0 +1,8 @@
[package]
name = "cs_parser"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

765
cs_parser/src/lib.rs Normal file
View file

@ -0,0 +1,765 @@
mod model;
#[cfg(test)]
mod test;
use crate::cp_info::ValidateCpInfo;
pub use model::*;
use std::fmt::{Display, Formatter};
/// Error returned when a class file cannot be parsed.
///
/// Wraps a human-readable description of what went wrong.
#[derive(Debug)]
pub struct ParseErr(String);

impl Display for ParseErr {
    /// Prefixes the wrapped reason with a fixed "could not parse" message.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self(reason) = self;
        f.write_fmt(format_args!("Could not parse class file: {}", reason))
    }
}

impl std::error::Error for ParseErr {}
/// Result alias used by the parser; the error type is always [`ParseErr`].
pub type Result<T> = std::result::Result<T, ParseErr>;
/// A read cursor over the raw bytes that are being parsed.
#[derive(Clone)]
struct Data<'a> {
/// The bytes being read.
data: &'a [u1],
/// Index into `data` of the next byte that `u1` will return.
pointer: usize,
}
/// Parses the class file into a `ClassFile` structure
pub fn parse_class_file(data: &[u1]) -> Result<ClassFile> {
let mut data = Data::new(data);
ClassFile::parse(&mut data, &[])
}
impl<'a> Data<'a> {
fn new(data: &'a [u1]) -> Self {
Data { data, pointer: 0 }
}
fn u1(&mut self) -> Result<u1> {
let item = self.data.get(self.pointer).cloned();
self.pointer += 1;
item.ok_or_else(|| ParseErr("No u1 left".to_string()))
}
fn u2(&mut self) -> Result<u2> {
Ok(((self.u1()? as u2) << 8) | self.u1()? as u2)
}
/// Parses a u2 and validates it in the constant pool
fn cp<T: ValidateCpInfo>(&mut self, pool: &[CpInfo]) -> Result<FromPool<T>> {
let index = self.u2()?;
T::validate_cp_info(pool, index)?;
Ok(index.into())
}
fn u4(&mut self) -> Result<u4> {
Ok(((self.u2()? as u4) << 16) | self.u2()? as u4)
}
fn last_u1(&self) -> Result<u1> {
self.data
.get(self.pointer - 1)
.cloned()
.ok_or_else(|| ParseErr("Last u1 not found".to_string()))
}
fn last_u2(&self) -> Result<u2> {
let last2u1 = self
.data
.get(self.pointer - 2)
.cloned()
.ok_or_else(|| ParseErr("Last u2 not found".to_string()))?;
Ok(((last2u1 as u2) << 8) | self.last_u1()? as u2)
}
fn last_u4(&self) -> Result<u4> {
let last2u1 = self
.data
.get(self.pointer - 3)
.cloned()
.ok_or_else(|| ParseErr("Last 2 u1 in last u4 not found".to_string()))?;
let last3u1 = self
.data
.get(self.pointer - 4)
.cloned()
.ok_or_else(|| ParseErr("Last 3 u1 in last u4 not found".to_string()))?;
Ok(((last3u1 as u4) << 24) | ((last2u1 as u4) << 16) | self.last_u2()? as u4)
}
}
/// Implemented by every structure that can be read from a [`Data`] cursor.
trait Parse {
/// Reads one value of `Self` from `data`.
///
/// `cp` is the constant pool that pool indices are validated against; it is
/// empty while the constant pool itself is still being parsed.
fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self>
where
Self: Sized;
}
/// Parses `len` consecutive values of `T` from `data`.
///
/// `len` comes straight from the (untrusted) input, so the initial allocation
/// is capped at the number of bytes still available. This is only a capacity
/// hint: the vector still grows as needed, and parsing stops with an error as
/// soon as an element cannot be read.
///
/// # Errors
///
/// Returns the first error produced while parsing an element.
fn parse_vec<T: Parse, S: Into<usize>>(len: S, data: &mut Data, cp: &[CpInfo]) -> Result<Vec<T>> {
    let len = len.into();
    let remaining = data.data.len().saturating_sub(data.pointer);
    let mut vec = Vec::with_capacity(len.min(remaining));
    for _ in 0..len {
        vec.push(T::parse(data, cp)?);
    }
    Ok(vec)
}
// Implements `Parse` for each listed primitive by delegating to the `Data`
// method that has the same name as the type (`u1`, `u2`, `u4`).
macro_rules! parse_primitive {
($($value:ident),*) => {
$(impl Parse for $value {
// The constant pool is not needed to read a plain integer.
fn parse(data: &mut Data, _cp: &[CpInfo]) -> Result<Self>
where
Self: Sized,
{
data.$value()
}
})*
};
}
parse_primitive!(u1, u2, u4);
/// A pool index is parsed as a `u2` that is validated against the constant pool.
impl<T: ValidateCpInfo> Parse for FromPool<T> {
    fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
        data.cp::<T>(cp)
    }
}
impl Parse for ClassFile {
fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
let magic = data.u4()?;
assert_eq!(magic, 0xCAFEBABE);
let minor_version = data.u2()?;
let major_version = data.u2()?;
dbg!("reached constant pool");
let constant_pool = parse_vec(data.u2()? - 1, data, cp)?; // the minus one is important
dbg!("after constant pool");
let cp = &constant_pool;
let access_flags = data.u2()?;
let this_class = data.cp(cp)?;
let super_class = data.cp(cp)?;
let interfaces = parse_vec(data.u2()?, data, cp)?;
let fields = parse_vec(data.u2()?, data, cp)?;
let methods = parse_vec(data.u2()?, data, cp)?;
let attributes = parse_vec(data.u2()?, data, cp)?;
let mut class = Self {
magic,
minor_version,
major_version,
constant_pool,
access_flags,
this_class,
super_class,
interfaces,
fields,
methods,
attributes,
};
resolve_attributes(&mut class)?;
Ok(class)
}
}
impl Parse for CpInfo {
fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
let tag = data.u1()?;
Ok(match tag {
7 => Self {
tag,
inner: CpInfoInner::Class(cp_info::Class {
name_index: data.cp(cp)?,
}),
},
9 => Self {
tag,
inner: CpInfoInner::Fieldref(cp_info::Fieldref {
class_index: data.cp(cp)?,
name_and_type_index: data.cp(cp)?,
}),
},
10 => Self {
tag,
inner: CpInfoInner::MethodRef(cp_info::MethodRef {
class_index: data.cp(cp)?,
name_and_type_index: data.cp(cp)?,
}),
},
11 => Self {
tag,
inner: CpInfoInner::InterfaceMethodref(cp_info::InterfaceMethodref {
class_index: data.cp(cp)?,
name_and_type_index: data.cp(cp)?,
}),
},
8 => Self {
tag,
inner: CpInfoInner::String(cp_info::String {
string_index: data.cp(cp)?,
}),
},
3 => Self {
tag,
inner: CpInfoInner::Integer(cp_info::Integer { bytes: data.u4()? }),
},
4 => Self {
tag,
inner: CpInfoInner::Float(cp_info::Float { bytes: data.u4()? }),
},
5 => Self {
tag,
inner: CpInfoInner::Long(cp_info::Long {
high_bytes: data.u4()?,
low_bytes: data.u4()?,
}),
},
6 => Self {
tag,
inner: CpInfoInner::Double(cp_info::Double {
high_bytes: data.u4()?,
low_bytes: data.u4()?,
}),
},
12 => Self {
tag,
inner: CpInfoInner::NameAndType(cp_info::NameAndType {
name_index: data.cp(cp)?,
descriptor_index: data.cp(cp)?,
}),
},
1 => Self {
tag,
inner: CpInfoInner::Utf8(cp_info::Utf8 {
bytes: String::from_utf8(parse_vec(data.u2()?, data, cp)?).map_err(|err| {
ParseErr(format!("Invalid utf8 in CpInfo::Utf8: {}", err))
})?,
}),
},
15 => Self {
tag,
inner: CpInfoInner::MethodHandle(cp_info::MethodHandle {
reference_kind: data.u1()?,
reference_index: match data.last_u1()? {
1..=4 => cp_info::MethodHandleIndex::Field(data.cp(cp)?),
5..=8 => cp_info::MethodHandleIndex::Method(data.cp(cp)?),
9 => cp_info::MethodHandleIndex::Interface(data.cp(cp)?),
n => {
return Err(ParseErr(format!(
"Invalid MethodHandle reference kind: {}",
n
)))
}
},
}),
},
16 => Self {
tag,
inner: CpInfoInner::MethodType(cp_info::MethodType {
descriptor_index: data.cp(cp)?,
}),
},
17 => Self {
tag,
inner: CpInfoInner::Dynamic(cp_info::Dynamic {
bootstrap_method_attr_index: data.u2()?,
name_and_type_index: data.cp(cp)?,
}),
},
18 => Self {
tag,
inner: CpInfoInner::InvokeDynamic(cp_info::InvokeDynamic {
bootstrap_method_attr_index: data.u2()?,
name_and_type_index: data.cp(cp)?,
}),
},
19 => Self {
tag,
inner: CpInfoInner::Module(cp_info::Module {
name_index: data.cp(cp)?,
}),
},
20 => Self {
tag,
inner: CpInfoInner::Package(cp_info::Package {
name_index: data.cp(cp)?,
}),
},
_ => return Err(ParseErr(format!("Invalid CPInfo tag: {}", tag))),
})
}
}
impl Parse for FieldInfo {
    /// Reads the access flags, name and descriptor indices, and the counted attribute list.
    fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
        let access_flags = data.u2()?;
        let name_index = data.cp(cp)?;
        let descriptor_index = data.cp(cp)?;
        let attributes_count = data.u2()?;
        let attributes = parse_vec(attributes_count, data, cp)?;
        Ok(Self {
            access_flags,
            name_index,
            descriptor_index,
            attributes,
        })
    }
}
impl Parse for MethodInfo {
    /// Reads the access flags, name and descriptor indices, and the counted attribute list.
    fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
        let access_flags = data.u2()?;
        let name_index = data.cp(cp)?;
        let descriptor_index = data.cp(cp)?;
        let attributes_count = data.u2()?;
        let attributes = parse_vec(attributes_count, data, cp)?;
        Ok(Self {
            access_flags,
            name_index,
            descriptor_index,
            attributes,
        })
    }
}
impl Parse for AttributeInfo {
    /// Reads the attribute header and keeps the payload as raw bytes.
    ///
    /// The payload is stored as `Unknown` here and replaced with a concrete
    /// variant later by `resolve_attribute`.
    fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
        let attribute_name_index = data.cp(cp)?;
        let attribute_length = data.u4()?;
        let attribute_content = parse_vec(attribute_length as usize, data, cp)?;
        Ok(Self {
            attribute_name_index,
            attribute_length,
            inner: AttributeInfoInner::Unknown { attribute_content },
        })
    }
}
impl Parse for AttributeCodeException {
    /// Reads one exception table entry: four consecutive `u2` values.
    ///
    /// Each field must consume its own two bytes; re-reading the previous
    /// bytes would give every field the same value and leave the entry
    /// unconsumed, misaligning everything parsed after it.
    fn parse(data: &mut Data, _cp: &[CpInfo]) -> Result<Self> {
        Ok(Self {
            start_pc: data.u2()?,
            end_pc: data.u2()?,
            handler_pc: data.u2()?,
            catch_type: data.u2()?,
        })
    }
}
impl Parse for StackMapFrame {
    /// Reads one stack map frame; the leading `frame_type` byte selects the layout.
    ///
    /// # Errors
    ///
    /// Returns a [`ParseErr`] for the reserved frame types 128-246 or when the
    /// frame body cannot be read.
    fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
        let frame_type = data.u1()?;
        Ok(match frame_type {
            0..=63 => Self::SameFrame { frame_type },
            64..=127 => Self::SameLocals1StackItemFrame {
                frame_type,
                stack: VerificationTypeInfo::parse(data, cp)?,
            },
            // Reserved for future use by the class file format.
            128..=246 => {
                return Err(ParseErr(format!(
                    "Invalid StackMapFrame type: {}",
                    frame_type
                )))
            }
            247 => Self::SameLocals1StackItemFrameExtended {
                frame_type,
                offset_delta: data.u2()?,
                stack: VerificationTypeInfo::parse(data, cp)?,
            },
            248..=250 => Self::ChopFrame {
                frame_type,
                offset_delta: data.u2()?,
            },
            251 => Self::SameFrameExtended {
                frame_type,
                offset_delta: data.u2()?,
            },
            252..=254 => Self::AppendFrame {
                frame_type,
                offset_delta: data.u2()?,
                // An append frame carries `frame_type - 251` additional locals;
                // the count is not stored separately in the file.
                locals: parse_vec(frame_type - 251, data, cp)?,
            },
            255 => Self::FullFrame {
                frame_type,
                offset_delta: data.u2()?,
                locals: parse_vec(data.u2()?, data, cp)?,
                stack: parse_vec(data.u2()?, data, cp)?,
            },
        })
    }
}
impl Parse for VerificationTypeInfo {
    /// Reads a verification type: a tag byte, followed by a `u2` for tags 7 and 8.
    fn parse(data: &mut Data, _cp: &[CpInfo]) -> Result<Self> {
        let tag = data.u1()?;
        let info = match tag {
            0 => Self::Top { tag },
            1 => Self::Integer { tag },
            2 => Self::Float { tag },
            3 => Self::Double { tag },
            4 => Self::Long { tag },
            5 => Self::Null { tag },
            6 => Self::UninitializedThis { tag },
            7 => Self::Object {
                tag,
                cpool_index: data.u2()?.into(),
            },
            8 => Self::Uninitialized {
                tag,
                offset: data.u2()?,
            },
            _ => {
                return Err(ParseErr(format!(
                    "Invalid VerificationTypeInfo tag: {}",
                    tag
                )))
            }
        };
        Ok(info)
    }
}
impl Parse for AttributeInnerClass {
    /// Reads three validated pool indices followed by the access flags.
    fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
        let inner_class_info_index = data.cp(cp)?;
        let outer_class_info_index = data.cp(cp)?;
        let inner_class_name_index = data.cp(cp)?;
        let inner_class_access_flags = data.u2()?;
        Ok(Self {
            inner_class_info_index,
            outer_class_info_index,
            inner_class_name_index,
            inner_class_access_flags,
        })
    }
}
impl Parse for AttributeLineNumber {
    /// Reads a `(start_pc, line_number)` pair of `u2` values.
    fn parse(data: &mut Data, _cp: &[CpInfo]) -> Result<Self> {
        let start_pc = data.u2()?;
        let line_number = data.u2()?;
        Ok(Self {
            start_pc,
            line_number,
        })
    }
}
impl Parse for AttributeLocalVariableTable {
    /// Reads one local variable table entry in file order.
    fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
        let start_pc = data.u2()?;
        let length = data.u2()?;
        let name_index = data.cp(cp)?;
        let descriptor_or_signature_index = data.cp(cp)?;
        let index = data.u2()?;
        Ok(Self {
            start_pc,
            length,
            name_index,
            descriptor_or_signature_index,
            index,
        })
    }
}
impl Parse for Annotation {
    /// Reads the annotation type index and its counted list of element-value pairs.
    fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
        let type_index = data.cp(cp)?;
        let num_element_value_pairs = data.u2()?;
        // The count that was just read is also the length of the list that follows.
        let element_value_pairs = parse_vec(num_element_value_pairs, data, cp)?;
        Ok(Self {
            type_index,
            num_element_value_pairs,
            element_value_pairs,
        })
    }
}
impl Parse for AnnotationElementValuePair {
    /// Reads the element name index followed by the element value.
    fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
        let element_name_index = data.cp(cp)?;
        let element_name_name = AnnotationElementValue::parse(data, cp)?;
        Ok(Self {
            element_name_index,
            element_name_name,
        })
    }
}
impl Parse for AnnotationElementValue {
    /// Reads the tag byte and then the value; the value parser looks back at that tag.
    fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
        let tag = data.u1()?;
        let value = AnnotationElementValueValue::parse(data, cp)?;
        Ok(Self { tag, value })
    }
}
impl Parse for AnnotationElementValueValue {
    /// Reads the value part of an element value.
    ///
    /// The tag is not consumed here: it is the byte directly before the
    /// cursor, which the caller has already read.
    fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
        let tag = char::from(data.last_u1()?);
        let value = match tag {
            'B' | 'C' | 'D' | 'F' | 'I' | 'J' | 'S' | 'Z' | 's' => Self::ConstValueIndex {
                index: data.u2()?.into(),
            },
            'e' => Self::EnumConstValue {
                type_name_index: data.cp(cp)?,
                const_name_index: data.cp(cp)?,
            },
            'c' => Self::ClassInfoIndex {
                index: data.cp(cp)?,
            },
            '@' => {
                let annotation = Annotation::parse(data, cp)?;
                Self::AnnotationValue {
                    annotation: Box::new(annotation),
                }
            }
            '[' => {
                let num_values = data.u2()?;
                Self::ArrayValue {
                    values: parse_vec(num_values, data, cp)?,
                }
            }
            _ => {
                return Err(ParseErr(format!(
                    "Invalid AnnotationElementValueValue tag: {}",
                    tag
                )))
            }
        };
        Ok(value)
    }
}
impl Parse for ParameterAnnotation {
    /// Reads a `u2` count followed by that many annotations.
    fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
        let num_annotations = data.u2()?;
        let annotations = parse_vec(num_annotations, data, cp)?;
        Ok(Self { annotations })
    }
}
impl Parse for BootstrapMethod {
    /// Reads the bootstrap method reference and its counted argument list.
    fn parse(data: &mut Data, cp: &[CpInfo]) -> Result<Self> {
        let bootstrap_method_ref = data.cp(cp)?;
        let num_bootstrap_arguments = data.u2()?;
        let bootstrap_arguments = parse_vec(num_bootstrap_arguments, data, cp)?;
        Ok(Self {
            bootstrap_method_ref,
            bootstrap_arguments,
        })
    }
}
/// Resolves every raw attribute of the class, then of its methods, then of its fields.
///
/// Stops at the first attribute that fails to resolve and returns its error.
fn resolve_attributes(class: &mut ClassFile) -> Result<()> {
    let pool = &class.constant_pool;
    for attr in class.attributes.iter_mut() {
        attr.resolve_attribute(pool)?;
    }
    for method in class.methods.iter_mut() {
        for attr in method.attributes.iter_mut() {
            attr.resolve_attribute(pool)?;
        }
    }
    for field in class.fields.iter_mut() {
        for attr in field.attributes.iter_mut() {
            attr.resolve_attribute(pool)?;
        }
    }
    Ok(())
}
impl AttributeInfo {
fn resolve_attribute(&mut self, pool: &[CpInfo]) -> Result<()> {
// this is a borrow checker hack, but it works :(
let attr = std::mem::replace(
self,
AttributeInfo {
attribute_name_index: 0.into(),
attribute_length: 0,
inner: AttributeInfoInner::__Empty,
},
);
let (&index, &len, content) = match &attr {
AttributeInfo {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::Unknown { attribute_content },
} => (attribute_name_index, attribute_length, attribute_content),
_ => unreachable!("Attribute already resolved"),
};
let info = match pool.get((index.inner()) as usize - 1) {
Some(CpInfo {
inner: CpInfoInner::Utf8(cp_info::Utf8 { bytes, .. }),
..
}) => bytes,
Some(_) => return Err(ParseErr("Attribute name is not CpInfo::Utf8".to_string())),
_ => return Err(ParseErr("Constant Pool index out of Bounds".to_string())),
};
let mut data = Data::new(content);
self.resolve_attribute_inner(index, len, info, &mut data, pool)
}
fn resolve_attribute_inner(
&mut self,
attribute_name_index: FromPool<cp_info::Utf8>,
attribute_length: u32,
name: &str,
data: &mut Data,
cp: &[CpInfo],
) -> Result<()> {
let _ = std::mem::replace(
self,
match name {
"ConstantValue" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::ConstantValue {
constantvalue_index: data.cp(cp)?,
},
},
"Code" => {
let mut code = Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::Code {
max_stack: data.u2()?,
max_locals: data.u2()?,
code: parse_vec(data.u4()? as usize, data, cp)?,
exception_table: parse_vec(data.u2()?, data, cp)?,
attributes: parse_vec(data.u2()?, data, cp)?,
},
};
if let AttributeInfoInner::Code {
ref mut attributes, ..
} = code.inner
{
attributes
.iter_mut()
.map(|attr| attr.resolve_attribute(cp))
.collect::<Result<Vec<()>>>()?;
} else {
unreachable!()
}
code
}
"StackMapTable" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::StackMapTable {
number_of_entries: data.u2()?,
entries: parse_vec(data.last_u2()?, data, cp)?,
},
},
"Exceptions" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::Exceptions {
exception_index_table: parse_vec(data.u2()?, data, cp)?,
},
},
"InnerClasses" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::InnerClasses {
classes: parse_vec(data.u2()?, data, cp)?,
},
},
"EnclosingMethod" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::EnclosingMethod {
class_index: data.cp(cp)?,
method_index: data.cp(cp)?,
},
},
"Synthetic" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::Synthetic,
},
"Signature" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::Signature {
signature_index: data.cp(cp)?,
},
},
"SourceFile" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::SourceFile {
sourcefile_index: data.cp(cp)?,
},
},
"SourceDebugExtension" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::SourceDebugExtension {
debug_extension: parse_vec(data.last_u2()?, data, cp)?,
},
},
"LineNumberTable" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::LineNumberTable {
line_number_table: parse_vec(data.u2()?, data, cp)?,
},
},
"LocalVariableTable" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::LocalVariableTable {
local_variable_table: parse_vec(data.u2()?, data, cp)?,
},
},
"LocalVariableTypeTable" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::LocalVariableTypeTable {
local_variable_table: parse_vec(data.u2()?, data, cp)?,
},
},
"Deprecated" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::Deprecated,
},
"RuntimeVisibleAnnotations" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::RuntimeVisibleAnnotations {
annotations: parse_vec(data.u2()?, data, cp)?,
},
},
"RuntimeInvisibleAnnotations" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::RuntimeInvisibleAnnotations {
annotations: parse_vec(data.u2()?, data, cp)?,
},
},
"RuntimeVisibleParameterAnnotations" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::RuntimeVisibleParameterAnnotations {
parameter_annotations: parse_vec(data.u1()?, data, cp)?,
},
},
"RuntimeInvisibleParameterAnnotations" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::RuntimeInvisibleParameterAnnotations {
parameter_annotations: parse_vec(data.u1()?, data, cp)?,
},
},
"AnnotationDefault" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::AnnotationDefault {
default_value: AnnotationElementValue {
tag: data.u1()?,
value: AnnotationElementValueValue::parse(data, cp)?,
},
},
},
"BootstrapMethods" => Self {
attribute_name_index,
attribute_length,
inner: AttributeInfoInner::BootstrapMethods {
bootstrap_methods: parse_vec(data.u2()?, data, cp)?,
},
},
name => return Err(ParseErr(format!("Invalid Attribute name: {}", name))),
},
);
Ok(())
}
}

View file

@ -0,0 +1,333 @@
use crate::{u1, u2, u4, CpInfo, CpInfoInner, ParseErr};
use std::marker::PhantomData;
/// An index into the constant pool of the class.
///
/// `T` is the type the target entry is supposed to have. Create an enum if
/// multiple kinds of entries can be there.
///
/// The value this is pointing at must *always* be an entry of the correct type `T`.
/// Type checking is done at parse time, so that the value can be fetched with
/// minimal overhead afterwards.
#[repr(transparent)]
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct FromPool<T> {
// The raw index as read from the class file.
inner: u2,
// Ties the index to its target type without storing a `T`.
_marker: PhantomData<fn() -> T>,
}
// could probably be derived if I chose a better marker
// The `T: Clone` bound matches the bound the derived `Clone` impl puts on `T`.
impl<T: Clone> Copy for FromPool<T> {}
/// Wraps a raw index without validating it against any constant pool.
impl<T> From<u2> for FromPool<T> {
    #[inline]
    fn from(index: u2) -> Self {
        let _marker = PhantomData;
        Self {
            inner: index,
            _marker,
        }
    }
}
impl<T> FromPool<T> {
/// Returns the raw index exactly as it is stored.
#[inline]
pub const fn inner(&self) -> u2 {
self.inner
}
}
impl<'pool, T: FromCpInfo<'pool>> FromPool<T> {
    /// Looks up the entry this index refers to in `pool`.
    ///
    /// The lookup is delegated to `T::from_cp_info_with_index`, which does not
    /// re-validate the index.
    #[inline]
    pub fn get(&self, pool: &'pool [CpInfo]) -> T::Target {
        let index = self.inner;
        <T as FromCpInfo<'pool>>::from_cp_info_with_index(pool, index)
    }
}
impl<'pool, T: FromCpInfo<'pool>> FromPool<Option<T>> {
    /// Looks up an optional entry: index `0` means "absent" and yields `None`.
    #[inline]
    pub fn maybe_get(&self, pool: &'pool [CpInfo]) -> Option<T::Target> {
        match self.inner {
            0 => None,
            index => Some(T::from_cp_info_with_index(pool, index)),
        }
    }
}
/// Parse-time check that a constant pool index points at an acceptable entry.
pub trait ValidateCpInfo {
/// Checks that the constant pool entry has the correct type.
///
/// `index` is the original index as read from the file; it can be 0 for
/// optional constants.
fn validate_cp_info(info: &[CpInfo], index: u2) -> Result<(), ParseErr>;
}
/// Extracts a typed view of a constant pool entry.
pub trait FromCpInfo<'pool>: ValidateCpInfo {
/// What a lookup produces.
type Target;
/// Converts a single pool entry into the target.
fn from_cp_info(info: &'pool CpInfo) -> Self::Target;
/// Looks up the entry for the 1-based `index` and converts it.
///
/// Indexes the slice directly, so this panics if `index` is 0 or past the
/// end of `info`.
fn from_cp_info_with_index(info: &'pool [CpInfo], index: u2) -> Self::Target {
Self::from_cp_info(&info[index as usize - 1])
}
}
/// Optional pool references: index `0` stands for "no entry".
impl<'pool, T: FromCpInfo<'pool>> FromCpInfo<'pool> for Option<T> {
    type Target = Option<T::Target>;

    /// Never called: without the index it cannot be known whether the entry is absent.
    #[inline]
    fn from_cp_info(_info: &'pool CpInfo) -> Self::Target {
        unreachable!("FromPool<Option<T>> should always be get through `from_cp_info_with_index`")
    }

    fn from_cp_info_with_index(info: &'pool [CpInfo], index: u2) -> Self::Target {
        match index {
            0 => None,
            present => Some(T::from_cp_info_with_index(info, present)),
        }
    }
}
/// Index `0` is always accepted; any other index is validated as a `T`.
impl<T: ValidateCpInfo> ValidateCpInfo for Option<T> {
    fn validate_cp_info(info: &[CpInfo], index: u2) -> Result<(), ParseErr> {
        match index {
            0 => Ok(()),
            present => T::validate_cp_info(info, present),
        }
    }
}
// Implements `FromCpInfo` and `ValidateCpInfo` for constant pool entry types
// whose name matches their `CpInfoInner` variant.
macro_rules! impl_try_from_cp {
    ($($name:ident),*) => {
        $(
            impl<'pool> FromCpInfo<'pool> for $name {
                type Target = &'pool Self;

                // The entry type was checked at parse time, so any other
                // variant is treated as unreachable here.
                #[inline]
                fn from_cp_info(info: &'pool CpInfo) -> Self::Target {
                    match &info.inner {
                        CpInfoInner::$name(class) => class,
                        _kind => unreachable!(),
                    }
                }
            }

            impl ValidateCpInfo for $name {
                fn validate_cp_info(info: &[CpInfo], index: u2) -> Result<(), ParseErr> {
                    if index == 0 {
                        return Err(ParseErr("Index must not be 0".to_string()));
                    }
                    // todo: the pool is still empty while the constant pool
                    // itself is being parsed, so references between pool
                    // entries are not checked here. This needs to be
                    // revisited after testing.
                    if info.is_empty() {
                        return Ok(());
                    }
                    // The index comes from untrusted input, so an index past
                    // the end of the pool is an error rather than a panic.
                    match info.get(index as usize - 1).map(|entry| &entry.inner) {
                        Some(CpInfoInner::$name(_)) => Ok(()),
                        Some(kind) => Err(ParseErr(format!(
                            concat!("Expected '", stringify!($name), "', found '{:?}'"),
                            kind
                        ))),
                        None => Err(ParseErr(format!(
                            "Constant pool index {} out of bounds (pool has {} entries)",
                            index,
                            info.len()
                        ))),
                    }
                }
            }
        )*
    };
}
impl<'pool> FromCpInfo<'pool> for CpInfoInner {
type Target = &'pool Self;
fn from_cp_info(info: &'pool CpInfo) -> Self::Target {
&info.inner
}
}
impl ValidateCpInfo for CpInfoInner {
fn validate_cp_info(_info: &[CpInfo], _index: u2) -> Result<(), ParseErr> {
Ok(())
}
}
/// A class or interface constant (tag 7)
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Class {
/// Entry must be `Utf8`
pub name_index: FromPool<Utf8>,
}
/// A reference to a field (tag 9)
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Fieldref {
/// May be a class or interface type
pub class_index: FromPool<Class>,
pub name_and_type_index: FromPool<NameAndType>,
}
/// A reference to a class method (tag 10)
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct MethodRef {
/// Must be a class type
pub class_index: FromPool<Class>,
pub name_and_type_index: FromPool<NameAndType>,
}
/// A reference to an interface method (tag 11)
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct InterfaceMethodref {
/// Must be an interface type
pub class_index: FromPool<Class>,
pub name_and_type_index: FromPool<NameAndType>,
}
/// A string constant (tag 8). Note that this shadows `std::string::String` in this module
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct String {
/// Entry must be `Utf8`
pub string_index: FromPool<Utf8>,
}
/// A 4 byte integer constant (tag 3)
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Integer {
// Big endian
pub bytes: u4,
}
/// A 4 byte floating-point constant (tag 4)
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Float {
/// IEEE 754 floating-point single format, big endian
pub bytes: u4,
}
/// 8 byte constants take up two spaces in the constant pool
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Long {
/// Big endian
pub high_bytes: u4,
/// Big endian
pub low_bytes: u4,
}
/// 8 byte constants take up two spaces in the constant pool
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Double {
/// IEEE 754 floating-point double format, big endian
pub high_bytes: u4,
/// IEEE 754 floating-point double format, big endian
pub low_bytes: u4,
}
/// Any field or method, without the class it belongs to
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct NameAndType {
pub name_index: FromPool<Utf8>,
pub descriptor_index: FromPool<Utf8>,
}
/// A string of text (tag 1)
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct Utf8 {
/// Contains modified UTF-8
// NOTE(review): the parser builds this with `String::from_utf8`, i.e. it
// requires standard UTF-8 — confirm how modified UTF-8 input should be handled.
pub bytes: std::string::String,
}
/// A method handle (tag 15)
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct MethodHandle {
/// The kind of method handle (1-9)
/// If the kind is 1-4, the entry must be `FieldRef`. If the kind is 5-8, the entry must be `MethodRef`
/// If the kind is 9, the entry must be `InterfaceMethodRef`
pub reference_kind: u1,
pub reference_index: MethodHandleIndex,
}
/// The target of a `MethodHandle`, typed according to its reference kind
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub enum MethodHandleIndex {
Field(FromPool<Fieldref>),
Method(FromPool<MethodRef>),
Interface(FromPool<InterfaceMethodref>),
}
/// A method type (tag 16)
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct MethodType {
/// Entry must be `Utf8`
pub descriptor_index: FromPool<Utf8>,
}
/// A dynamically computed constant (tag 17)
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Dynamic {
/// Must be a valid index into the `bootstrap_methods` array of the bootstrap method table of this class field
pub bootstrap_method_attr_index: u2,
pub name_and_type_index: FromPool<NameAndType>,
}
/// A dynamically computed call site (tag 18)
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct InvokeDynamic {
/// Must be a valid index into the `bootstrap_methods` array of the bootstrap method table of this class field
pub bootstrap_method_attr_index: u2,
pub name_and_type_index: FromPool<NameAndType>,
}
/// A module constant (tag 19)
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Module {
/// Entry must be `Utf8`
pub name_index: FromPool<Utf8>,
}
/// A package constant (tag 20)
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Package {
/// Entry must be `Utf8`
pub name_index: FromPool<Utf8>,
}
// default implementations
// `Utf8` is not in this list: it gets hand-written impls because its lookup
// target is a `&str` rather than a reference to the struct.
impl_try_from_cp!(
Class,
Fieldref,
MethodRef,
InterfaceMethodref,
String,
Integer,
Float,
Long,
Double,
NameAndType,
MethodHandle,
MethodType,
Dynamic,
InvokeDynamic,
Module,
Package
);
impl ValidateCpInfo for Utf8 {
fn validate_cp_info(info: &[CpInfo], index: u2) -> Result<(), ParseErr> {
if index == 0 {
return Err(ParseErr("Index must not be 0".to_string()));
}
if info.len() == 0 {
return Ok(());
}
match &info[index as usize - 1].inner {
CpInfoInner::Utf8(_) => Ok(()),
kind => Err(ParseErr(format!(
concat!("Expected '", stringify!($name), "', found '{:?}'"),
kind
))),
}
}
}
// custom implementations
/// A `Utf8` entry is handed out as the string slice it contains.
impl<'pool> FromCpInfo<'pool> for Utf8 {
    type Target = &'pool str;

    #[inline]
    fn from_cp_info(info: &'pool CpInfo) -> Self::Target {
        if let CpInfoInner::Utf8(utf8) = &info.inner {
            utf8.bytes.as_str()
        } else {
            unreachable!()
        }
    }
}

612
cs_parser/src/model/mod.rs Normal file
View file

@ -0,0 +1,612 @@
//!
//! The models for a .class file
//!
//! [The .class specs](https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html)
//!
//! TODO: port to [SE16](https://docs.oracle.com/javase/specs/jvms/se16/html/jvms-4.html)
#![allow(dead_code)]
/// All of the Constants in the Constant Pool
pub mod cp_info;
pub use cp_info::FromPool;
// The types used in the specs
/// An unsigned one-byte quantity
#[allow(non_camel_case_types)]
pub type u1 = u8;
/// An unsigned two-byte quantity
#[allow(non_camel_case_types)]
pub type u2 = u16;
/// An unsigned four-byte quantity
#[allow(non_camel_case_types)]
pub type u4 = u32;
///
/// # Represents a .class file
///
/// Fields are listed in the order in which the parser reads them.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct ClassFile {
/// Magic number identifying the format (= 0xCAFEBABE)
pub magic: u4,
/// The version of the class file (.X)
pub minor_version: u2,
/// The version of the class file (X.)
pub major_version: u2,
/// The constant pool. Indexed from 1 to `constant_pool_count` - 1,
/// where `constant_pool_count` = number of entries in the constant pool + 1.
/// Index `n` is stored at position `n - 1` of this vector
pub constant_pool: Vec<CpInfo>,
/// Mask of `ClassAccessFlag` used to denote access permissions
pub access_flags: u2,
/// A valid index into the `constant_pool` table. The entry must be a `Class`
pub this_class: FromPool<cp_info::Class>,
/// Zero or a valid index into the `constant_pool` table
pub super_class: FromPool<Option<cp_info::Class>>,
/// Each entry must be a valid index into the `constant_pool` table. The entry must be a `Class`
pub interfaces: Vec<FromPool<cp_info::Class>>,
/// All fields of the class. Contains only fields of the class itself
pub fields: Vec<FieldInfo>,
/// All methods of the class. If it's neither Native nor Abstract, the implementation has to be provided too
pub methods: Vec<MethodInfo>,
/// All attributes of the class
pub attributes: Vec<AttributeInfo>,
}
/// A constant from the constant pool
/// May have indices back to the constant pool, with expected types
/// _index: A valid index into the `constant_pool` table.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct CpInfo {
/// The tag byte that selected the kind of `inner`
pub tag: u1,
/// The tag-specific payload
pub inner: CpInfoInner,
}
/// The payload of a constant pool entry, one variant per supported tag
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum CpInfoInner {
Class(cp_info::Class),
Fieldref(cp_info::Fieldref),
MethodRef(cp_info::MethodRef),
InterfaceMethodref(cp_info::InterfaceMethodref),
String(cp_info::String),
Integer(cp_info::Integer),
Float(cp_info::Float),
/// 8 byte constants take up two spaces in the constant pool
Long(cp_info::Long),
/// 8 byte constants take up two spaces in the constant pool
Double(cp_info::Double),
/// Any field or method, without the class it belongs to
NameAndType(cp_info::NameAndType),
Utf8(cp_info::Utf8),
MethodHandle(cp_info::MethodHandle),
MethodType(cp_info::MethodType),
Dynamic(cp_info::Dynamic),
InvokeDynamic(cp_info::InvokeDynamic),
Module(cp_info::Module),
Package(cp_info::Package),
}
/// Information about a field
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct FieldInfo {
/// Mask of `FieldAccessFlag` used to denote access permissions
pub access_flags: u2,
/// Index to the `constant_pool` of the field name. Entry must be `Utf8`
pub name_index: FromPool<cp_info::Utf8>,
/// Index to the `constant_pool` of the field descriptor. Entry must be `Utf8`
pub descriptor_index: FromPool<cp_info::Utf8>,
/// The attributes for this field
pub attributes: Vec<AttributeInfo>,
}
/// Information about a method
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct MethodInfo {
/// Mask of `MethodAccessFlag` used to denote access permissions
pub access_flags: u2,
/// Index to the `constant_pool` of the method name, must be `Utf8`
pub name_index: FromPool<cp_info::Utf8>,
/// Index to the `constant_pool` of the method descriptor, must be `Utf8`
pub descriptor_index: FromPool<cp_info::Utf8>,
/// The attributes for this method; the parser resolves them after the whole class has been read
pub attributes: Vec<AttributeInfo>,
}
/// Information about an attribute
///
/// `attribute_name_index`: Index to the `constant_pool`, must be `Utf8`
/// `attribute_length`: The length of the subsequent bytes, does not include the first 6
///
/// _index: Index to the `constant_pool` table of any type
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct AttributeInfo {
/// The name selects which `AttributeInfoInner` variant the content is parsed into
pub attribute_name_index: FromPool<cp_info::Utf8>,
pub attribute_length: u4,
/// The attribute value
pub inner: AttributeInfoInner,
}
/// The Attributes, without the two common fields
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum AttributeInfoInner {
/// Placeholder the parser swaps in while an attribute is being resolved
__Empty,
/// The exact kind of attribute is not known yet and will be resolved later in the process
Unknown {
/// The raw, uninterpreted bytes of the attribute
attribute_content: Vec<u1>,
},
/// Only on fields, the constant value of that field
ConstantValue {
/// Must be of type `Long`/`Float`/`Double`/`Integer`/`String`
constantvalue_index: FromPool<CpInfoInner>,
},
/// Only on methods, contains JVM instructions and auxiliary information for a single method
Code {
/// The maximum depth of the operand stack for this method
max_stack: u2,
/// The number of the local variables array, including the parameters
max_locals: u2,
/// The JVM bytecode of this method
code: Vec<u1>,
/// The exception handlers for this method
exception_table: Vec<AttributeCodeException>,
/// The attributes of the code
attributes: Vec<AttributeInfo>,
},
/// Only on the `Code` attribute, used for verification
/// May be implicit on version >= 50.0, with no entries
StackMapTable {
number_of_entries: u2,
entries: Vec<StackMapFrame>,
},
/// Only on `MethodInfo`, indicates which checked exceptions might be thrown
Exceptions {
/// Must be a `Class` constant
exception_index_table: Vec<u2>,
},
/// Only on a `ClassFile`. Specifies the inner classes of a class
InnerClasses {
classes: Vec<AttributeInnerClass>,
},
/// Only on a `ClassFile`, required if it is local or anonymous
EnclosingMethod {
/// Must be a `Class` constant, the innermost enclosing class
class_index: FromPool<cp_info::Class>,
/// Must be zero or `NameAndType`
// NOTE(review): the field type is not optional, so the parser's
// validation rejects an index of zero here — confirm which is intended.
method_index: FromPool<cp_info::NameAndType>,
},
/// Can be on `ClassFile`, `FieldInfo`,or `MethodInfo`.
/// Every generated class has to have this attribute or the `Synthetic` Accessor modifier
Synthetic,
/// Can be on `ClassFile`, `FieldInfo`,or `MethodInfo`. Records generic signature information
Signature {
/// Must be `Utf8`, and a Class/Method/Field signature
signature_index: FromPool<cp_info::Utf8>,
},
/// Only on a `ClassFile`
SourceFile {
/// Must be `Utf8`, the name of the source file
sourcefile_index: FromPool<cp_info::Utf8>,
},
/// Only on a `ClassFile`
SourceDebugExtension {
/// A modified UTF-8 of additional debugging information, `attribute_length`: number of items in `debug_extension`
debug_extension: Vec<u1>,
},
/// Only on the `Code` attribute. It includes line number information used by debuggers
LineNumberTable {
line_number_table: Vec<AttributeLineNumber>,
},
/// Only on the `Code` attribute. It may be used to determine the value of local variables by debuggers
LocalVariableTable {
/// Note: the 3rd field is called `descriptor_index` and represents a field descriptor
local_variable_table: Vec<AttributeLocalVariableTable>,
},
/// Only on the `Code` attribute. It provides signature information instead of descriptor information
LocalVariableTypeTable {
/// Note: the 3rd field is called `signature_index` and represents a field type signature
local_variable_table: Vec<AttributeLocalVariableTable>,
},
/// Can be on `ClassFile`, `FieldInfo`,or `MethodInfo`. Marks a class/field/method as deprecated
Deprecated,
/// Can be on `ClassFile`, `FieldInfo`,or `MethodInfo`. Contains all Runtime visible annotations
RuntimeVisibleAnnotations {
annotations: Vec<Annotation>,
},
/// Same as `RuntimeVisibleAnnotations`, but invisible to reflection
RuntimeInvisibleAnnotations {
annotations: Vec<Annotation>,
},
/// Only on `MethodInfo`, parameter annotations visible during runtime
RuntimeVisibleParameterAnnotations {
parameter_annotations: Vec<ParameterAnnotation>,
},
/// Same as `RuntimeVisibleParameterAnnotations`, but invisible to reflection
RuntimeInvisibleParameterAnnotations {
parameter_annotations: Vec<ParameterAnnotation>,
},
/// Only on `MethodInfo`, on those representing elements of annotation types, the default value of the element
AnnotationDefault {
default_value: AnnotationElementValue,
},
/// Only on `ClassFile`. Records bootstrap method specifiers for `invokedynamic`
BootstrapMethods {
bootstrap_methods: Vec<BootstrapMethod>,
},
/// Only on `ClassFile`, where there may be one at most. Specifies packages exported and opened by a module
Module(Box<Module>),
// todo: the variants below have no payload yet
MethodParameters,
ModulePackages,
ModuleMainClass,
NestHost,
NestMembers,
Record,
}
/// An exception handler in the JVM bytecode array
///
/// The handler covers the half-open range `start_pc..end_pc` of the code array;
/// `handler_pc` is where the handler itself begins.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub struct AttributeCodeException {
/// Start of the range in the code in which the handler is active (inclusive).
/// Must be a valid index into the code array.
pub start_pc: u2,
/// End of the range in the code in which the handler is active (exclusive).
/// Must be a valid index into the code array, or equal to the length of the code array.
pub end_pc: u2,
/// The start of the exception handler, must be a valid index into the code array at an opcode instruction
pub handler_pc: u2,
/// If the catch type is nonzero, it must be a valid index into the `constant_pool` and refer to a `Class`.
/// Zero means it catches all exceptions; this is usually used for `finally`.
// NOTE(review): kept as a raw `u2` rather than a `FromPool<..>`, presumably because zero is a legal value.
pub catch_type: u2,
}
/// Specifies the type state at a particular bytecode offset
///
/// Every frame has an `offset_delta`, given either implicitly by `frame_type` or explicitly as a field.
/// The offset of a frame is calculated by adding `offset_delta + 1` to the offset of the previous frame.
/// Per JVMS §4.7.4 the first explicit frame is the exception: its offset is `offset_delta` itself.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum StackMapFrame {
/// Exactly the same locals as the previous frame and zero stack items, `offset_delta` is `frame_type`
SameFrame {
frame_type: u1, // 0-63
},
/// Exactly the same locals as the previous frame and 1 stack item, `offset_delta` is `frame_type - 64`
SameLocals1StackItemFrame {
frame_type: u1, // 64-127
/// The type of the single stack item
stack: VerificationTypeInfo,
},
/// Exactly the same locals as the previous frame and 1 stack item, `offset_delta` is given explicitly
SameLocals1StackItemFrameExtended {
frame_type: u1, // 247
offset_delta: u2,
/// The type of the single stack item
stack: VerificationTypeInfo,
},
/// Operand stack is empty and the locals are the same as in the previous frame,
/// except that the last *k* locals are absent (`k = 251 - frame_type`)
ChopFrame {
frame_type: u1, // 248-250
offset_delta: u2,
},
/// Exactly the same locals as the previous frame and zero stack items, `offset_delta` is given explicitly
SameFrameExtended {
frame_type: u1, // 251
offset_delta: u2,
},
/// Operand stack is empty and exactly the same locals as the previous frame, except that *k* locals are added (`k = frame_type - 251`)
AppendFrame {
frame_type: u1, // 252-254
offset_delta: u2,
/// The added locals, `length = frame_type - 251`
locals: Vec<VerificationTypeInfo>,
},
/// Spells out all locals and all stack items explicitly.
/// The stack or variable entries in the locals/stack can be either 1 or 2 entries wide, depending on the type
FullFrame {
frame_type: u1, // 255
offset_delta: u2,
locals: Vec<VerificationTypeInfo>,
stack: Vec<VerificationTypeInfo>,
},
}
/// The verification type of a single stack value or local variable inside a `StackMapFrame`
///
/// Every variant stores the raw `tag` byte it was read with; the expected value is noted next to the field.
/// Note that the tag numbering is not a typo: `Double` is 3 and `Long` is 4 (JVMS §4.7.4).
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum VerificationTypeInfo {
Top {
tag: u1, // 0
},
Integer {
tag: u1, // 1
},
Float {
tag: u1, // 2
},
Long {
tag: u1, // 4
},
Double {
tag: u1, // 3
},
Null {
tag: u1, // 5
},
UninitializedThis {
tag: u1, // 6
},
/// A class instance, the class is given by `cpool_index`
Object {
tag: u1, // 7
/// Must be a `Class`
cpool_index: FromPool<cp_info::Class>,
},
Uninitialized {
tag: u1, // 8
/// Per JVMS §4.7.4, the offset in the code array of the `new` instruction that created the object
offset: u2,
},
}
/// One entry of the `classes` list of `AttributeInfo::InnerClasses`
// NOTE(review): the two "0 or ..." indices below are typed as non-optional `FromPool<..>`, while
// `Module::module_version_index` uses `FromPool<Option<..>>` for the same situation — confirm which
// form the constant pool validation expects for a zero index.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub struct AttributeInnerClass {
/// Must be a `Class`
pub inner_class_info_index: FromPool<cp_info::Class>,
/// Must be 0 or a `Class`
pub outer_class_info_index: FromPool<cp_info::Class>,
/// Must be 0 or `Utf8`
pub inner_class_name_index: FromPool<cp_info::Utf8>,
/// Must be a mask of `InnerClassAccessFlags`
pub inner_class_access_flags: u2,
}
/// Line number information for `AttributeInfo::LineNumberTable`
///
/// Each entry maps a position in the code array to the source line that starts there.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub struct AttributeLineNumber {
/// Index into the code array where a new line in the source begins
pub start_pc: u2,
/// The line number in the source file
pub line_number: u2,
}
/// Local variable information for `AttributeInfo::LocalVariableTable` and `AttributeInfo::LocalVariableTypeTable`
///
/// Both attributes share this entry type; they only differ in what `descriptor_or_signature_index` refers to.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub struct AttributeLocalVariableTable {
/// The local variable must have a value between `start_pc` and `start_pc + length`. Must be a valid opcode
pub start_pc: u2,
/// The local variable must have a value between `start_pc` and `start_pc + length`
pub length: u2,
/// Must be `Utf8`, the name of the local variable
pub name_index: FromPool<cp_info::Utf8>,
/// Must be `Utf8`. A field descriptor (for `LocalVariableTable`) or a field signature
/// (for `LocalVariableTypeTable`) encoding the type
pub descriptor_or_signature_index: FromPool<cp_info::Utf8>,
/// The variable must be at `index` in the local variable array
pub index: u2,
}
/// An annotation on a program element
///
/// Used by the runtime-visible as well as the runtime-invisible annotation attributes,
/// and nested inside `AnnotationElementValueValue::AnnotationValue`.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct Annotation {
/// Must be `Utf8`
pub type_index: FromPool<cp_info::Utf8>,
/// The number of element-value pairs.
// NOTE(review): presumably always equal to `element_value_pairs.len()` — confirm against the parser.
pub num_element_value_pairs: u2,
/// The element-value pairs of this annotation
pub element_value_pairs: Vec<AnnotationElementValuePair>,
}
// these type names have just become java at this point. no shame.
/// An element-value pair in the `Annotation`
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct AnnotationElementValuePair {
/// Must be `Utf8`, the name of the element
pub element_name_index: FromPool<cp_info::Utf8>,
/// The value of the element.
// NOTE(review): despite its name this field holds the *value* of the pair, not a name;
// renaming it would break callers, so it is only flagged here.
pub element_name_name: AnnotationElementValue,
}
/// The value of an `AnnotationElementValuePair`
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct AnnotationElementValue {
/// One of B, C, D, F, I, J, S, Z, s, e, c, @ or [.
/// Decides which variant of `AnnotationElementValueValue` is stored in `value`.
pub tag: u1,
/// The actual value, its variant is determined by `tag`
pub value: AnnotationElementValueValue,
}
/// The value of an `AnnotationElementValue`, the variant depends on its `tag`
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum AnnotationElementValueValue {
/// If the tag is B, C, D, F, I, J, S, Z, or s.
ConstValueIndex {
/// Must be the matching constant pool entry
index: FromPool<CpInfoInner>,
},
/// If the tag is e
EnumConstValue {
/// Must be `Utf8`
type_name_index: FromPool<cp_info::Utf8>,
/// Must be `Utf8`
const_name_index: FromPool<cp_info::Utf8>,
},
/// If the tag is c
ClassInfoIndex {
/// Must be `Utf8`, for example `Ljava/lang/Object;` for `Object`
index: FromPool<cp_info::Utf8>,
},
/// If the tag is @
AnnotationValue {
/// Represents a nested annotation
annotation: Box<Annotation>,
},
/// If the tag is [, the elements of the array
ArrayValue { values: Vec<AnnotationElementValue> },
}
/// Used in `AttributeInfo::RuntimeVisibleParameterAnnotations` and
/// `AttributeInfo::RuntimeInvisibleParameterAnnotations`
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct ParameterAnnotation {
/// The annotations of this entry
pub annotations: Vec<Annotation>,
}
/// Used in `AttributeInfo::BootstrapMethods`
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct BootstrapMethod {
/// Must be a `MethodHandle`
pub bootstrap_method_ref: FromPool<cp_info::MethodHandle>,
/// Each argument is a cpool entry. The constants must be `String, Class, Integer, Long, Float, Double, MethodHandle, or MethodType`
pub bootstrap_arguments: Vec<FromPool<CpInfoInner>>,
}
/// Used in `AttributeInfo::Module`
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct Module {
/// The current module.
// NOTE(review): JVMS §4.7.25 requires this entry to be a `CONSTANT_Module_info`, but the type
// parameter here is `cp_info::Utf8` — confirm whether this is intentional.
pub module_name_index: FromPool<cp_info::Utf8>,
/// The following flags exist
/// * 0x0020 (ACC_OPEN) - Indicates that this module is open.
/// * 0x1000 (ACC_SYNTHETIC) - Indicates that this module was not explicitly or implicitly declared.
/// * 0x8000 (ACC_MANDATED) - Indicates that this module was implicitly declared.
pub module_flags: u2,
/// The version of the module, optional
pub module_version_index: FromPool<Option<cp_info::Utf8>>,
/// If the module is `java.base`, the Vec must be empty
pub requires: Vec<ModuleRequires>,
/// The packages exported by this module
pub exports: Vec<ModuleExports>,
/// The packages opened by this module
pub opens: Vec<ModuleOpens>,
/// Raw constant pool indices.
// NOTE(review): per JVMS §4.7.25 each entry should refer to a `Class` (a service interface
// used by the module); these are not wrapped in `FromPool` — confirm whether they are validated.
pub uses_index: Vec<u2>,
/// The service implementations provided by this module
pub provides: Vec<ModuleProvides>,
}
/// A dependence of the current module, an entry of `Module::requires`
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct ModuleRequires {
/// The module the current module depends on
pub requires_index: FromPool<cp_info::Module>,
/// * 0x0020 (ACC_TRANSITIVE) - Indicates that any module which depends on the current module, implicitly declares a dependence on the module indicated by this entry.
/// * 0x0040 (ACC_STATIC_PHASE) - Indicates that this dependence is mandatory in the static phase, i.e., at compile time, but is optional in the dynamic phase, i.e., at run time.
/// * 0x1000 (ACC_SYNTHETIC) - Indicates that this dependence was not explicitly or implicitly declared in the source of the module declaration.
/// * 0x8000 (ACC_MANDATED) - Indicates that this dependence was implicitly declared in the source of the module declaration.
/// If the current module is not java.base, and the class file version number is 54.0 or above, then neither ACC_TRANSITIVE nor ACC_STATIC_PHASE may be set in requires_flags.
pub requires_flags: u2,
/// The version of the required module, optional
pub requires_version_index: FromPool<Option<cp_info::Utf8>>,
}
/// A package exported by the current module, an entry of `Module::exports`
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct ModuleExports {
/// The exported package
pub exports_index: FromPool<cp_info::Package>,
/// * 0x1000 (ACC_SYNTHETIC) - Indicates that this export was not explicitly or implicitly declared in the source of the module declaration.
/// * 0x8000 (ACC_MANDATED) - Indicates that this export was implicitly declared in the source of the module declaration.
pub exports_flags: u2,
/// If this is empty, the export is *unqualified*, allowing unrestricted access.
/// If it is not empty, the export is *qualified*: only the listed modules can access the package.
pub exports_to_index: Vec<FromPool<cp_info::Module>>,
}
/// A package opened by the current module, an entry of `Module::opens`
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct ModuleOpens {
/// The opened package.
// NOTE(review): JVMS §4.7.25 requires this entry to be a `CONSTANT_Package_info` (like
// `ModuleExports::exports_index`), but the type parameter here is `cp_info::Module` — this
// looks like a copy-paste slip; changing it alters the public field type, so confirm first.
pub opens_index: FromPool<cp_info::Module>,
/// * 0x1000 (ACC_SYNTHETIC) - Indicates that this opening was not explicitly or implicitly declared in the source of the module declaration.
/// * 0x8000 (ACC_MANDATED) - Indicates that this opening was implicitly declared in the source of the module declaration.
pub opens_flags: u2,
/// If this is empty, the opening is *unqualified*, allowing unrestricted reflective access.
/// If it is not empty, the opening is *qualified*: only the listed modules can reflectively access the package.
pub opens_to_index: Vec<FromPool<cp_info::Module>>,
}
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
/// A service interface for which this module represents an implementation, an entry of `Module::provides`
pub struct ModuleProvides {
/// Represents the interface
pub provides_index: FromPool<cp_info::Class>,
/// Represents the implementations, there must be at least one
pub provides_with_index: Vec<FromPool<cp_info::Class>>,
}
/////// Access Flags
/// Access Flags of a class
///
/// Each variant is a single bit; the `access_flags` field of a class file is a `u2` mask that may
/// combine several of them (e.g. `0x0021` is `Public | Super`).
// NOTE(review): `MODULE` is the only variant here that is not UpperCamelCase; renaming it would
// break callers, so it is left as is.
#[repr(u16)]
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum ClassAccessFlag {
/// Declared public; may be accessed from outside its package.
Public = 0x0001,
/// Declared final; no subclasses allowed.
Final = 0x0010,
/// Treat superclass methods specially when invoked by the invokespecial instruction.
Super = 0x0020,
/// Is an interface, not a class.
Interface = 0x0200,
/// Declared abstract; must not be instantiated.
Abstract = 0x0400,
/// Declared synthetic; not present in the source code.
Synthetic = 0x1000,
/// Declared as an annotation type.
Annotation = 0x2000,
/// Declared as an enum type.
Enum = 0x4000,
/// Is a module, not a class or interface.
MODULE = 0x8000,
}
/// Access Flags of a method
///
/// Each variant is a single bit of the `u2` access flag mask of a method.
#[repr(u16)]
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum MethodAccessFlag {
/// Declared public; may be accessed from outside its package.
PUBLIC = 0x0001,
/// Declared private; accessible only within the defining class.
PRIVATE = 0x0002,
/// Declared protected; may be accessed within subclasses.
PROTECTED = 0x0004,
/// Declared static.
STATIC = 0x0008,
/// Declared final; must not be overridden.
FINAL = 0x0010,
/// Declared synchronized; invocation is wrapped by a monitor use.
SYNCHRONIZED = 0x0020,
/// A bridge method, generated by the compiler.
BRIDGE = 0x0040,
/// Declared with variable number of arguments.
VARARGS = 0x0080,
/// Declared native; implemented in a language other than Java.
NATIVE = 0x0100,
/// Declared abstract; no implementation is provided.
ABSTRACT = 0x0400,
/// Declared strictfp; floating-point mode is FP-strict.
STRICT = 0x0800,
/// Declared synthetic; not present in the source code.
SYNTHETIC = 0x1000,
}
/// Access flags for an inner class
///
/// Each variant is a single bit; `AttributeInnerClass::inner_class_access_flags` is a `u2` mask of them.
#[repr(u16)]
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum InnerClassAccessFlags {
/// Marked or implicitly public in source.
PUBLIC = 0x0001,
/// Marked private in source.
PRIVATE = 0x0002,
/// Marked protected in source.
PROTECTED = 0x0004,
/// Marked or implicitly static in source.
STATIC = 0x0008,
/// Marked final in source.
FINAL = 0x0010,
/// Was an interface in source.
INTERFACE = 0x0200,
/// Marked or implicitly abstract in source.
ABSTRACT = 0x0400,
/// Declared synthetic; not present in the source code.
SYNTHETIC = 0x1000,
/// Declared as an annotation type.
ANNOTATION = 0x2000,
/// Declared as an enum type.
ENUM = 0x4000,
}
/// Access flags for a field
///
/// Each variant is a single bit of the `u2` access flag mask of a field.
#[repr(u16)]
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum FieldAccessFlags {
/// Declared public; may be accessed from outside its package.
PUBLIC = 0x0001,
/// Declared private; usable only within the defining class.
PRIVATE = 0x0002,
/// Declared protected; may be accessed within subclasses.
PROTECTED = 0x0004,
/// Declared static.
STATIC = 0x0008,
/// Declared final; never directly assigned to after object construction (JLS §17.5).
FINAL = 0x0010,
/// Declared volatile; cannot be cached.
VOLATILE = 0x0040,
/// Declared transient; not written or read by a persistent object manager.
TRANSIENT = 0x0080,
/// Declared synthetic; not present in the source code.
SYNTHETIC = 0x1000,
/// Declared as an element of an enum.
ENUM = 0x4000,
}

145
cs_parser/src/test.rs Normal file
View file

@ -0,0 +1,145 @@
use super::*;
/// Single bytes are handed out in order, and `last_u1` returns the byte that was read last.
#[test]
fn data_u1() {
    let input = [0xff, 0x00];
    let mut reader = Data::new(&input);

    for expected in [0xff, 0x00] {
        assert_eq!(reader.u1().unwrap(), expected);
    }
    assert_eq!(reader.last_u1().unwrap(), 0x00);
}
/// Two bytes are combined big-endian into a `u2`, and `last_u2` returns the value that was read last.
#[test]
fn data_u2() {
    let input = [0xff, 0x33, 0x11, 0x00];
    let mut reader = Data::new(&input);

    for expected in [0xff33, 0x1100] {
        assert_eq!(reader.u2().unwrap(), expected);
    }
    assert_eq!(reader.last_u2().unwrap(), 0x1100);
}
/// Four bytes are combined big-endian into a `u4`, and `last_u4` returns that same value again.
#[test]
fn data_u4() {
    let input = [0xff, 0x33, 0x11, 0x00];
    let mut reader = Data::new(&input);
    let expected = 0xff331100;

    assert_eq!(reader.u4().unwrap(), expected);
    assert_eq!(reader.last_u4().unwrap(), expected);
}
/// Parses the class file of an empty class and compares the header, the complete constant pool
/// and the single method against the values worked out by hand in `testdata/Test.class.txt`.
#[test]
fn parse_empty_class() {
    // Shorthand for a `Utf8` constant pool entry (tag 1), which makes up most of the pool.
    let utf8 = |text: &str| CpInfo {
        tag: 1,
        inner: CpInfoInner::Utf8(cp_info::Utf8 {
            bytes: text.to_string(),
        }),
    };

    // The twelve constant pool entries, in pool order (index 1 first).
    let expected_pool = vec![
        CpInfo {
            tag: 0x0a,
            inner: CpInfoInner::MethodRef(cp_info::MethodRef {
                class_index: 2.into(),
                name_and_type_index: 3.into(),
            }),
        },
        CpInfo {
            tag: 7,
            inner: CpInfoInner::Class(cp_info::Class {
                name_index: 4.into(),
            }),
        },
        CpInfo {
            tag: 0xc,
            inner: CpInfoInner::NameAndType(cp_info::NameAndType {
                name_index: 5.into(),
                descriptor_index: 6.into(),
            }),
        },
        utf8("java/lang/Object"),
        utf8("<init>"),
        utf8("()V"),
        CpInfo {
            tag: 7,
            inner: CpInfoInner::Class(cp_info::Class {
                name_index: 8.into(),
            }),
        },
        utf8("Test"),
        utf8("Code"),
        utf8("LineNumberTable"),
        utf8("SourceFile"),
        utf8("Test.java"),
    ];

    let parsed = parse_class_file(include_bytes!("../testdata/Test.class")).unwrap();

    // Version and constant pool
    assert_eq!(parsed.minor_version, 0);
    assert_eq!(parsed.major_version, 0x003b);
    assert_eq!(parsed.constant_pool.len() + 1, 0x000d);
    assert_eq!(parsed.constant_pool.len(), 12);
    assert_eq!(parsed.constant_pool, expected_pool);

    // Class header
    assert_eq!(parsed.access_flags, 0x0021);
    assert_eq!(parsed.this_class, 7.into());
    assert_eq!(parsed.super_class, 2.into());

    // No interfaces and no fields
    assert_eq!(parsed.interfaces.len(), 0);
    assert_eq!(parsed.interfaces, vec![]);
    assert_eq!(parsed.fields.len(), 0);
    assert_eq!(parsed.fields, vec![]);

    // Exactly one method: the `<init>` constructor with descriptor `()V`
    assert_eq!(parsed.methods.len(), 1);
    let constructor = &parsed.methods[0];
    assert_eq!(constructor.access_flags, 1);
    assert_eq!(constructor.name_index, 5.into());
    assert_eq!(constructor.descriptor_index, 6.into());
    assert_eq!(constructor.attributes.len(), 1);
}
/// A class file with a field and several methods parses without error and starts with the magic number.
#[test]
fn more_complex_file() {
    let magic = parse_class_file(include_bytes!("../testdata/Test2.class"))
        .unwrap()
        .magic;
    assert_eq!(magic, 0xCAFEBABE);
}

BIN
cs_parser/testdata/Test.class vendored Normal file

Binary file not shown.

133
cs_parser/testdata/Test.class.txt vendored Normal file
View file

@ -0,0 +1,133 @@
Manually parsed by hand
hexdump -C Test.class
00000000 |ca fe ba be|00 00|00 3b |00 0d|0a.00 02.00 03|07. |.......;........|
00000010 00 04|0c.00 05.00 06|01 .00 10.6a 61 76 61 2f 6c |..........java/l|
00000020 61 6e 67 2f 4f 62 6a 65 63 74|01.00 06.3c 69 6e |ang/Object...<in|
00000030 69 74 3e|01.00 03.28 29 56|07.00 08|01.00 04.54 |it>...()V......T|
00000040 65 73 74|01.00 04.43 6f 64 65|01.00 0f.4c 69 6e |est...Code...Lin|
00000050 65 4e 75 6d 62 65 72 54 61 62 6c 65|01.00 0a.53 |eNumberTable...S|
00000060 6f 75 72 63 65 46 69 6c 65|01.00 09.54 65 73 74 |ourceFile...Test|
00000070 2e 6a 61 76 61|00 21|00 07|00 02|00 00|00 00|00 |.java.!.........|
00000080 01|00 01.00 05.00 06.00 01:00 09.00 00 00 1d.00 |................|
00000090 01.00 01.00 00 00 05.2a b7 00 01 b1.00 00.00 01: |.......*........|
000000a0 00 0a.00 00 00 06.00 01 :00 00.00 01|00 01|00 0b. |................|
000000b0 00 00 00 02.00 0c |......|
000000b6
Magic: ca fe ba be
Minor: 00 00
Major: 00 3b
CpCount: 00 0d (13) (13 - 1 = 12)
Cp: [
1: {
tag: 0a (10, MethodRef)
class_index: 00 02 (2)
name_and_type_index: 00 03
}
2: {
tag: 07 (7, Class)
name_index: 00 04 (4) (java/lang/Object)
}
3: {
tag: 0c (12, NameAndType)
name_index: 00 05 (05)
descriptor_index: 00 06
}
4: {
tag: 01 (1, Utf8)
length: 00 10 (16)
string: 6a 61 76 61 2f 6c 61 6e 67 2f 4f 62 6a 65 63 74 (java/lang/Object)
}
5: {
tag: 01 (1, Utf8)
length: 00 06 (6)
string: 3c 69 6e 69 74 3e (<init>)
}
6: {
tag: 01 (1, Utf8)
length: 00 03 (3)
bytes: 28 29 56 (()V)
}
7: {
tag: 07 (7, Class)
name_index: 00 08 (8) (Test)
}
8: {
tag: 01 (1, Utf8)
length: 00 04 (4)
bytes: 54 65 73 74 (Test)
}
9: {
tag: 01 (1, Utf8)
length: 00 04 (4)
bytes: 43 6f 64 65 (Code)
}
10: {
tag: 01 (1, Utf8)
length: 00 0f (15)
bytes: 4c 69 6e 65 4e 75 6d 62 65 72 54 61 62 6c 65 (LineNumberTable)
}
11: {
tag: 01 (1, Utf8)
length: 00 0a (10)
bytes: 53 6f 75 72 63 65 46 69 6c 65 (SourceFile)
}
12: {
tag: 01 (1, Utf8)
length: 00 09 (9)
bytes: 54 65 73 74 2e 6a 61 76 61 (Test.java)
}
]
access_flags: 00 21
this_class: 00 07 (Test)
super_class: 00 02 (java/lang/Object)
interfaces_count: 00 00
interfaces: []
fields_count: 00 00
fields: []
methods_count: 00 01
methods: [
{
access_flags: 00 01
name_index: 00 05
descriptor_index: 00 06
attributes_count: 00 01
attributes: [
{
name_index: 00 09 (Code)
attribute_length: 00 00 00 1d (29)
max_stack: 00 01
max_locals: 00 01
code_length: 00 00 00 05
code: 2a b7 00 01 b1
exception_table_length: 00 00
exception_table: []
attributes_count: 00 01
attributes: [
{
attribute_name_index: 00 0a (LineNumberTable)
attribute_length: 00 00 00 06
line_number_table_length: 00 01
line_number_table: [
{
start_pc: 00 00
line_number: 00 01
}
]
}
]
}
]
}
]
attributes_count: 00 01
attributes: [
{
attribute_name_index: 00 0b (SourceFile)
attribute_length: 00 00 00 02
sourcefile_index: 00 0c
}
]

1
cs_parser/testdata/Test.java vendored Normal file
View file

@ -0,0 +1 @@
public class Test {} // Parser test fixture; keep this on line 1, Test.class.txt records line_number 1 in the LineNumberTable

BIN
cs_parser/testdata/Test2.class vendored Normal file

Binary file not shown.

13
cs_parser/testdata/Test2.java vendored Normal file
View file

@ -0,0 +1,13 @@
class Test2 { // Parser test fixture; NOTE(review): presumably the source of testdata/Test2.class, recompile it if this file changes
int myField; // instance field, never read or written in this file
public static void main(String[] args) {
int i = 0; // local variable
i++; // increment of a local
new Test2().print(i); // object creation followed by an instance method call
}
void print(int i) {
System.out.println(i); // static field access and a call into the standard library
}
}