pub mod attributes;
#[cfg(feature = "encoding_rs")]
use encoding_rs::Encoding;
use std::borrow::Cow;
use std::io::BufRead;
use std::ops::Deref;
use std::str::from_utf8;
use self::attributes::{Attribute, Attributes};
use errors::{Error, Result};
use escape::{escape, unescape};
use reader::Reader;
use memchr;
/// Content of an opening tag.
///
/// The whole tag content lives in a single buffer: the first `name_len` bytes
/// are the element name and everything after them is the raw attribute text.
#[derive(Clone)]
pub struct BytesStart<'a> {
    /// Tag content (name followed by raw attributes), borrowed or owned.
    buf: Cow<'a, [u8]>,
    /// Number of leading bytes of `buf` that form the name.
    name_len: usize,
}
impl<'a> BytesStart<'a> {
#[inline]
pub fn borrowed(content: &'a [u8], name_len: usize) -> Self {
BytesStart {
buf: Cow::Borrowed(content),
name_len: name_len,
}
}
#[inline]
pub fn borrowed_name(name: &'a [u8]) -> BytesStart<'a> {
Self::borrowed(name, name.len())
}
#[inline]
pub fn owned<C: Into<Vec<u8>>>(content: C, name_len: usize) -> BytesStart<'static> {
BytesStart {
buf: Cow::Owned(content.into()),
name_len: name_len,
}
}
#[inline]
pub fn owned_name<C: Into<Vec<u8>>>(name: C) -> BytesStart<'static> {
let content = name.into();
BytesStart {
name_len: content.len(),
buf: Cow::Owned(content),
}
}
pub fn into_owned(self) -> BytesStart<'static> {
Self::owned(self.buf.into_owned(), self.name_len)
}
pub fn to_owned(&self) -> BytesStart<'static> {
Self::owned(self.buf.to_owned(), self.name_len)
}
pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
where
I: IntoIterator,
I::Item: Into<Attribute<'b>>,
{
self.extend_attributes(attributes);
self
}
#[inline]
pub fn name(&self) -> &[u8] {
&self.buf[..self.name_len]
}
#[inline]
pub fn local_name(&self) -> &[u8] {
let name = self.name();
memchr::memchr(b':', name).map_or(name, |i| &name[i + 1..])
}
#[inline]
pub fn unescaped(&self) -> Result<Cow<[u8]>> {
unescape(&*self.buf).map_err(Error::EscapeError)
}
pub fn attributes(&self) -> Attributes {
Attributes::new(self, self.name_len)
}
pub fn html_attributes(&self) -> Attributes {
Attributes::html(self, self.name_len)
}
#[inline]
pub fn attributes_raw(&self) -> &[u8] {
&self.buf[self.name_len..]
}
pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
where
I: IntoIterator,
I::Item: Into<Attribute<'b>>,
{
for attr in attributes {
self.push_attribute(attr);
}
self
}
#[cfg(feature = "encoding")]
#[inline]
pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
let decoded = reader.decode(&*self);
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
}
#[cfg(not(feature = "encoding"))]
#[inline]
pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
let decoded = reader.decode(&*self)?;
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
}
pub fn push_attribute<'b, A: Into<Attribute<'b>>>(&mut self, attr: A) {
let a = attr.into();
let bytes = self.buf.to_mut();
bytes.push(b' ');
bytes.extend_from_slice(a.key);
bytes.extend_from_slice(b"=\"");
bytes.extend_from_slice(&*a.value);
bytes.push(b'"');
}
pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
let bytes = self.buf.to_mut();
bytes.splice(..self.name_len, name.iter().cloned());
self.name_len = name.len();
self
}
pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
self.buf.to_mut().truncate(self.name_len);
self
}
}
impl<'a> std::fmt::Debug for BytesStart<'a> {
    /// Writes `BytesStart { buf: <bytes>, name_len: <n> }`, with the buffer
    /// rendered by `write_byte_string`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        use crate::utils::write_byte_string;

        write!(f, "BytesStart {{ buf: ")
            .and_then(|_| write_byte_string(f, &self.buf))
            .and_then(|_| write!(f, ", name_len: {} }}", self.name_len))
    }
}
/// XML declaration data, stored as the `BytesStart` it was read or built from.
#[derive(Clone, Debug)]
pub struct BytesDecl<'a> {
    /// Declaration content; `new` builds it with the name `xml`.
    element: BytesStart<'a>,
}

impl<'a> BytesDecl<'a> {
    /// Wraps an existing `BytesStart` as a declaration without inspecting it.
    pub fn from_start(start: BytesStart<'a>) -> BytesDecl<'a> {
        BytesDecl { element: start }
    }

    /// Returns the value of the `version` attribute.
    ///
    /// Only the first attribute is examined.
    ///
    /// # Errors
    ///
    /// * an error from the attribute parser is returned unchanged;
    /// * `Error::XmlDeclWithoutVersion(Some(key))` when the first attribute has
    ///   another key (`Error::Utf8` if that key is not valid UTF-8);
    /// * `Error::XmlDeclWithoutVersion(None)` when there are no attributes.
    pub fn version(&self) -> Result<Cow<[u8]>> {
        match self.element.attributes().next() {
            Some(Err(e)) => Err(e),
            Some(Ok(Attribute {
                key: b"version",
                value: v,
            })) => Ok(v),
            Some(Ok(a)) => {
                let found = from_utf8(a.key).map_err(Error::Utf8)?.to_string();
                Err(Error::XmlDeclWithoutVersion(Some(found)))
            }
            None => Err(Error::XmlDeclWithoutVersion(None)),
        }
    }

    /// Returns the value of the `encoding` attribute.
    ///
    /// `None` means the attribute is absent; `Some(Err(_))` means an attribute
    /// parsing error was hit before an `encoding` attribute was found.
    pub fn encoding(&self) -> Option<Result<Cow<[u8]>>> {
        self.find_attribute(b"encoding")
    }

    /// Returns the value of the `standalone` attribute.
    ///
    /// `None` means the attribute is absent; `Some(Err(_))` means an attribute
    /// parsing error was hit before a `standalone` attribute was found.
    pub fn standalone(&self) -> Option<Result<Cow<[u8]>>> {
        self.find_attribute(b"standalone")
    }

    /// Scans the attributes in order and returns the value of the first one
    /// whose key equals `key`, or the first parsing error met before it.
    fn find_attribute(&self, key: &[u8]) -> Option<Result<Cow<[u8]>>> {
        for attr in self.element.attributes() {
            match attr {
                Err(e) => return Some(Err(e)),
                Ok(attr) => {
                    if attr.key == key {
                        return Some(Ok(attr.value));
                    }
                }
            }
        }
        None
    }

    /// Builds an owned declaration `xml version="…"`, followed by
    /// ` encoding="…"` and ` standalone="…"` when those values are given.
    ///
    /// The values are copied verbatim; nothing is validated or escaped.
    pub fn new(
        version: &[u8],
        encoding: Option<&[u8]>,
        standalone: Option<&[u8]>,
    ) -> BytesDecl<'static> {
        // 12 == len(`" encoding="`): closes the previous value and opens this one.
        let encoding_attr_len = encoding.map_or(0, |xs| 12 + xs.len());
        // 14 == len(`" standalone="`).
        let standalone_attr_len = standalone.map_or(0, |xs| 14 + xs.len());
        // 14 == len(`xml version="`) + the final closing quote. The version bytes
        // are counted as well, so the buffer is sized exactly and never regrows.
        let capacity = 14 + version.len() + encoding_attr_len + standalone_attr_len;

        let mut buf = Vec::with_capacity(capacity);
        buf.extend_from_slice(b"xml version=\"");
        buf.extend_from_slice(version);
        if let Some(encoding_val) = encoding {
            buf.extend_from_slice(b"\" encoding=\"");
            buf.extend_from_slice(encoding_val);
        }
        if let Some(standalone_val) = standalone {
            buf.extend_from_slice(b"\" standalone=\"");
            buf.extend_from_slice(standalone_val);
        }
        buf.push(b'"');

        BytesDecl {
            // 3 == len("xml"), the name part of the buffer.
            element: BytesStart::owned(buf, 3),
        }
    }

    /// Looks up the `Encoding` matching the `encoding` attribute.
    ///
    /// Returns `None` when the attribute is missing, could not be parsed,
    /// or `Encoding::for_label` does not recognise its value.
    #[cfg(feature = "encoding_rs")]
    pub fn encoder(&self) -> Option<&'static Encoding> {
        self.encoding()
            .and_then(|e| e.ok())
            .and_then(|e| Encoding::for_label(&*e))
    }

    /// Consumes the declaration and returns one that owns its buffer.
    pub fn into_owned(self) -> BytesDecl<'static> {
        BytesDecl {
            element: self.element.into_owned(),
        }
    }
}
/// Content of a closing tag: just its name.
#[derive(Clone)]
pub struct BytesEnd<'a> {
    /// Tag name, borrowed or owned.
    name: Cow<'a, [u8]>,
}

impl<'a> BytesEnd<'a> {
    /// Builds a closing tag that borrows `name`.
    #[inline]
    pub fn borrowed(name: &'a [u8]) -> BytesEnd<'a> {
        let name = Cow::Borrowed(name);
        BytesEnd { name }
    }

    /// Builds a closing tag that takes ownership of `name`.
    #[inline]
    pub fn owned(name: Vec<u8>) -> BytesEnd<'static> {
        let name = Cow::Owned(name);
        BytesEnd { name }
    }

    /// Consumes the tag and returns one that owns its name.
    pub fn into_owned(self) -> BytesEnd<'static> {
        let owned_name = self.name.into_owned();
        BytesEnd {
            name: Cow::Owned(owned_name),
        }
    }

    /// Returns the full name.
    #[inline]
    pub fn name(&self) -> &[u8] {
        self.name.as_ref()
    }

    /// Returns the part of the name after the first `:`,
    /// or the whole name when it contains no `:`.
    #[inline]
    pub fn local_name(&self) -> &[u8] {
        let full = self.name();
        match full.iter().position(|&byte| byte == b':') {
            Some(colon) => &full[colon + 1..],
            None => full,
        }
    }
}
impl<'a> std::fmt::Debug for BytesEnd<'a> {
    /// Writes `BytesEnd { name: <bytes> }`, with the name rendered by
    /// `write_byte_string`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        use crate::utils::write_byte_string;

        write!(f, "BytesEnd {{ name: ")
            .and_then(|_| write_byte_string(f, &self.name))
            .and_then(|_| write!(f, " }}"))
    }
}
/// Text content kept in its escaped form.
#[derive(Clone)]
pub struct BytesText<'a> {
    /// Escaped bytes, borrowed or owned.
    content: Cow<'a, [u8]>,
}
impl<'a> BytesText<'a> {
#[inline]
pub fn from_escaped<C: Into<Cow<'a, [u8]>>>(content: C) -> BytesText<'a> {
BytesText {
content: content.into(),
}
}
#[inline]
pub fn from_plain(content: &'a [u8]) -> BytesText<'a> {
BytesText {
content: escape(content),
}
}
#[inline]
pub fn from_escaped_str<C: Into<Cow<'a, str>>>(content: C) -> BytesText<'a> {
Self::from_escaped(match content.into() {
Cow::Owned(o) => Cow::Owned(o.into_bytes()),
Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()),
})
}
#[inline]
pub fn from_plain_str(content: &'a str) -> BytesText<'a> {
Self::from_plain(content.as_bytes())
}
#[inline]
pub fn into_owned(self) -> BytesText<'static> {
BytesText {
content: self.content.into_owned().into(),
}
}
pub fn unescaped(&self) -> Result<Cow<[u8]>> {
unescape(self).map_err(Error::EscapeError)
}
#[cfg(feature = "encoding")]
pub fn unescape_and_decode_without_bom<B: BufRead>(
&self,
reader: &mut Reader<B>,
) -> Result<String> {
let decoded = reader.decode_without_bom(&*self);
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
}
#[cfg(not(feature = "encoding"))]
pub fn unescape_and_decode_without_bom<B: BufRead>(
&self,
reader: &Reader<B>,
) -> Result<String> {
let decoded = reader.decode_without_bom(&*self)?;
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
}
#[cfg(feature = "encoding")]
pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
let decoded = reader.decode(&*self);
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
}
#[cfg(not(feature = "encoding"))]
pub fn unescape_and_decode<B: BufRead>(&self, reader: &Reader<B>) -> Result<String> {
let decoded = reader.decode(&*self)?;
let unescaped = unescape(decoded.as_bytes()).map_err(Error::EscapeError)?;
String::from_utf8(unescaped.into_owned()).map_err(|e| Error::Utf8(e.utf8_error()))
}
pub fn escaped(&self) -> &[u8] {
self.content.as_ref()
}
}
impl<'a> std::fmt::Debug for BytesText<'a> {
    /// Writes `BytesText { content: <bytes> }`, with the content rendered by
    /// `write_byte_string`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        use crate::utils::write_byte_string;

        write!(f, "BytesText {{ content: ")
            .and_then(|_| write_byte_string(f, &self.content))
            .and_then(|_| write!(f, " }}"))
    }
}
/// One event of an XML document.
///
/// Every variant except `Eof` wraps the bytes of the corresponding piece of
/// the document; the payload may borrow from a buffer (lifetime `'a`).
#[derive(Clone, Debug)]
pub enum Event<'a> {
    /// Opening tag.
    Start(BytesStart<'a>),
    /// Closing tag.
    End(BytesEnd<'a>),
    /// Tag carried as a `BytesStart` but reported separately from `Start`.
    Empty(BytesStart<'a>),
    /// Text payload.
    Text(BytesText<'a>),
    /// Comment payload.
    Comment(BytesText<'a>),
    /// CData payload.
    CData(BytesText<'a>),
    /// XML declaration.
    Decl(BytesDecl<'a>),
    /// Processing instruction payload.
    PI(BytesText<'a>),
    /// Doctype payload.
    DocType(BytesText<'a>),
    /// End of input; carries no data.
    Eof,
}

impl<'a> Event<'a> {
    /// Consumes the event and returns the same variant with an owning payload,
    /// so the result no longer borrows from any buffer.
    pub fn into_owned(self) -> Event<'static> {
        match self {
            Event::Eof => Event::Eof,
            Event::Start(start) => Event::Start(start.into_owned()),
            Event::Empty(start) => Event::Empty(start.into_owned()),
            Event::End(end) => Event::End(end.into_owned()),
            Event::Decl(decl) => Event::Decl(decl.into_owned()),
            Event::Text(text) => Event::Text(text.into_owned()),
            Event::Comment(text) => Event::Comment(text.into_owned()),
            Event::CData(text) => Event::CData(text.into_owned()),
            Event::PI(text) => Event::PI(text.into_owned()),
            Event::DocType(text) => Event::DocType(text.into_owned()),
        }
    }
}
impl<'a> Deref for BytesStart<'a> {
    type Target = [u8];

    /// Exposes the whole buffer: the name followed by the raw attributes.
    fn deref(&self) -> &[u8] {
        &self.buf[..]
    }
}
impl<'a> Deref for BytesDecl<'a> {
    type Target = [u8];

    /// Exposes the bytes of the wrapped `BytesStart`.
    fn deref(&self) -> &[u8] {
        let element = &self.element;
        &**element
    }
}
impl<'a> Deref for BytesEnd<'a> {
    type Target = [u8];

    /// Exposes the name bytes.
    fn deref(&self) -> &[u8] {
        &self.name[..]
    }
}
impl<'a> Deref for BytesText<'a> {
    type Target = [u8];

    /// Exposes the stored (escaped) bytes.
    fn deref(&self) -> &[u8] {
        &self.content[..]
    }
}
impl<'a> Deref for Event<'a> {
    type Target = [u8];

    /// Exposes the bytes of the payload, whatever the variant;
    /// `Eof` yields an empty slice.
    fn deref(&self) -> &[u8] {
        match self {
            Event::Eof => &[],
            Event::Start(start) | Event::Empty(start) => &**start,
            Event::End(end) => &**end,
            Event::Decl(decl) => &**decl,
            Event::Text(text) => &**text,
            Event::PI(text) => &**text,
            Event::CData(text) => &**text,
            Event::Comment(text) => &**text,
            Event::DocType(text) => &**text,
        }
    }
}
impl<'a> AsRef<Event<'a>> for Event<'a> {
    /// Returns the event itself, so that both `Event` and `&Event` can be
    /// passed where `AsRef<Event>` is expected.
    fn as_ref(&self) -> &Event<'a> {
        self
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// `local_name` on start and end tags strips everything up to and including
    /// the first `:`.
    #[test]
    fn local_name() {
        use std::str::from_utf8;

        let xml = r#"
<foo:bus attr='bar'>foobusbar</foo:bus>
<foo: attr='bar'>foobusbar</foo:>
<:foo attr='bar'>foobusbar</:foo>
<foo:bus:baz attr='bar'>foobusbar</foo:bus:baz>
"#;
        let as_string = |bytes: &[u8]| {
            from_utf8(bytes)
                .expect("unable to build str from local_name")
                .to_string()
        };

        let mut reader = Reader::from_str(xml);
        let mut buf = Vec::new();
        let mut seen: Vec<String> = Vec::new();
        loop {
            // Only start and end tags contribute a name; other events are skipped.
            let name = match reader.read_event(&mut buf).expect("unable to read xml event") {
                Event::Start(ref e) => as_string(e.local_name()),
                Event::End(ref e) => as_string(e.local_name()),
                Event::Eof => break,
                _ => continue,
            };
            seen.push(name);
        }

        // One entry per start tag followed by one per matching end tag.
        let expected = ["bus", "bus", "", "", "foo", "foo", "bus:baz", "bus:baz"];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(seen[idx], want.to_string());
        }
    }

    /// A tag built from a bare name has that name and no other content.
    #[test]
    fn bytestart_create() {
        let start = BytesStart::owned_name("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), b"test");
    }

    /// Renaming a tag keeps the attributes that were pushed before.
    #[test]
    fn bytestart_set_name() {
        let mut start = BytesStart::owned_name("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), b"test");
        assert_eq!(start.attributes_raw(), b"");

        start.push_attribute(("x", "a"));
        assert_eq!(start.len(), 10);
        assert_eq!(start.attributes_raw(), b" x=\"a\"");

        start.set_name(b"g");
        assert_eq!(start.len(), 7);
        assert_eq!(start.name(), b"g");
    }

    /// Clearing attributes leaves only the name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut start = BytesStart::owned_name("test");
        start.push_attribute(("x", "y\"z"));
        start.push_attribute(("x", "y\"z"));

        start.clear_attributes();
        assert!(start.attributes().next().is_none());
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), b"test");
    }
}