My first working Rust program! It covers all the basic design cases, except for crossing brackets (e.g. `( [ ) ]`) and malformed text such as unmatched brackets.
My goal is to write a function that will split up a string at whitespace, while also skipping over any text contained within brackets.
use std::collections::HashMap;
/// Demo driver: tokenizes a sample string with `split` and checks that the
/// nested parenthesised group comes back as a single token.
fn main() {
    // Escape sequences that `split` must step over as a unit.
    let specials = vec![
        "\\d", "\\D", "\\w", "\\W", "\\s", "\\S", "\\a", "\\A", "\\(", "\\)", "\\[", "\\]", "\\{",
        "\\}",
    ];

    // Opening delimiter -> closing delimiter.
    let brackets: HashMap<&str, &str> = [
        ("[^", "]"),
        ("(?:", ")"),
        ("[", "]"),
        ("(", ")"),
        ("{", "}"),
    ]
    .iter()
    .copied()
    .collect();

    let input = " a bc d ( e f ( g ) ) h ij ".to_string();
    let tokens = split(&input, &brackets, &specials);
    println!("Final result: {:?}", tokens);

    // The bracketed run must survive as one token, inner whitespace included.
    let expected = ["a", "bc", "d", "( e f ( g ) )", "h", "ij"];
    assert_eq!(expected.len(), tokens.len());
    for (actual, wanted) in tokens.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
}
/// Splits `input` into whitespace-separated tokens, keeping bracketed groups intact.
///
/// Whitespace only ends a token when no bracket group is open, so
/// `"a ( b c ) d"` yields `["a", "( b c )", "d"]`. Groups may nest.
///
/// # Parameters
/// * `input` - the text to tokenize.
/// * `brackets` - maps each opening delimiter to its closing delimiter. When
///   several openers match at the same position the longest one wins (so `"(?:"`
///   is preferred over `"("`). Pairs with an empty opener or closer are ignored.
/// * `specials` - escape sequences (for example `"\\("`) that are stepped over as
///   a unit, so the bracket characters inside them neither open nor close a group.
///
/// # Returns
/// The tokens in order of appearance, as owned strings. Runs of whitespace never
/// produce empty tokens.
///
/// # Malformed input
/// * A closing delimiter that does not match the innermost open group is treated
///   as ordinary text.
/// * A group that is never closed extends to the end of `input`; everything from
///   the start of that token onwards (including trailing whitespace) is returned
///   as one token.
fn split(input: &str, brackets: &HashMap<&str, &str>, specials: &[&str]) -> Vec<String> {
    let mut output: Vec<String> = Vec::new();
    // Closing delimiters that are still owed, innermost group last.
    let mut pending: Vec<&str> = Vec::new();
    let mut cursor = 0; // byte offset where the current token starts
    let mut index = 0; // byte offset currently being examined

    while index < input.len() {
        let rest = &input[index..];

        // Escape sequences are consumed whole so that e.g. `\(` is not seen as `(`.
        let escaped = specials
            .iter()
            .filter(|special| !special.is_empty() && rest.starts_with(**special))
            .map(|special| special.len())
            .max();
        if let Some(len) = escaped {
            index += len;
            continue;
        }

        let ch = match rest.chars().next() {
            Some(ch) => ch,
            None => break,
        };

        if ch.is_whitespace() {
            // Only whitespace outside every group ends a token.
            if pending.is_empty() {
                if index != cursor {
                    output.push(input[cursor..index].to_string());
                }
                cursor = index + ch.len_utf8();
            }
            // Advance by the character's encoded width to stay on a char boundary.
            index += ch.len_utf8();
            continue;
        }

        // Close the innermost group if its closing delimiter starts here.
        if let Some(close) = pending.last().copied() {
            if rest.starts_with(close) {
                pending.pop();
                index += close.len();
                continue;
            }
        }

        // Open a new group; the longest opener wins so the result does not depend
        // on the map's iteration order.
        let opened = brackets
            .iter()
            .filter(|(open, close)| {
                !open.is_empty() && !close.is_empty() && rest.starts_with(**open)
            })
            .max_by_key(|(open, _)| open.len());
        if let Some((open, close)) = opened {
            pending.push(*close);
            index += open.len();
            continue;
        }

        // Ordinary character: part of the current token.
        index += ch.len_utf8();
    }

    // Whatever follows the last top-level whitespace is the final token.
    if cursor < input.len() {
        output.push(input[cursor..].to_string());
    }
    output
}
And if you are thinking "this looks like a Java developer wrote this", that's correct: it is loosely based on some code I designed for a Java project, so it's probably not taking advantage of some of Rust's capabilities.
