summaryrefslogtreecommitdiff
path: root/libs/pbd/pbd/tokenizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'libs/pbd/pbd/tokenizer.h')
-rw-r--r--libs/pbd/pbd/tokenizer.h30
1 files changed, 26 insertions, 4 deletions
diff --git a/libs/pbd/pbd/tokenizer.h b/libs/pbd/pbd/tokenizer.h
index a976b79341..b80e3eac4a 100644
--- a/libs/pbd/pbd/tokenizer.h
+++ b/libs/pbd/pbd/tokenizer.h
@@ -4,18 +4,24 @@
#include <iterator>
#include <string>
+#include <pbd/whitespace.h>
+
namespace PBD {
/**
Tokenize string, this should work for standard
- strings aswell as Glib::ustring. This is a bit of a hack,
+ strings as well as Glib::ustring. This is a bit of a hack,
there are much better string tokenizing patterns out there.
+ If strip_whitespace is true, each token is passed through
+ strip_whitespace_edges() and emitted only if it is still non-empty;
+ empty results are discarded (token_count is still incremented for them).
*/
template<typename StringType, typename Iter>
unsigned int
tokenize(const StringType& str,
const StringType& delims,
- Iter it)
+ Iter it,
+ bool strip_whitespace=false)
{
typename StringType::size_type start_pos = 0;
typename StringType::size_type end_pos = 0;
@@ -28,14 +34,30 @@ tokenize(const StringType& str,
if (end_pos == str.npos) {
end_pos = str.length();
}
- *it++ = str.substr(start_pos, end_pos - start_pos);
+ if (strip_whitespace) {
+ StringType stripped = str.substr(start_pos, end_pos - start_pos);
+ strip_whitespace_edges (stripped);
+ if (stripped.length()) {
+ *it++ = stripped;
+ }
+ } else {
+ *it++ = str.substr(start_pos, end_pos - start_pos);
+ }
++token_count;
start_pos = str.find_first_not_of(delims, end_pos + 1);
}
} while (start_pos != str.npos);
if (start_pos != str.npos) {
- *it++ = str.substr(start_pos, str.length() - start_pos);
+ if (strip_whitespace) {
+ StringType stripped = str.substr(start_pos, str.length() - start_pos);
+ strip_whitespace_edges (stripped);
+ if (stripped.length()) {
+ *it++ = stripped;
+ }
+ } else {
+ *it++ = str.substr(start_pos, str.length() - start_pos);
+ }
++token_count;
}