GM6000 Digital Heater Controller Branch: main
SDX-1330
TextBlock.h
Go to the documentation of this file.
1#ifndef Cpl_Text_Tokenizer_TextBlock_h_
2#define Cpl_Text_Tokenizer_TextBlock_h_
3/*-----------------------------------------------------------------------------
4* This file is part of the Colony.Core Project. The Colony.Core Project is an
5* open source project with a BSD type of licensing agreement. See the license
6* agreement (license.txt) in the top/ directory or on the Internet at
7* http://integerfox.com/colony.core/license.txt
8*
9* Copyright (c) 2014-2022 John T. Taylor
10*
11* Redistributions of the source code must retain the above copyright notice.
12*----------------------------------------------------------------------------*/
13/** @file */
14
15
16///
17namespace Cpl {
18///
19namespace Text {
20///
21namespace Tokenizer {
22
23
24/** This concrete class tokenizes a Text Block that has the following
25 format: <pre>
26
27 parameter [(delimiter parameter)*] [terminator]
28
29 parameter := { all printable characters except for QUOTE, DEL, TERM, and ESC }
30 QUOTE := { specified quote character (used to start/end TEXT STRINGS) }
31 DEL := { specified delimiter character }
32 TERM := { specified terminator character }
33 ESC := { specified Escape character used inside of TEXT STRINGS }
34
35 The tokenizer replaces the delimiter and terminator characters with an '\0'
36 character. The tokenizer supports the parameter fields being "text strings".
37 In addition, any leading and trailing whitespace is removed from each
38 parameter. Note: no whitespace is altered/removed from the contents of a
39 text string.
40
41 If the string being parsed has two DEL characters in a row, then the
42 tokenized results will return an 'empty parameter' (a parameter with a zero
43 string length) for this sequence. The EXCEPTION to this rule is when the
44 delimiter is the space character, then the tokenizer will never generate
45 and/or detect any 'empty parameters'.
46
47 The original string is "corrupted" since the tokenizer changes characters.
48 Also, the tokenizer will shorten the string when dealing with 'text
49 strings'.
50
51 A TEXT STRING can contain any alphanumeric and/or punctuation characters
52 including the QUOTE, DEL, TERM, and ESC characters. A text string is
53 always enclosed with beginning and ending QUOTE characters. When a QUOTE
54 character is encountered outside of a text string, it always starts a text
55 string. To enter a literal QUOTE character within a text string, precede the
56 QUOTE character with the ESC character (e.g., \"). To enter a literal ESC
57 character within a text string, precede the character with the ESC character
58 (e.g., \\). IMPORTANT NOTE: The QUOTE and ESC characters are REMOVED from
59 the string when it is tokenized!
60
61
62</pre>*/
64{
65private:
66 /// Pointer to the first data field
67 char* m_base;
68
69 /// Pointer to the current token
70 char* m_ptr;
71
72 /// Indicates if the tokens are valid
73 bool m_validTokens;
74
75 /// Indicates if the terminator character was encountered
76 bool m_terminatorFound;
77
78 /// Number of data fields
79 unsigned m_count;
80
81
82
83public:
84 /** Constructor. Requires a pointer to the 'raw' string to be tokenized.
85 Note: All of the parsing occurs in this method.
86 */
87 TextBlock( char* string, char delimiter=',', char terminator=';', char quote='"', char escape='\\' );
88
89
90
91public:
92 /** Returns true if the string was successfully tokenized. The string
93 will fail to parse if it does not match the format/syntax described
94 in the class definition.
95
96 THIS METHOD MUST BE CALLED BEFORE ANY OF THE FOLLOWING
97 METHODS. THE PARSED TOKENS ARE ONLY VALID IF THIS METHOD
98 RETURNS TRUE!
99 */
100 inline bool isValidTokens() const noexcept { return m_validTokens; }
101
102
103 /// Returns the number of parameter fields in the Text block
104 inline unsigned numParameters() const noexcept { return m_count; }
105
106
107 /// Returns the Nth parameter (index starts with 0). Returns a null pointer if the index is out of range
108 const char* getParameter( unsigned index ) const noexcept;
109
110
111 /// Returns true if the parsing stopped because the terminator character was encountered (vs. end-of-string)
112 inline bool isTerminated() const noexcept { return m_terminatorFound; }
113
114
115 /** Returns a pointer to the portion of the string that has not
116 been tokenized, i.e. the first character AFTER the terminator
117 character.
118 */
119 inline const char* remaining() const noexcept { return m_ptr; }
120
121
122protected:
123 /// Helper method
124 void removeWhiteSpace( char* startOfTokenPtr, char* firstNonSpacePtr, char* lastNonSpacePtr ) noexcept;
125
126};
127
128
129}; // end namespaces
130};
131};
132#endif // end header latch
This concrete class tokenizes a Text Block that has the following format:
Definition TextBlock.h:64
bool isTerminated() const noexcept
Returns true if the parsing stopped because the terminator character was encountered (vs....
Definition TextBlock.h:112
TextBlock(char *string, char delimiter=',', char terminator=';', char quote='"', char escape='\\' )
Constructor.
void removeWhiteSpace(char *startOfTokenPtr, char *firstNonSpacePtr, char *lastNonSpacePtr) noexcept
Helper method.
const char * remaining() const noexcept
Returns a pointer to the portion of the string that has not been tokenized, i.e.
Definition TextBlock.h:119
bool isValidTokens() const noexcept
Returns true if the string was successfully tokenized.
Definition TextBlock.h:100
const char * getParameter(unsigned index) const noexcept
Returns the Nth parameter (index starts with 0). Returns a null pointer if the index is out of range.
unsigned numParameters() const noexcept
Returns the number of parameter fields in the Text block.
Definition TextBlock.h:104
The 'Cpl' namespace is the root name space for the Colony.
Definition Api16.h:20