using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace TradeIdeas.MiscSupport
{
    /// <summary>
    /// This class allows you to build lists appropriate to send to TCL.
    /// This code is based heavily on MiscSupport.C
    /// </summary>
    public static class TclList
    {
        /// <summary>
        /// Quote a string so you can display it for a user or use it in a TCL list.
        /// </summary>
        /// <param name="toQuote">The text to quote. Must not be null.</param>
        /// <returns>
        /// The quoted text, i.e. a TCL list containing exactly one item.
        /// </returns>
        public static string TclQuote(this string toQuote)
        {
            StringBuilder result = new StringBuilder();
            result.LAppend(toQuote);
            return result.ToString();
        }

        /// <summary>
        /// Append an item to a list.
        /// </summary>
        /// <param name="destination">
        /// This is the list. An empty StringBuilder is an empty list.
        /// </param>
        /// <param name="toAppend">
        /// The new item. This will be quoted if necessary. Must not be null.
        /// </param>
        /// <returns>
        /// The destination.
        /// That makes it easy to append several things.
        /// list.LAppend("a").LAppend("b").LAppend("c")
        /// </returns>
        public static StringBuilder LAppend(this StringBuilder destination, string toAppend)
        {
            // Items are separated by a single space.
            if (destination.Length > 0)
                destination.Append(' ');

            if (toAppend == "")
            {
                // An empty item has to be written explicitly or it would disappear.
                destination.Append("{}");
                return destination;
            }

            // First pass:  Decide which quoting style is required.  We stop
            // scanning as soon as we know we need the strongest style.
            int inputLength = toAppend.Length;
            bool requireSlashQuote = false;
            bool requireBraceQuote = false;
            int braceCount = 0;
            for (int i = 0; (i < inputLength) && !requireSlashQuote; i++)
            {
                char ch = toAppend[i];
                if (IsUnprintable(ch))
                {
                    // Quote all unprintable characters.
                    requireSlashQuote = true;
                }
                else
                {
                    switch (ch)
                    {
                        case ' ':
                        case '"':
                            requireBraceQuote = true;
                            break;
                        case '\\':
                            requireBraceQuote = true;
                            if (i + 1 == inputLength)
                            {
                                // A trailing backslash would quote our closing brace.
                                requireSlashQuote = true;
                            }
                            else
                            {
                                // Skip the quoted character.  A quoted brace does
                                // not take part in brace matching.
                                i++;
                                // The skipped character still has to be printable.
                                // Otherwise something like backslash + newline would
                                // be copied as is inside of the braces.
                                if (IsUnprintable(toAppend[i]))
                                    requireSlashQuote = true;
                            }
                            break;
                        case '{':
                            requireBraceQuote = true;
                            braceCount++;
                            break;
                        case '}':
                            requireBraceQuote = true;
                            if (braceCount > 0)
                                braceCount--;
                            else
                                // A close with no matching open.
                                requireSlashQuote = true;
                            break;
                    }
                }
            }
            if (braceCount != 0)
                // The braces did not match.
                requireSlashQuote = true;

            // Second pass:  Write the item.
            if (requireSlashQuote)
            {
                AppendSlashQuoted(destination, toAppend);
            }
            else if (requireBraceQuote)
            {
                destination.Append('{');
                destination.Append(toAppend);
                destination.Append('}');
            }
            else
            {
                destination.Append(toAppend);
            }
            return destination;
        }

        /// <summary>
        /// True for the characters that we never copy to the output as is:
        /// the ASCII control characters (0 - 31) and DEL (127).
        /// </summary>
        private static bool IsUnprintable(char ch)
        {
            return (ch < 32) || (ch == 127);
        }

        /// <summary>
        /// Append the item using backslash quoting.
        /// This is the most comprehensive quoting solution. It can quote
        /// anything. But the input can grow to 4 times the original size.
        /// If applied recursively, the size could double each time.
        /// </summary>
        private static void AppendSlashQuoted(StringBuilder destination, string toAppend)
        {
            foreach (char ch in toAppend)
            {
                switch (ch)
                {
                    // Some of this effort is not required. \a does not need
                    // to be quoted at all. \t could be quoted just like a brace
                    // or an unprintable character. We go out of our way here to
                    // make things more readable to a human. Also, sometimes it is
                    // convenient to the programmer to get rid of \n and \r,
                    // so the main input loop can use something simple like a gets().
                    case '\a': destination.Append("\\a"); break;
                    case '\b': destination.Append("\\b"); break;
                    case '\f': destination.Append("\\f"); break;
                    case '\n': destination.Append("\\n"); break;
                    case '\r': destination.Append("\\r"); break;
                    case '\t': destination.Append("\\t"); break;
                    case '\v': destination.Append("\\v"); break;
                    case ' ':
                    case '"':
                    case '\\':
                    case '{':
                    case '}':
                    case ';':
                    case '$':
                    case '[':
                    case ']':
                        destination.Append('\\');
                        destination.Append(ch);
                        break;
                    default:
                        if (IsUnprintable(ch))
                        {
                            // By the time we get here we know that we could include the
                            // character as is and TCL would not complain. But this
                            // is convenient to the human reader.
                            //
                            // We always print these in octal and we always use 3 digits.
                            // This is the most precise format used by TCL. 12\0034 will
                            // be four characters long. '1', '2', (char)3, '4'.
                            // 12\x034 on the other hand is interpreted as '1', '2',
                            // (int)34. The 0 is ignored. Any number of characters
                            // would be ignored in this position.
                            destination.Append('\\');
                            destination.Append((char)('0' + ((ch >> 6) & 7)));
                            destination.Append((char)('0' + ((ch >> 3) & 7)));
                            destination.Append((char)('0' + (ch & 7)));
                        }
                        else
                        {
                            destination.Append(ch);
                        }
                        break;
                }
            }
        }

        /// <summary>
        /// Build a list out of a fixed set of interesting inputs. Each input is
        /// preceded by a short description. The comments show the expected
        /// quoting of each input.
        /// </summary>
        /// <returns>The resulting TCL list, for manual inspection.</returns>
        public static string Test()
        {
            StringBuilder standardChars = new StringBuilder();
            for (char ch = (char)0; ch < 256; ch++)
            {
                standardChars.Append(ch);
            }
            string[] inputs = new string[]
            {
                // should use slash quoting. See the code for each special case.
                "All chars", standardChars.ToString(),
                // {}
                "Empty string", "",
                // simple_value
                "Simple value", "simple_value",
                // {a b c} // just add { and }
                "Simple list", "a b c",
                // {{a b} {{c d} {e f}} {} {g h}} // just add { and }
                "Recursive list", "{a b} {{c d} {e f}} {} {g h}",
                // {{a b} \\ \} \{ {c d}} // just add { and }
                "Fancy recursive list", "{a b} \\\\ \\} \\{ {c d}",
                // a\ bc\\ // slash quoting of final slash and space
                "Ends with slash", "a bc\\",
                // \{\{\{\}\}
                "Too many opens", "{{{}}",
                // \{\{\}\}\}
                "Too many closes", "{{}}}",
                // \{\}\}\{\{\}
                "Wrong order", "{}}{{}",
                // a\"b\{c // Add the slashes
                "Quote and curly", "a\"b{c",
                // This test case is aimed at a specific bug that was fixed recently.
                // “Hello_world”
                "Simple value with UTF-8", "“Hello_world”",
                // {¡Hola! ¿Què pasa?}
                "Simple list with UTF-8", "¡Hola! ¿Què pasa?",
                // Don’t\ stop\ here\ \{
                "UTF-8 and backslash", "Don’t stop here {"
            };
            StringBuilder result = new StringBuilder();
            foreach (string s in inputs)
                result.LAppend(s);
            return result.ToString();
        }

        /// <summary>
        /// Attempt to parse this string into a list of strings. Try to make this the same
        /// as TCL would do.
        ///
        /// We know we are not as smart (or complete) as TCL. Any backslashes will cause this
        /// to fail. Any list item which starts with a double quote will fail. (Double quotes
        /// in other places are fine.)
        ///
        /// Presumably this could be improved in the future. Presumably we could get smarter
        /// and correctly parse more strings. Don't rely on any of these failures.
        ///
        /// This is aimed at an import feature for the PrototypeEditor. This method handles
        /// the cases that we expect to find in that class. This means that the user can break
        /// something, but this is all based on what we expect the user to do.
        /// </summary>
        /// <param name="input">A string to parse.</param>
        /// <returns>
        /// The list items, or null if we are unable to parse. A null input
        /// is reported as unparsable.
        /// </returns>
        public static IList<string> Parse(String input)
        {
            if (null == input)
                // Nothing to parse. Report this like any other failure.
                return null;

            IList<string> result = new List<string>();
            // The item we are currently reading, or null if we are between items.
            StringBuilder inProgress = null;
            // True if the current item started with a {.
            bool inBraces = false;
            bool lastCharWasABraceAndClosedTheList = false;
            // Number of unmatched { in the current item. Only used if inBraces.
            int braceCount = 0;
            foreach (Char ch in input)
            {
                switch (ch)
                {
                    case ' ':
                    case '\r':
                    case '\t':
                    case '\n':
                        if (null != inProgress)
                        {
                            if (inBraces)
                            {
                                // Whitespace inside of braces is part of the item.
                                inProgress.Append(ch);
                            }
                            else
                            {
                                // Whitespace terminates a bare item.
                                result.Add(inProgress.ToString());
                                inProgress = null;
                            }
                        }
                        break;
                    case '{':
                        if (null == inProgress)
                        {
                            if (lastCharWasABraceAndClosedTheList)
                                // Something like "{a}{}" If you start a list element with a { you
                                // can't have anything but a whitespace or end of string following
                                // the close brace.
                                return null;
                            inProgress = new StringBuilder();
                            inBraces = true;
                            braceCount = 1;
                        }
                        else if (inBraces)
                        {
                            braceCount++;
                            inProgress.Append(ch);
                        }
                        else
                        {
                            // "ab{cd ef}gh" is a list with two items. "ab{cd" is the first item.
                            // "ef}gh" is the second item. { and } are not considered special here.
                            inProgress.Append(ch);
                        }
                        break;
                    case '}':
                        if (null == inProgress)
                        {
                            if (lastCharWasABraceAndClosedTheList)
                                // Something like "{a}}" If you start a list element with a { you
                                // can't have anything but a whitespace or end of string following
                                // the close brace.
                                return null;
                            // This is legal if strange. "a } c}d" is a valid list with three items,
                            // "a", "}" and "c}d" In these cases } is not considered special.
                            inProgress = new StringBuilder();
                            inBraces = false;
                            braceCount = 0;
                            inProgress.Append(ch);
                        }
                        else if (!inBraces)
                        {
                            inProgress.Append(ch);
                        }
                        else
                        {
                            braceCount--;
                            if (braceCount == 0)
                            {
                                // List item complete.
                                result.Add(inProgress.ToString());
                                inProgress = null;
                                inBraces = false;
                            }
                            else
                            {
                                // List item is not done yet.
                                inProgress.Append(ch);
                            }
                        }
                        break;
                    case '"':
                        if (null != inProgress)
                            // Quotes are okay in the middle of a list item. In fact,
                            // this is common the way we do things.
                            inProgress.Append(ch);
                        else
                            // This is valid TCL, but for simplicity we're not going to try to parse it.
                            // A list item starting with a double quote.
                            return null;
                        break;
                    case '\\':
                        // This is valid TCL, but for simplicity we're not going to try to parse it.
                        // The user could create a list like this, but we try to avoid that in general,
                        // as it would be confusing for the user.
                        return null;
                    default:
                        if (null == inProgress)
                        {
                            if (lastCharWasABraceAndClosedTheList)
                                // Something like "{a}b" If you start a list element with a { you
                                // can't have anything but a whitespace or end of string following
                                // the close brace.
                                return null;
                            inProgress = new StringBuilder();
                            inBraces = false;
                            braceCount = 0;
                        }
                        inProgress.Append(ch);
                        break;
                }
                // Remember if this character was the } that finished a braced item.
                lastCharWasABraceAndClosedTheList = (ch == '}') && (null == inProgress);
            }
            if (null != inProgress)
            {
                if (inBraces)
                    // We got to the end of something like "{a b} {c d". The last item, "c d",
                    // is incomplete.
                    return null;
                // Else the end of string correctly terminated the last item.
                result.Add(inProgress.ToString());
            }
            return result;
        }
    }
}