#ifndef __MiscSupport_h_ #define __MiscSupport_h_ #include #include #include #include #include #include #include #include #include "FixedMalloc.h" ///////////////////////////////////////////////////////////////////// // Like assert() // ///////////////////////////////////////////////////////////////////// // Value is always computed. We also call assert(value) if assertions are // enabled. Value is discarded either way. You do not get a warning either // way. This is useful when (a) a function has a side effect (b) the function // returns true on success, and (c) failure seems unlikely, but we still want // to check sometimes. template < class T > void assertTrue(T const &value) { assert(value); } template < class T > void assertFalse(T const &value) { assert(!value); } ///////////////////////////////////////////////////////////////////// // Support for maps ///////////////////////////////////////////////////////////////////// // A common map type. typedef std::map< std::string, std::string > PropertyList; // This looks up a value in a map and returns a default value. The most // important difference between this and [] is that this function never // modifies the map. [] will create a new entry if the value you are looking // for does not exist. This can cause a type of memory leak, if random values // are continuously added. template< class KeyType, class ValueType > ValueType getProperty(std::map< KeyType, ValueType > const &properties, KeyType const &propertyName, ValueType const &defaultValue) { typename std::map< KeyType, ValueType >::const_iterator item = properties.find(propertyName); if (item == properties.end()) { return defaultValue; } else { return item->second; } } // This is similar to the function above, but this only works on pointers, and // it always returns a pointer to the value in the map, or it returns NULL if // the item is not in the map. 
// If the data type is already a pointer and
// cannot be null, then the previous version makes a little more sense.  This
// version would give you a pointer to a pointer if the value type is a
// pointer.
//
// Read-only lookup: returns the address of the stored value, or a null
// pointer when the key is absent.  The map is never modified.
template< class KeyType, class ValueType >
ValueType const *getProperty(std::map< KeyType, ValueType > const &properties,
                             KeyType const &propertyName)
{
  auto const found = properties.find(propertyName);
  return (found != properties.end()) ? &(found->second) : nullptr;
}

// This is similar to the function above, but the table and the resulting
// pointer are not const.  This is the only version of the three that lets you
// modify a value in the table.
//
// A missing key still yields a null pointer; no entry is ever created.
template< class KeyType, class ValueType >
ValueType *getProperty(std::map< KeyType, ValueType > &properties,
                       KeyType const &propertyName)
{
  auto const found = properties.find(propertyName);
  return (found != properties.end()) ? &(found->second) : nullptr;
}

// This is similar to the first version of getProperty.  This version,
// however, creates a default value based on the type of the map.  This
// is mostly used when the value type is a pointer.  In this case the
// desired default is almost certainly NULL.  However, the compiler has trouble
// trying to match NULL to that template for some reason.
//
// The fallback is a value-initialized ValueType, i.e. ValueType().
template< class KeyType, class ValueType >
ValueType getPropertyDefault(std::map< KeyType, ValueType > const &properties,
                             KeyType const &propertyName)
{
  return getProperty(properties, propertyName, ValueType());
}

// This rather ugly specialization is required when you have a map with
// std::string as the key type, and a literal string constant as the key.
// Forwards to the std::string-keyed getPropertyDefault() after wrapping the
// C string in a std::string, so a string literal can be used as the key.
template< class ValueType >
ValueType getPropertyDefault(std::map< std::string, ValueType > const &properties,
                             char const *propertyName)
{
  std::string const key(propertyName);
  return getPropertyDefault(properties, key);
}

/////////////////////////////////////////////////////////////////////
// get host name
//
// A wrapper around the gethostname() system call.
/////////////////////////////////////////////////////////////////////

// E.g. fogell.trade-ideas.com
std::string getHostName();
// E.g. fogell
std::string getShortHostName();

/////////////////////////////////////////////////////////////////////
// nextItem()
//
// This will add one more item to the end of the vector, and return
// a reference to that item.  The item is initialized to the default
// item for that type.
//
// It's common for me to say something like Item &item = map["next"]
// with std::map.  This creates a new item in the map.  It lets me
// slowly manipulate that item to build it up.  The alternative would
// be to build the item in a local variable, then copy it into the
// map.  This is cleaner and more efficient.
//
// nextItem() is like [] in PHP.
//   nextItem(x) = 5;   // C++
//   x[] = 5;           // PHP
// That particular example isn't very interesting.  I'd use
// vector::push_back() in that case.  But it helps illustrate what's
// going on.
/////////////////////////////////////////////////////////////////////

template < typename T >
T &nextItem(std::vector< T > &vector)
{
  // Grow by exactly one value-initialized element and hand it back.
  vector.emplace_back();
  return vector.back();
}

/////////////////////////////////////////////////////////////////////
// getLine
//
// This is good for parsing simple input.  Each line is separated from the
// next by a single \n.  Each time you call getLine, everything before the
// first \n is removed from the source and returned.  The first \n is removed
// and discarded.
///////////////////////////////////////////////////////////////////// std::string breakOnce(std::string &source, char breakAt); inline std::string getLine(std::string &source) { return breakOnce(source, '\n'); } ///////////////////////////////////////////////////////////////////// // Convert to and from string. // // These are all based on the standard C functions with similar names. // These use C++ strings to make the easier to use, including making them // thread safe. Some of these introduce a default value, as detecting // errors in the original version was a pain. ///////////////////////////////////////////////////////////////////// long int strtolDefault(const std::string &input, long int defaultValue); long int strtolDefault(const char *input, long int defaultValue); double strtodDefault(const std::string &input, double defaultValue); double strtodDefault(const char *input, double defaultValue); long unsigned int strtoulDefault(const std::string &input, long unsigned int defaultValue); long unsigned int strtoulDefault(const char *input, long unsigned int defaultValue); long long int strtollDefault(const std::string &input, long long int defaultValue); long long int strtollDefault(const char *input, long long int defaultValue); long long unsigned int strtoullDefault(const std::string &input, long long unsigned int defaultValue); long long unsigned int strtoullDefault(const char *input, long long unsigned int defaultValue); // These also convert from a string to a number. The type of the number comes // from the argument. In case of error, we throw an exception. In case of // an error, n might be changed to some invalid value. class InvalidConversionException { }; void nfroma(long int &n, std::string const &s); void nfroma(double &n, std::string const &s); // The convert from a number to a string. 
std::string itoa(int i);
std::string ltoa(long i);
std::string ultoa(unsigned long i);
std::string lltoa(long long i);
std::string ulltoa(unsigned long long i);
std::string dtoa(double d);
std::string dtoa(double d, int digits);
std::string dtoaFixed(double d, int digits);

// The previous functions are based on the traditional C style names.
// For some reason if you don't use the right one you will not get a warning
// message.  This is also useful when you are using a typedef and you don't
// know what you really have.  This selects the right version for you.
inline std::string ntoa(int i) { return itoa(i); }
inline std::string ntoa(long i) { return ltoa(i); }
inline std::string ntoa(unsigned long i) { return ultoa(i); }
inline std::string ntoa(long long i) { return lltoa(i); }
inline std::string ntoa(unsigned long long i) { return ulltoa(i); }
inline std::string ntoa(double d) { return dtoa(d); }

std::string addCommas(int64_t n);

// In g++ it appears that int is always 32 bits.  long and long long are always
// 64 bits.  It doesn't matter if you are compiling for a 32 or 64 bit machine.
// Pointers, on the other hand, will be 32 or 64 bits depending on the machine.
// NOTE(review): the width of long is set by the target ABI (it is 32 bits on
// ILP32 targets), so verify this before relying on it.

// This prints between 2 and 4 digits after the decimal.
// 10       => 10.00
// 10.1     => 10.10
// 10.02    => 10.02
// 10.03401 => 10.034
// 10.03451 => 10.0345
// 10.00005 => 10.0001
// 10.00004 => 10.00
std::string priceToString(double d);

// Similar to priceToString(), but leaves between 0 and 2
// digits after the decimal.
std::string percentToString(double d);

std::string pointerToString(void const *p);

// Converts a time into a string.  The result is somewhat verbose and good
// for log files.  This is a wrapper around the ctime() system call to make
// it easier to use.
std::string ctimeString(time_t t);

// This is similar to ctimeString, but the input is in microseconds.
// This works well with the result of getMicroTime() or
// TimeVal::asMicroseconds().
std::string cMicroTimeString(int64_t t);

// Converts errno into a string.
std::string errorString();

// A wrapper around realpath().
// http://man7.org/linux/man-pages/man3/realpath.3.html
// On error this returns "" and errorString() should tell you more.
std::string realpathString(std::string const &path);

// Describes the type in a user friendly manner.
//
// If you are dealing with a pointer, you probably want to dereference the
// pointer.  That will give you the name of the actual object.  Otherwise
// you will get the type of the variable, which might be a base class.
//
// Bad example:  All calls to this function will return the same string.
// The string is known at compile time.  This is probably NOT what you want.
//   unmangledName(typeid (this)) --> IContainerThreadUser*
//
// Good example:  Each call to this function can give a different result
// depending on the current value of this.  This is usually what you want.
//   unmangledName(typeid (*this)) --> TopListMicroService::DataProvider
std::string unmangledName(std::type_info const &type);

/////////////////////////////////////////////////////////////////////
// RefCount
//
// This class provides a simple smart pointer type.  The normal way to use this
// is to create a RefCount object around a "new" pointer, then get rid of
// any other references to that pointer.  When you copy a RefCount object,
// you are bumping the internal refcount.  When the internal refcount goes to
// 0 the underlying pointer is deleted.
//
// NULL pointers are allowed.  A null pointer is interpreted as false, anything
// else as true.
//
// == and != mean that you are pointing to the same underlying pointer.  You
// cannot compare a RefCount pointer to a normal pointer.  These only work
// if you follow the rules and only produce one RefCount directly from each
// (non-null) pointer.
//
// A RefCount object is not thread safe.
If you duplicate a RefCount object, // the new object and the original object share some underlying data // structures. You cannot safely use these two objects in different threads. // If every access to a RefCount object uses one common mutex, then the // operations become thread safe. ///////////////////////////////////////////////////////////////////// template< class baseType > class RefCount { private: struct Container : FixedMalloc { unsigned int count; baseType *basePointer; } *_container; void decrement() { if (_container) { _container->count--; if (!_container->count) { delete _container->basePointer; delete _container; _container = NULL; } } } void increment() { if (_container) { _container->count++; } } public: RefCount(baseType *basePointer = NULL) { if (basePointer) { _container = new Container; _container->count = 1; _container->basePointer = basePointer; } else { _container = NULL; } } ~RefCount() { decrement(); } RefCount(RefCount const &other) { _container = other._container; increment(); } RefCount &operator =(RefCount const &other) { if (this != &other) { decrement(); _container = other._container; increment(); } return *this; } baseType &operator *() { if (_container) { return *(_container->basePointer); } else { return *(baseType *)NULL; } } baseType *operator ->() { if (_container) { return _container->basePointer; } else { return NULL; } } baseType const &operator *() const { if (_container) { return *(_container->basePointer); } else { return *(baseType *)NULL; } } baseType const *operator ->() const { if (_container) { return _container->basePointer; } else { return NULL; } } operator bool() const { return _container; } bool operator !() const { return !_container; } bool operator ==(RefCount const &other) const { return _container == other._container; } bool operator !=(RefCount const &other) const { return _container != other._container; } }; ///////////////////////////////////////////////////////////////////// // getMicroTime() returns 
// the number of microseconds since the Unix
// epoch.
//
// We typically use TimeVal as a high precision clock.  The interface
// to TimeVal is unnecessarily complicated.  In retrospect, most of
// the time we really wanted getMicroTime().
//
// When running on Joey-Mousepad I found calls to getMicroTime()
// typically took 73 nanoseconds, maybe one time in 20 jumping up
// into the 85 - 110 nanosecond range.
//
// I ran another test on Joey-Mousepad.  I tried clock_gettime().
// Short version, getMicroTime() is just as efficient as
// clock_gettime(), but slightly more convenient.  The details depend
// on which clock you select.
//
// CLOCK_REALTIME gives results similar to getMicroTime() but precise
// to the nanosecond.  It consistently took 73½ nanoseconds.  That's
// too close to the speed of getMicroTime() to be a coincidence.
// Under the hood these two functions must be using the same
// mechanism.
//
// CLOCK_MONOTONIC was actually slightly slower, at 77¼ nanoseconds
// per call.  I'm not sure why you'd use that.
//
// CLOCK_PROCESS_CPUTIME_ID and CLOCK_THREAD_CPUTIME_ID took 295¼
// and 276½ nanoseconds, respectively.  These offer additional
// functionality, but there are some caveats so read the man page
// carefully before using them.
/////////////////////////////////////////////////////////////////////

int64_t getMicroTime();

/////////////////////////////////////////////////////////////////////
// StopWatch
//
// Very often we use getMicroTime() (or TimeVal) to measure how long
// it took to do a certain task.  Or we often have a series of tasks,
// one right after the next, and we want to time them all.  StopWatch
// makes it easy and convenient to do exactly that.
//
// Create a variable of type StopWatch before you start the first
// task.  Call getMicroSeconds() each time a task finishes.  This
// will return the time required to perform the task that just
// finished.
///////////////////////////////////////////////////////////////////// class StopWatch { private: int64_t _last; public: // The clock starts as soon as you create this objet. StopWatch() : _last(getMicroTime()) { } // Get the number of microseconds since the last time we reset and then // immediately reset. int64_t getMicroSeconds() { int64_t start = _last; _last = getMicroTime(); return _last - start; } }; ///////////////////////////////////////////////////////////////////// // TimeVal // // This is a simple wrapper around timeval, primarily intended to give us // < so we can use it as a key in a map. ///////////////////////////////////////////////////////////////////// struct TimeVal { time_t tv_sec; /* seconds */ suseconds_t tv_usec; /* microseconds */ bool operator <(TimeVal const &b) const { return (tv_sec < b.tv_sec) || ((tv_sec == b.tv_sec) && (tv_usec < b.tv_usec)); } bool operator >(TimeVal const &b) const { return (tv_sec > b.tv_sec) || ((tv_sec == b.tv_sec) && (tv_usec > b.tv_usec)); } bool operator ==(TimeVal const &b) const { return (tv_sec == b.tv_sec) && (tv_usec == b.tv_usec); } bool operator <=(TimeVal const &b) const { return (*this < b) || (*this == b); } bool operator >=(TimeVal const &b) const { return (*this > b) || (*this == b); } TimeVal &addSeconds(time_t seconds) { tv_sec += seconds; return *this; } TimeVal &addMinutes(time_t minutes) { tv_sec += minutes * 60; return *this; } TimeVal &addHours(time_t hours) { tv_sec += hours * 3600; return *this; } typedef long long int Microseconds; Microseconds asMicroseconds() const { Microseconds returnValue = tv_sec; returnValue *= 1000000; returnValue += tv_usec; return returnValue; } TimeVal &addMicroseconds(Microseconds microseconds) { microseconds += asMicroseconds(); if (microseconds < 0) { microseconds = 0; } tv_sec = microseconds / 1000000; tv_usec = microseconds % 1000000; return *this; } TimeVal &addMilliseconds(int milliseconds) { Microseconds microseconds = milliseconds; microseconds *= 
1000; return addMicroseconds(microseconds); } void currentTime() { int result = gettimeofday((timeval *)this, NULL); assert(!result); } TimeVal(bool initializeWithCurrentTime = false) { if (initializeWithCurrentTime) { currentTime(); } else { tv_sec = 0; tv_usec = 0; } } TimeVal(time_t unixTimeStamp) { tv_sec = unixTimeStamp; tv_usec = 0; } TimeVal(timeval const &tv) { tv_sec = tv.tv_sec; tv_usec = tv.tv_usec; } void clear() { // We used to say t = 0, but the compiler found that ambiguous. // (Either converting 0 to a time_t (which is actually a long int, but // that's hidden and hard to find) or to false would have given you the // same result, which is the same as clear, but this makes the code // simpler.) tv_sec = 0; tv_usec = 0; } operator bool() const { return tv_sec || tv_usec; } std::string ctimeString() const { return ::ctimeString(tv_sec); } timeval waitTime() const { // This is ideal for a select() statement. This says how far in the // future the time in this object is. This output is designed for use // with select(). timeval result; TimeVal now(true); if (now < *this) { result.tv_sec = tv_sec - now.tv_sec; // Create a new variable to make sure this is a signed computation. long usec = tv_usec - now.tv_usec; if (usec < 0) { // borrow one second. usec += 1000000; result.tv_sec--; } result.tv_usec = usec; } else { result.tv_sec = 0; result.tv_usec = 0; } return result; } }; ///////////////////////////////////////////////////////////////////// // Support for URLs ///////////////////////////////////////////////////////////////////// // Convert from the standard url encoding. This would take as input everything // after the question mark. This assumes that the names and values are both // url encoded. This overwrites whetever is already in the property list, if // the names overlap. void parseUrlRequest(PropertyList &addToHere, std::string fromHere); // Standard url decode. This will decode %20 or + into space. 
std::string urlDecode(std::string input);

// URL encode.  Everything but -, _, and alpha-numeric characters are encoded.
// If raw is true, this is like php's rawurlencode, and space is quoted like
// all other characters, with % and two hex digits.  Otherwise, this is like
// php's urlencode, and space is encoded as +, and the decimal point is encoded
// as itself.
std::string urlEncode(std::string input, bool raw = true);
// Encodes a whole property list.  Presumably this produces name=value pairs
// joined by &, the format parseUrlRequest() reads -- confirm against the
// implementation.
std::string urlEncode(PropertyList const &input);

// This does two things.  First, it looks for illegal characters, like \n.
// If it finds any it returns the empty string.  This is used when you are
// receiving URL encoded data from one source, then passing it on to another.
//
// This also translates * into = and ^ into &.  This is a trick useful for
// testing and debugging.  The debug version of the command interpreter uses
// URL encoding on each argument, and some of the arguments were already URL
// encoded.  Doing the URL encoding twice is not a problem for the computer,
// but it is a pain to do by hand.  So a person can take a urlencoded argument,
// and do the reverse of the translations above, then it is safe to add the
// result to a command.
std::string makeUrlSafe(std::string original);

// Appends the second string to the end of the first string.
// Adds a "&", but only if required.
void addToUrlEncoded(std::string &result, std::string const &toAdd);

/////////////////////////////////////////////////////////////////////
// FLF
//
// This is for reporting to the log.  We normally start with these
// three items in any message.  I would add this to the header file
// for the log, but we have more than one of those!  Usually this
// is used with a TCL list, although I suppose it would work with
// a stream, too.
///////////////////////////////////////////////////////////////////// #define FLF __FILE__<<__LINE__<<__FUNCTION__ ///////////////////////////////////////////////////////////////////// // TCL quoting // // These functions allow you to write a string from C++ in such a way // that it can be read back into TCL as a list. This includes // quoting of all characters which are special to a list. This also // quotes characters which could be confusing to a human reader. // // Why TCL style quoting? These functions were originally created // for use in a custom log file. Of course this allows us to produce // files which can be read in by TCL, even though the main program // does not use TCL. This format is also very readable by humans. // And this format is especially nice when outputting recursive // data structures. // // There are multiple legal ways to encode a list in TCL. (In fact // there are an infinite number of ways to encode most lists.) All // of these will be decoded to generate the same result. // // Our choices are generally similar to the choices used by TCL when // it converts a list to a string. We do not quote certain // characters, like [, so these routines are only good for producing // TCL data, not TCL code. We quote all unprintable characters, so a // human reader may read the output more easily. In particular, the // \n and \r characters are quoted, so that you can use \n or \r\n as // a record separator. // // Note: Until recently we assumed that everything above 127 was an // unprintable character. Now that UTF-8 is used in more places, we // now treat these like normal alpha numeric characters. We do not // do any sanity checking on these characters. If the input was valid // UTF-8, the output will also be valid UTF-8. ///////////////////////////////////////////////////////////////////// // This encodes a single string the way that TCL quotes a list item. 
// If TCL will read in the result, this is not very interesting;
// this is like making a list with one item, s.  However, TCL-style
// quoting can be useful in other places.
std::string tclQuote(const std::string s);

// This encodes a list as a string in a manner compatible with TCL.
// start and end are an STL-style sequence of items which are, or
// can be implicitly converted to, strings.
//
// Every item is passed through tclQuote() and the results are joined with
// single spaces.  An empty sequence gives an empty string.
template < class In >
std::string tclList(In start, In end)
{
  std::string joined;
  if (start == end)
    return joined;
  // The first item gets no separator; every later item gets one in front.
  joined += tclQuote(*start);
  for (start++; start != end; start++)
    {
      joined += ' ';
      joined += tclQuote(*start);
    }
  return joined;
}

// This encodes a list in the same way as above, but it allows you to do it
// one entry at a time, like the lappend command in TCL.
//
// Some of these operations are slightly faster than the previous way of making
// a list.  Specifically, the integer versions are faster because we know that
// an integer does not need to be quoted.  But the primary reason for this
// list is that it's just more convenient to have the conversion and the
// container combined into one.
//
// The bulk of these operations are only required to avoid ambiguity, and
// this really isn't as complicated as it may appear at first.
// char * is converted to string in the obvious way.  A character is treated
// as a string that is one character long.  All other integers are converted
// to decimal.
class TclList { private: std::string _s; void addSeperator(); public: TclList() { } template< typename A, typename B > TclList(A const &a, B const &b) { (*this)< TclList(A const &a, B const &b, C const &c) { (*this)< TclList(A const &a, B const &b, C const &c, D const &d) { (*this)< TclList(A const &a, B const &b, C const &c, D const &d, E const &e) { (*this)< TclList(A const &a, B const &b, C const &c, D const &d, E const &e, F const &f) { (*this)< TclList(A const &a, B const &b, C const &c, D const &d, E const &e, F const &f, G const &g) { (*this)< TclList &operator <<(T const *p) { TclList pair; if (p) // Check the type of the actual object at run time. This will explicitly // throw an exception if p is NULL. Warning: If a type contains RTTI // but the pointer is invalid, this will cause a segmentation violation. pair< TclList &operator <<(std::vector< T > const &v) { TclList sublist; for (typename std::vector< T >::const_iterator it = v.begin(); it != v.end(); it++) sublist<<*it; (*this)< TclList &operator <<(std::map< K, V > const &map) { TclList mapAsList; for (typename std::map< K, V >::const_iterator it = map.begin(); it != map.end(); it++) mapAsList<<(TclList()<first<second); (*this)< void append(T const &first, Args... rest) { (*this)< void maximize(T &var) { var = std::numeric_limits< T >::max(); } template < class T > void minimize(T &var) { // std::numeric_limits<::min() doesn't mean what you probably think. // lowest() is the smallest finite value of the type, like -128 for int8_t, 0 // for uint8_t, or -FLT_MAX for float. min() gives the same result for // integer types, but strange and totally different things for floating // points types. That's another reason I wrote minimize(), so I don't have // to remember min() vs lowest(). 
var = std::numeric_limits< T >::lowest(); } template < class T > T maxLike(T var) { maximize(var); return var; } template < class T > T minLike(T var) { minimize(var); return var; } ///////////////////////////////////////////////////////////////////// // ThreadId // // This is a simple and fast way to know what thread you are in. // // There is a system call to ask what thread you are in. That's nice // because that number is available to all processes. But it's bad // because its a system call so it's slow and expensive. // // pthreads offers an alternative. Unfortunately the id is an opaque // type and you can't do much with it. Our implemention uses a // simple pointer in the normal way. E.g. you know that NULL defines // a ThreadId that is never used for a real thread, you can compare // with the standard operators, you can make the value a key in a // tree or hash table. // // Under Linux and GCC the pthreads version is implemented the same // way that this code is. But we export the pointer directly rather // than wrapping it up to look like the pthread version. ///////////////////////////////////////////////////////////////////// class ThreadId { // The class mainly exists as a scope. We could define _memory directly // in current(), but not if current() is inline. And we want current() to // be inline so it will be really fast. static __thread bool _memory; public: // There aren't actually any ThreadId objects. You can manipulate ThreadId // pointers like normal pointers, but you can't do anything else with them. ThreadId() = delete; ThreadId(ThreadId const &) = delete; void operator =(ThreadId const &) = delete; static ThreadId *current() { return (ThreadId *)&_memory; } }; ///////////////////////////////////////////////////////////////////// // Misc string stuff. Mostly based on PHP. 
/////////////////////////////////////////////////////////////////////

// Declared here, defined elsewhere.  The names follow the PHP functions.
std::string trim(std::string s, std::string whiteSpace = " ");
std::string strtoupper(std::string s);
// This is the same as above.  I have to do this to avoid a name conflict with
// SpryWare.
std::string trim__(std::string s, std::string whiteSpace = " ");
std::vector< std::string > explode(std::string seperator, std::string s);

// Joins the items in [piecesStart, piecesEnd) into one string, with glue
// between each pair of neighbors.  There is no glue before the first item or
// after the last one.  An empty range gives an empty string.
template < class InputIterator >
std::string implode(std::string glue,
                    InputIterator piecesStart,
                    InputIterator piecesEnd)
{
  std::string joined;
  bool needGlue = false;
  for (InputIterator piece = piecesStart; piece != piecesEnd; piece++)
    {
      if (needGlue)
        joined += glue;
      needGlue = true;
      joined += *piece;
    }
  return joined;
}

// Same as above, but each item is first passed through convertFunction, and
// the result of that call is what gets appended.
template < class InputIterator, class ConvertFunction >
std::string implode(std::string glue,
                    InputIterator piecesStart,
                    InputIterator piecesEnd,
                    ConvertFunction convertFunction)
{
  std::string joined;
  bool needGlue = false;
  for (InputIterator piece = piecesStart; piece != piecesEnd; piece++)
    {
      if (needGlue)
        joined += glue;
      needGlue = true;
      joined += convertFunction(*piece);
    }
  return joined;
}

#endif