#ifndef __HashHelper_h_ #define __HashHelper_h_ #include #include #include #include // This file might be obsolete. C++14 now allows for "heteregeneous lookup". // That means that (a) you can store a std::string as an element in a set or a // key in a map (b) you can look for that item using a a char * and (c) the // search key won't be converted to a std::string. That's basically what // we're doing here. I don't have all the details, but you might start here: // http://stackoverflow.com/questions/20317413/what-are-transparent-comparators struct SimpleCharStarHash { size_t operator()(char const *s) const { // This is the same function used by gcc for hashing std::string types. return std::_Hash_impl::hash(s, strlen(s)); } }; struct SimpleCharStarEqual { bool operator()(char const *a, char const *b) const { return !strcmp(a, b); } }; // The goal is a quick way to look up a char * to see if it is in a set. In // particular, we don't want to create a std::string for every lookup. We // store a std::unique_ptr< char[] > so we know the char * in our table will // remain valid. Creating and updating the table doesn't have to be as fast // as reading from the table. // // Older versions stored a std::string, rather than a unique_ptr. But the // implementation of std::string changed, so that doesn't work on newer // compilers. // // Note that the default unordered_set / unordered_map on char * will only // compare pointer identity. It will not look at the contents of the string. typedef std::unordered_map< char const *, std::unique_ptr< char[] >, SimpleCharStarHash, SimpleCharStarEqual > CharStarHashSet; // We want a char * that will exist until we are done with it. std::string // contains a char * and does automatic memory management, but there are // issues. In particular, if you ask for the underlying char *, that value // is only good for a limited time. That is to say the character data might // get moved to a new location at unexpected times. Worse yet, the details of // how long the char * will be good depends on the version of the compiler! // makeUniquePtr() does what we need. inline std::unique_ptr< char[] > makeUniquePtr(std::string const &src) { std::unique_ptr< char[] > result(new char[src.length()+1]); memcpy(result.get(), src.c_str(), src.length()+1); return result; } // makeSharedPtr() is similar to makeUniquePtr, but it returns a // std::shared_ptr instead of a std::unique_ptr. This is useful if you // are creating a map that needs to be copied. The new and old map can // both use the same char * in their keys, as long as they both contain // the same shard_ptr in the corresponding data. I.e. the default copy // constructor for std::map or std::unordered_map will work perfectly. inline std::shared_ptr< const char > makeSharedPtr(std::string const &src) { char *result = (char *)malloc(src.length()+1); memcpy(result, src.c_str(), src.length()+1); return std::shared_ptr< const char >(result, [](char const *x) { free((void *)x); }); } inline void addString(CharStarHashSet &h, std::string const &s) { // Note: We have to check if the string is already in the table. Otherwise // bad things will happen. If you use [] to replace an element, the key will // stay the same as what was already in the table, but the value in the table // will be replaced by the new value. // // In a lot of cases that's good enough. In fact that's indistinguishable, // most of the time. If two keys are the same, it doesn't matter which one // we use. // // But this is a special case. The pointer in the key has to be the exact // same pointer as is used in the string object associated with that key. // Otherwise, there are no gaurentees what the key pointer is pointing to. // It could point to random data. It my tests it often pointed to an item // that I added to the hash table soon after. But it could also cause a // core dump. if (!h.count(s.c_str())) { auto persistent = makeUniquePtr(s); h[persistent.get()] = std::move(persistent); } } #endif