604 lines
16 KiB
C++
604 lines
16 KiB
C++
// Copyright 2013 Dolphin Emulator Project
|
|
// Licensed under GPLv2
|
|
// Refer to the license.txt file included.
|
|
|
|
#include <boost/range/algorithm.hpp>
|
|
|
|
#include "common/common.h"
|
|
#include "common/string_util.h"
|
|
|
|
#ifdef _WIN32
|
|
#include <Windows.h>
|
|
#include <codecvt>
|
|
#else
|
|
#include <iconv.h>
|
|
#endif
|
|
|
|
namespace Common {
|
|
|
|
/// Make a string lowercase
|
|
std::string ToLower(std::string str) {
|
|
boost::transform(str, str.begin(), ::tolower);
|
|
return str;
|
|
}
|
|
|
|
/// Make a string uppercase
|
|
std::string ToUpper(std::string str) {
|
|
boost::transform(str, str.begin(), ::toupper);
|
|
return str;
|
|
}
|
|
|
|
// faster than sscanf
|
|
bool AsciiToHex(const char* _szValue, u32& result)
|
|
{
|
|
char *endptr = nullptr;
|
|
const u32 value = strtoul(_szValue, &endptr, 16);
|
|
|
|
if (!endptr || *endptr)
|
|
return false;
|
|
|
|
result = value;
|
|
return true;
|
|
}
|
|
|
|
bool CharArrayFromFormatV(char* out, int outsize, const char* format, va_list args)
|
|
{
|
|
int writtenCount;
|
|
|
|
#ifdef _WIN32
|
|
// You would think *printf are simple, right? Iterate on each character,
|
|
// if it's a format specifier handle it properly, etc.
|
|
//
|
|
// Nooooo. Not according to the C standard.
|
|
//
|
|
// According to the C99 standard (7.19.6.1 "The fprintf function")
|
|
// The format shall be a multibyte character sequence
|
|
//
|
|
// Because some character encodings might have '%' signs in the middle of
|
|
// a multibyte sequence (SJIS for example only specifies that the first
|
|
// byte of a 2 byte sequence is "high", the second byte can be anything),
|
|
// printf functions have to decode the multibyte sequences and try their
|
|
// best to not screw up.
|
|
//
|
|
// Unfortunately, on Windows, the locale for most languages is not UTF-8
|
|
// as we would need. Notably, for zh_TW, Windows chooses EUC-CN as the
|
|
// locale, and completely fails when trying to decode UTF-8 as EUC-CN.
|
|
//
|
|
// On the other hand, the fix is simple: because we use UTF-8, no such
|
|
// multibyte handling is required as we can simply assume that no '%' char
|
|
// will be present in the middle of a multibyte sequence.
|
|
//
|
|
// This is why we lookup an ANSI (cp1252) locale here and use _vsnprintf_l.
|
|
static locale_t c_locale = nullptr;
|
|
if (!c_locale)
|
|
c_locale = _create_locale(LC_ALL, ".1252");
|
|
writtenCount = _vsnprintf_l(out, outsize, format, c_locale, args);
|
|
#else
|
|
writtenCount = vsnprintf(out, outsize, format, args);
|
|
#endif
|
|
|
|
if (writtenCount > 0 && writtenCount < outsize)
|
|
{
|
|
out[writtenCount] = '\0';
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
out[outsize - 1] = '\0';
|
|
return false;
|
|
}
|
|
}
|
|
|
|
std::string StringFromFormat(const char* format, ...)
|
|
{
|
|
va_list args;
|
|
char *buf = nullptr;
|
|
#ifdef _WIN32
|
|
int required = 0;
|
|
|
|
va_start(args, format);
|
|
required = _vscprintf(format, args);
|
|
buf = new char[required + 1];
|
|
CharArrayFromFormatV(buf, required + 1, format, args);
|
|
va_end(args);
|
|
|
|
std::string temp = buf;
|
|
delete[] buf;
|
|
#else
|
|
va_start(args, format);
|
|
if (vasprintf(&buf, format, args) < 0)
|
|
ERROR_LOG(COMMON, "Unable to allocate memory for string");
|
|
va_end(args);
|
|
|
|
std::string temp = buf;
|
|
free(buf);
|
|
#endif
|
|
return temp;
|
|
}
|
|
|
|
// For Debugging. Read out an u8 array.
|
|
std::string ArrayToString(const u8 *data, u32 size, int line_len, bool spaces)
|
|
{
|
|
std::ostringstream oss;
|
|
oss << std::setfill('0') << std::hex;
|
|
|
|
for (int line = 0; size; ++data, --size)
|
|
{
|
|
oss << std::setw(2) << (int)*data;
|
|
|
|
if (line_len == ++line)
|
|
{
|
|
oss << '\n';
|
|
line = 0;
|
|
}
|
|
else if (spaces)
|
|
oss << ' ';
|
|
}
|
|
|
|
return oss.str();
|
|
}
|
|
|
|
// Turns " hej " into "hej". Also handles tabs.
|
|
std::string StripSpaces(const std::string &str)
|
|
{
|
|
const size_t s = str.find_first_not_of(" \t\r\n");
|
|
|
|
if (str.npos != s)
|
|
return str.substr(s, str.find_last_not_of(" \t\r\n") - s + 1);
|
|
else
|
|
return "";
|
|
}
|
|
|
|
// "\"hello\"" is turned to "hello"
|
|
// This one assumes that the string has already been space stripped in both
|
|
// ends, as done by StripSpaces above, for example.
|
|
std::string StripQuotes(const std::string& s)
|
|
{
|
|
if (s.size() && '\"' == s[0] && '\"' == *s.rbegin())
|
|
return s.substr(1, s.size() - 2);
|
|
else
|
|
return s;
|
|
}
|
|
|
|
bool TryParse(const std::string &str, u32 *const output)
|
|
{
|
|
char *endptr = nullptr;
|
|
|
|
// Reset errno to a value other than ERANGE
|
|
errno = 0;
|
|
|
|
unsigned long value = strtoul(str.c_str(), &endptr, 0);
|
|
|
|
if (!endptr || *endptr)
|
|
return false;
|
|
|
|
if (errno == ERANGE)
|
|
return false;
|
|
|
|
#if ULONG_MAX > UINT_MAX
|
|
if (value >= 0x100000000ull
|
|
&& value <= 0xFFFFFFFF00000000ull)
|
|
return false;
|
|
#endif
|
|
|
|
*output = static_cast<u32>(value);
|
|
return true;
|
|
}
|
|
|
|
bool TryParse(const std::string &str, bool *const output)
|
|
{
|
|
if ("1" == str || "true" == ToLower(str))
|
|
*output = true;
|
|
else if ("0" == str || "false" == ToLower(str))
|
|
*output = false;
|
|
else
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
std::string StringFromBool(bool value)
|
|
{
|
|
return value ? "True" : "False";
|
|
}
|
|
|
|
bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _pFilename, std::string* _pExtension)
|
|
{
|
|
if (full_path.empty())
|
|
return false;
|
|
|
|
size_t dir_end = full_path.find_last_of("/"
|
|
// windows needs the : included for something like just "C:" to be considered a directory
|
|
#ifdef _WIN32
|
|
":"
|
|
#endif
|
|
);
|
|
if (std::string::npos == dir_end)
|
|
dir_end = 0;
|
|
else
|
|
dir_end += 1;
|
|
|
|
size_t fname_end = full_path.rfind('.');
|
|
if (fname_end < dir_end || std::string::npos == fname_end)
|
|
fname_end = full_path.size();
|
|
|
|
if (_pPath)
|
|
*_pPath = full_path.substr(0, dir_end);
|
|
|
|
if (_pFilename)
|
|
*_pFilename = full_path.substr(dir_end, fname_end - dir_end);
|
|
|
|
if (_pExtension)
|
|
*_pExtension = full_path.substr(fname_end);
|
|
|
|
return true;
|
|
}
|
|
|
|
void BuildCompleteFilename(std::string& _CompleteFilename, const std::string& _Path, const std::string& _Filename)
|
|
{
|
|
_CompleteFilename = _Path;
|
|
|
|
// check for seperator
|
|
if (DIR_SEP_CHR != *_CompleteFilename.rbegin())
|
|
_CompleteFilename += DIR_SEP_CHR;
|
|
|
|
// add the filename
|
|
_CompleteFilename += _Filename;
|
|
}
|
|
|
|
void SplitString(const std::string& str, const char delim, std::vector<std::string>& output)
|
|
{
|
|
std::istringstream iss(str);
|
|
output.resize(1);
|
|
|
|
while (std::getline(iss, *output.rbegin(), delim))
|
|
output.push_back("");
|
|
|
|
output.pop_back();
|
|
}
|
|
|
|
std::string TabsToSpaces(int tab_size, const std::string &in)
|
|
{
|
|
const std::string spaces(tab_size, ' ');
|
|
std::string out(in);
|
|
|
|
size_t i = 0;
|
|
while (out.npos != (i = out.find('\t')))
|
|
out.replace(i, 1, spaces);
|
|
|
|
return out;
|
|
}
|
|
|
|
std::string ReplaceAll(std::string result, const std::string& src, const std::string& dest)
|
|
{
|
|
size_t pos = 0;
|
|
|
|
if (src == dest)
|
|
return result;
|
|
|
|
while ((pos = result.find(src, pos)) != std::string::npos)
|
|
{
|
|
result.replace(pos, src.size(), dest);
|
|
pos += dest.length();
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// UriDecode and UriEncode are from http://www.codeguru.com/cpp/cpp/string/conversions/print.php/c12759
|
|
// by jinq0123 (November 2, 2006)
|
|
|
|
// Uri encode and decode.
|
|
// RFC1630, RFC1738, RFC2396
|
|
|
|
//#include <string>
|
|
//#include <assert.h>
|
|
|
|
const char HEX2DEC[256] =
|
|
{
|
|
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
|
|
/* 0 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
|
|
/* 1 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
|
|
/* 2 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
|
|
/* 3 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,16,16, 16,16,16,16,
|
|
|
|
/* 4 */ 16,10,11,12, 13,14,15,16, 16,16,16,16, 16,16,16,16,
|
|
/* 5 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
|
|
/* 6 */ 16,10,11,12, 13,14,15,16, 16,16,16,16, 16,16,16,16,
|
|
/* 7 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
|
|
|
|
/* 8 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
|
|
/* 9 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
|
|
/* A */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
|
|
/* B */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
|
|
|
|
/* C */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
|
|
/* D */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
|
|
/* E */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
|
|
/* F */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16
|
|
};
|
|
|
|
std::string UriDecode(const std::string & sSrc)
|
|
{
|
|
// Note from RFC1630: "Sequences which start with a percent sign
|
|
// but are not followed by two hexadecimal characters (0-9, A-F) are reserved
|
|
// for future extension"
|
|
|
|
const unsigned char * pSrc = (const unsigned char *)sSrc.c_str();
|
|
const size_t SRC_LEN = sSrc.length();
|
|
const unsigned char * const SRC_END = pSrc + SRC_LEN;
|
|
const unsigned char * const SRC_LAST_DEC = SRC_END - 2; // last decodable '%'
|
|
|
|
char * const pStart = new char[SRC_LEN];
|
|
char * pEnd = pStart;
|
|
|
|
while (pSrc < SRC_LAST_DEC)
|
|
{
|
|
if (*pSrc == '%')
|
|
{
|
|
char dec1, dec2;
|
|
if (16 != (dec1 = HEX2DEC[*(pSrc + 1)])
|
|
&& 16 != (dec2 = HEX2DEC[*(pSrc + 2)]))
|
|
{
|
|
*pEnd++ = (dec1 << 4) + dec2;
|
|
pSrc += 3;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
*pEnd++ = *pSrc++;
|
|
}
|
|
|
|
// the last 2- chars
|
|
while (pSrc < SRC_END)
|
|
*pEnd++ = *pSrc++;
|
|
|
|
std::string sResult(pStart, pEnd);
|
|
delete [] pStart;
|
|
return sResult;
|
|
}
|
|
|
|
// Only alphanum is safe.
|
|
const char SAFE[256] =
|
|
{
|
|
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
|
|
/* 0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
|
|
/* 1 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
|
|
/* 2 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
|
|
/* 3 */ 1,1,1,1, 1,1,1,1, 1,1,0,0, 0,0,0,0,
|
|
|
|
/* 4 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
|
|
/* 5 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0,
|
|
/* 6 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
|
|
/* 7 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0,
|
|
|
|
/* 8 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
|
|
/* 9 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
|
|
/* A */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
|
|
/* B */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
|
|
|
|
/* C */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
|
|
/* D */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
|
|
/* E */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
|
|
/* F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
|
|
};
|
|
|
|
std::string UriEncode(const std::string & sSrc)
|
|
{
|
|
const char DEC2HEX[16 + 1] = "0123456789ABCDEF";
|
|
const unsigned char * pSrc = (const unsigned char *)sSrc.c_str();
|
|
const size_t SRC_LEN = sSrc.length();
|
|
unsigned char * const pStart = new unsigned char[SRC_LEN * 3];
|
|
unsigned char * pEnd = pStart;
|
|
const unsigned char * const SRC_END = pSrc + SRC_LEN;
|
|
|
|
for (; pSrc < SRC_END; ++pSrc)
|
|
{
|
|
if (SAFE[*pSrc])
|
|
*pEnd++ = *pSrc;
|
|
else
|
|
{
|
|
// escape this char
|
|
*pEnd++ = '%';
|
|
*pEnd++ = DEC2HEX[*pSrc >> 4];
|
|
*pEnd++ = DEC2HEX[*pSrc & 0x0F];
|
|
}
|
|
}
|
|
|
|
std::string sResult((char *)pStart, (char *)pEnd);
|
|
delete [] pStart;
|
|
return sResult;
|
|
}
|
|
|
|
#ifdef _WIN32
|
|
|
|
std::string UTF16ToUTF8(const std::u16string& input)
|
|
{
|
|
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
|
|
return convert.to_bytes(input);
|
|
}
|
|
|
|
std::u16string UTF8ToUTF16(const std::string& input)
|
|
{
|
|
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
|
|
return convert.from_bytes(input);
|
|
}
|
|
|
|
static std::string UTF16ToUTF8(const std::wstring& input)
|
|
{
|
|
auto const size = WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), nullptr, 0, nullptr, nullptr);
|
|
|
|
std::string output;
|
|
output.resize(size);
|
|
|
|
if (size == 0 || size != WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), &output[0], output.size(), nullptr, nullptr))
|
|
output.clear();
|
|
|
|
return output;
|
|
}
|
|
|
|
static std::wstring CPToUTF16(u32 code_page, const std::string& input)
|
|
{
|
|
auto const size = MultiByteToWideChar(code_page, 0, input.data(), input.size(), nullptr, 0);
|
|
|
|
std::wstring output;
|
|
output.resize(size);
|
|
|
|
if (size == 0 || size != MultiByteToWideChar(code_page, 0, input.data(), input.size(), &output[0], output.size()))
|
|
output.clear();
|
|
|
|
return output;
|
|
}
|
|
|
|
std::wstring UTF8ToUTF16W(const std::string &input)
|
|
{
|
|
return CPToUTF16(CP_UTF8, input);
|
|
}
|
|
|
|
std::string SHIFTJISToUTF8(const std::string& input)
|
|
{
|
|
return UTF16ToUTF8(CPToUTF16(932, input));
|
|
}
|
|
|
|
std::string CP1252ToUTF8(const std::string& input)
|
|
{
|
|
return UTF16ToUTF8(CPToUTF16(1252, input));
|
|
}
|
|
|
|
#else
|
|
|
|
template <typename T>
|
|
static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input)
|
|
{
|
|
std::string result;
|
|
|
|
iconv_t const conv_desc = iconv_open("UTF-8", fromcode);
|
|
if ((iconv_t)(-1) == conv_desc)
|
|
{
|
|
ERROR_LOG(COMMON, "Iconv initialization failure [%s]: %s", fromcode, strerror(errno));
|
|
iconv_close(conv_desc);
|
|
return {};
|
|
}
|
|
|
|
const size_t in_bytes = sizeof(T) * input.size();
|
|
// Multiply by 4, which is the max number of bytes to encode a codepoint
|
|
const size_t out_buffer_size = 4 * in_bytes;
|
|
|
|
std::string out_buffer;
|
|
out_buffer.resize(out_buffer_size);
|
|
|
|
auto src_buffer = &input[0];
|
|
size_t src_bytes = in_bytes;
|
|
auto dst_buffer = &out_buffer[0];
|
|
size_t dst_bytes = out_buffer.size();
|
|
|
|
while (0 != src_bytes)
|
|
{
|
|
size_t const iconv_result = iconv(conv_desc, (char**)(&src_buffer), &src_bytes,
|
|
&dst_buffer, &dst_bytes);
|
|
|
|
if (static_cast<size_t>(-1) == iconv_result)
|
|
{
|
|
if (EILSEQ == errno || EINVAL == errno)
|
|
{
|
|
// Try to skip the bad character
|
|
if (0 != src_bytes)
|
|
{
|
|
--src_bytes;
|
|
++src_buffer;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ERROR_LOG(COMMON, "iconv failure [%s]: %s", fromcode, strerror(errno));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
out_buffer.resize(out_buffer_size - dst_bytes);
|
|
out_buffer.swap(result);
|
|
|
|
iconv_close(conv_desc);
|
|
|
|
return result;
|
|
}
|
|
|
|
std::u16string UTF8ToUTF16(const std::string& input)
|
|
{
|
|
std::u16string result;
|
|
|
|
iconv_t const conv_desc = iconv_open("UTF-16", "UTF-8");
|
|
if ((iconv_t)(-1) == conv_desc)
|
|
{
|
|
ERROR_LOG(COMMON, "Iconv initialization failure [UTF-8]: %s", strerror(errno));
|
|
iconv_close(conv_desc);
|
|
return {};
|
|
}
|
|
|
|
const size_t in_bytes = sizeof(char) * input.size();
|
|
// Multiply by 4, which is the max number of bytes to encode a codepoint
|
|
const size_t out_buffer_size = 4 * sizeof(char16_t) * in_bytes;
|
|
|
|
std::u16string out_buffer;
|
|
out_buffer.resize(out_buffer_size);
|
|
|
|
char* src_buffer = const_cast<char*>(&input[0]);
|
|
size_t src_bytes = in_bytes;
|
|
char* dst_buffer = (char*)(&out_buffer[0]);
|
|
size_t dst_bytes = out_buffer.size();
|
|
|
|
while (0 != src_bytes)
|
|
{
|
|
size_t const iconv_result = iconv(conv_desc, &src_buffer, &src_bytes,
|
|
&dst_buffer, &dst_bytes);
|
|
|
|
if (static_cast<size_t>(-1) == iconv_result)
|
|
{
|
|
if (EILSEQ == errno || EINVAL == errno)
|
|
{
|
|
// Try to skip the bad character
|
|
if (0 != src_bytes)
|
|
{
|
|
--src_bytes;
|
|
++src_buffer;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ERROR_LOG(COMMON, "iconv failure [UTF-8]: %s", strerror(errno));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
out_buffer.resize(out_buffer_size - dst_bytes);
|
|
out_buffer.swap(result);
|
|
|
|
iconv_close(conv_desc);
|
|
|
|
return result;
|
|
}
|
|
|
|
std::string UTF16ToUTF8(const std::u16string& input)
|
|
{
|
|
return CodeToUTF8("UTF-16", input);
|
|
}
|
|
|
|
std::string CP1252ToUTF8(const std::string& input)
|
|
{
|
|
//return CodeToUTF8("CP1252//TRANSLIT", input);
|
|
//return CodeToUTF8("CP1252//IGNORE", input);
|
|
return CodeToUTF8("CP1252", input);
|
|
}
|
|
|
|
std::string SHIFTJISToUTF8(const std::string& input)
|
|
{
|
|
//return CodeToUTF8("CP932", input);
|
|
return CodeToUTF8("SJIS", input);
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|