// Source: ro-webgl/Assets/Src/Utils/UnicodeUtil.cs (104 lines, 3.1 KiB, C#)
//
// Note (from the repository viewer): this file contains Unicode characters that
// might be confused with other characters. If that is intentional, the warning
// can be safely ignored.

using System.Text.RegularExpressions;
using System;
using System.Text;
/// <summary>
/// Text helpers: decoding of <c>\uXXXX</c> escape sequences, detection/removal of the
/// UTF-8 byte-order mark, and a round-trip based guess of a byte buffer's encoding.
/// </summary>
public class UnicodeUtil
{
    // Literal backslash, 'u', then exactly four hex digits. IgnoreCase is kept from the
    // original pattern, so an upper-case "\U" prefix is accepted too. Built once and
    // reused instead of being re-created on every Convert call.
    private static readonly Regex UnicodeEscapeRegex =
        new Regex("\\\\u[0123456789abcdef]{4}", RegexOptions.IgnoreCase);

    // UTF-8 BOM byte sequence.
    private static readonly byte[] Utf8BOM = new byte[] { 0xEF, 0xBB, 0xBF };

    /// <summary>
    /// Replaces every <c>\uXXXX</c> escape sequence in the input with the UTF-16 code unit
    /// it denotes. All other text is copied through unchanged.
    /// </summary>
    /// <param name="unicodeString">Text that may contain <c>\uXXXX</c> escapes; may be null.</param>
    /// <returns>The decoded text, or <see cref="string.Empty"/> for null or empty input.</returns>
    public static string Convert(string unicodeString)
    {
        if (string.IsNullOrEmpty(unicodeString))
        {
            return string.Empty;
        }

        // Decode in a single left-to-right pass. Replacing match-by-match with
        // string.Replace would rescan text produced by earlier replacements, so an
        // escaped backslash followed by "uXXXX" could be decoded a second time.
        return UnicodeEscapeRegex.Replace(unicodeString, new MatchEvaluator(DecodeEscape));
    }

    // Match evaluator: turns one matched "\uXXXX" escape into its one-character string.
    private static string DecodeEscape(Match match)
    {
        return ConvertUnicodeStringToChar(match.Value).ToString();
    }

    // Parses the four hex digits that follow the two-character "\u" prefix.
    private static char ConvertUnicodeStringToChar(string str)
    {
        int codeUnit = int.Parse(
            str.Substring(2),
            System.Globalization.NumberStyles.HexNumber,
            System.Globalization.CultureInfo.InvariantCulture);
        return (char)codeUnit;
    }

    /// <summary>
    /// Determines whether the byte array starts with the UTF-8 BOM (EF BB BF).
    /// </summary>
    /// <param name="bytes">Buffer to inspect; may be null.</param>
    /// <returns>
    /// <c>true</c> if the first three bytes are the UTF-8 BOM; <c>false</c> otherwise,
    /// including when the buffer is null or shorter than the BOM.
    /// </returns>
    public static bool HasUtf8BOM(byte[] bytes)
    {
        // A null buffer or one shorter than the BOM cannot contain it.
        if (bytes == null || bytes.Length < Utf8BOM.Length)
        {
            return false;
        }

        // Compare the leading bytes against the BOM.
        for (int i = 0; i < Utf8BOM.Length; i++)
        {
            if (bytes[i] != Utf8BOM[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Removes a leading UTF-8 BOM from the byte array, if present.
    /// </summary>
    /// <param name="bytes">Buffer to strip; may be null.</param>
    /// <returns>
    /// A new array without the first three bytes when a BOM is present; otherwise the
    /// original array instance (or null if null was passed).
    /// </returns>
    public static byte[] RemoveUtf8BOM(byte[] bytes)
    {
        if (!HasUtf8BOM(bytes))
        {
            // No BOM: hand back the caller's array untouched.
            return bytes;
        }

        // BOM present: copy everything after it into a fresh array.
        byte[] result = new byte[bytes.Length - Utf8BOM.Length];
        Array.Copy(bytes, Utf8BOM.Length, result, 0, result.Length);
        return result;
    }

    /// <summary>
    /// Converts a byte array that may carry a UTF-8 BOM into one without it.
    /// Equivalent to <see cref="RemoveUtf8BOM"/>.
    /// </summary>
    /// <param name="bytes">Buffer to strip; may be null.</param>
    /// <returns>The buffer without a leading UTF-8 BOM.</returns>
    public static byte[] ConvertToNonBOM(byte[] bytes)
    {
        return RemoveUtf8BOM(bytes);
    }

    // Element-wise equality of two byte arrays; two nulls compare equal.
    private static bool AreArraysEqual(byte[] arr1, byte[] arr2)
    {
        if (ReferenceEquals(arr1, arr2))
        {
            return true;
        }

        if (arr1 == null || arr2 == null || arr1.Length != arr2.Length)
        {
            return false;
        }

        for (int i = 0; i < arr1.Length; i++)
        {
            if (arr1[i] != arr2[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Guesses the encoding of a byte buffer: each candidate encoding decodes the bytes
    /// and re-encodes the result, and the first candidate that reproduces the input
    /// byte-for-byte wins.
    /// </summary>
    /// <param name="bytes">Buffer to inspect; may be null.</param>
    /// <returns>
    /// The <see cref="Encoding.EncodingName"/> of the first candidate that round-trips the
    /// input, or <c>"Unknown Encoding"</c> when none does or the input is null.
    /// </returns>
    public static string GetEncoding(byte[] bytes)
    {
        if (bytes == null)
        {
            return "Unknown Encoding";
        }

        // Kept local rather than in a static field: if the ISO-8859-1 lookup fails on
        // some runtime, only this method is affected, not the whole class.
        // NOTE(review): UTF-8 is tried before ASCII, so pure-ASCII input is reported as
        // UTF-8; the order is preserved because callers may depend on the returned names.
        Encoding[] candidates =
        {
            Encoding.UTF8,
            Encoding.ASCII,
            Encoding.Unicode,
            Encoding.BigEndianUnicode,
            Encoding.GetEncoding("ISO-8859-1")
        };

        foreach (Encoding candidate in candidates)
        {
            try
            {
                string decoded = candidate.GetString(bytes);
                byte[] reencoded = candidate.GetBytes(decoded);
                if (AreArraysEqual(reencoded, bytes))
                {
                    return candidate.EncodingName;
                }
            }
            catch (Exception)
            {
                // Deliberate best-effort: a candidate that cannot decode or encode this
                // input is simply not a match, so move on to the next one.
            }
        }

        return "Unknown Encoding";
    }
}