為了一個gb2312下post中文參數(shù)到utf-8頁面的程序,隨悶的難受,查了一下午資料,大部分是講Gb2312傳到Gb2312頁面的UrlEncode,沒有提供到Utf-8頁面的UrlEncode,后來找到Unicode轉(zhuǎn)Utf-8碼的資料,參考之下,終于寫出了Utf-8的UrlEncode,這里整理下各種UrlEncode方法,供有需要的朋友參考。
詳細Utf-8編碼規(guī)則請百度一下。
Unicode 與 Utf-8碼間的內(nèi)碼規(guī)則模板為:
原始碼(16進制) UTF-8編碼(二進制)
--------------------------------------------
0000 - 007F 0xxxxxxx
0080 - 07FF 110xxxxx 10xxxxxx
0800 - FFFF 1110xxxx 10xxxxxx 10xxxxxx (中文字在此區(qū)間)
……
--------------------------------------------
例如:
百度中查詢“中國人”,會將中文URL參數(shù)轉(zhuǎn)為Gb2312碼的16進制表示,一個中文字用2個字節(jié)
http://www.baidu.com/s?wd=%D6%D0%B9%FA%C8%CB
Google中查詢“中國人”,會將中文URL參數(shù)轉(zhuǎn)為Utf-8編碼的16進制表示,一個中文字用3個字節(jié)
http://www.google.cn/search?client=opera&rls=en&q=%E4%B8%AD%E5%9B%BD%E4%BA%BA&sourceid=opera&ie=utf-8&oe=utf-8
'Url編碼,Gb2312頁面之間傳遞參數(shù)
Function URLEncode_Gb(ByVal str)
Dim i,s
Dim B,bCode,gb,Hight8b,Low8b
s = ""
For i = 1 To Len(str)
B = Mid(str,i,1)
bCode=Abs(Asc(B))
If (bCode>=48 And bCode<=57) Or (bCode>=65 And bCode<=90) Or (bCode>=97 And bCode<=122) Or bCode=42 Or bCode=45 Or bCode=46 Or bCode=64 Or bCode=95 Then
'48 to 57代表0~9;65 to 90代表A~Z;97 to 122代表a~z
'42代表*;46代表.;64代表@;45代表-;95代表_
s=s & B
ElseIf bCode=32 Then '空格轉(zhuǎn)成+
s=s & "+"
ElseIf bCode<128 Then '低于128的Ascii轉(zhuǎn)成1個字節(jié)
s=s & "%" & Right("00" & Hex(bCode),2)
Else
gb = Asc(B)
If gb < 0 Then
gb = gb + &H10000 'gb編碼為負數(shù),要加上65536
End If
Hight8b = (gb And &HFF00) / &H100 '二進制高8位
Low8b = gb And &HFF '二進制低8位
s = s & "%" & Hex(Hight8b) & "%" & Hex(Low8b)
End If
Next
URLEncode_Gb = s
End Function
'Url解碼,Gb2312頁面之間傳遞參數(shù)
Function URLDecode_Gb(ByVal str)
Dim i,s
Dim B,bCode,gb,Hight8b,Low8b
s = ""
For i = 1 To Len(str)
B = Mid(str,i,1)
Select Case B
Case "+"
s=s & " "
Case "%"
gb=Mid(str,i+1,2)
bCode=CInt("&H" & gb)
If bCode<128 Then
i=i+2
Else
bCode=CInt("&H" & gb & Mid(str,i+4,2))
i=i+5
End If
s=s & Chr(bCode)
Case Else
s=s & B
End Select
Next
URLDecode_Gb = s
End Function
'URL編碼,Gb2312頁面提交到Utf-8頁面
Function UrlEncode_GBToUtf8(ByVal str)
Dim B '單個字符
Dim ub '中文字的Unicode碼(2字節(jié))
Dim High8b, Low8b 'Unicode碼的高低位字節(jié)
Dim UtfB1, UtfB2, UtfB3 'Utf-8碼的三個字節(jié)
Dim i, s
For i = 1 To Len(str)
B=Mid(str, i, 1)
ub = AscW(B)
If (ub>=48 And ub<=57) Or (ub>=65 And ub<=90) Or (ub>=97 And ub<=122) Or ub=42 Or ub=45 Or ub=46 Or ub=64 Or ub=95 Then
'48 to 57代表0~9;65 to 90代表A~Z;97 to 122代表a~z
'42代表*;46代表.;64代表@;45代表-;95代表_
s=s & B
ElseIf ub=32 Then '空格轉(zhuǎn)成+
s=s & "+"
ElseIf ub<128 Then '低于128的Ascii轉(zhuǎn)成1個字節(jié)
s=s & "%" & Right("00" & Hex(ub),2)
Else
High8b = (ub And &HFF00) / &H100 'Unicode碼高位
Low8b = ub And &HFF 'Unicode碼低位
UtfB1 = (High8b And &HF0) / &H10 Or &HE0 '取Unicode高位字節(jié)的二進制的前4位 + 11100000
UtfB2 = ((High8b And &HF) * &H4 + (Low8b And &HC0) / &H40) Or &H80 '取Unicode高位字節(jié)的后4位及低位字節(jié)的前2位 +10000000
UtfB3 = (Low8b And &H3F) Or &H80 '取Unicode低位字節(jié)的二進制后6位 + 10000000
s = s & "%" & Hex(UtfB1) & "%" & Hex(UtfB2) & "%" & Hex(UtfB3)
End If
Next
UrlEncode_GBToUtf8 = s
End Function
'“漢”-AscW("漢")=27721(十進制) 01101100 01001001(二進制) 6C49(十六進制)
'將Gb2312碼轉(zhuǎn)成Utf-8碼(十六進制表示)的方法為,先用AscW將Gb2312轉(zhuǎn)為Unicode碼(2字節(jié)),再'將Unicode碼的二進制中的位按utf-8(3字節(jié))模板規(guī)則填充 x 位:
'URL解碼,Gb2312頁面提交到Utf-8頁面
Function UrlDecode_GBToUtf8(ByVal str)
Dim B,ub '中文字的Unicode碼(2字節(jié))
Dim UtfB 'Utf-8單個字節(jié)
Dim UtfB1, UtfB2, UtfB3 'Utf-8碼的三個字節(jié)
Dim i, n, s
n=0
ub=0
For i = 1 To Len(str)
B=Mid(str, i, 1)
Select Case B
Case "+"
s=s & " "
Case "%"
ub=Mid(str, i + 1, 2)
UtfB = CInt("&H" & ub)
If UtfB<128 Then
i=i+2
s=s & ChrW(UtfB)
Else
UtfB1=(UtfB And &H0F) * &H1000 '取第1個Utf-8字節(jié)的二進制后4位
UtfB2=(CInt("&H" & Mid(str, i + 4, 2)) And &H3F) * &H40 '取第2個Utf-8字節(jié)的二進制后6位
UtfB3=CInt("&H" & Mid(str, i + 7, 2)) And &H3F '取第3個Utf-8字節(jié)的二進制后6位
s=s & ChrW(UtfB1 Or UtfB2 Or UtfB3)
i=i+8
End If
Case Else 'Ascii碼
s=s & B
End Select
Next
UrlDecode_GBToUtf8 = s
End Function
'URL編碼,Gb2312頁面提交到Utf-8頁面,另一種位計算方法
Private Function UrlEncode_GBToUtf8_V2(szInput)
Dim wch, uch, szRet
Dim x
Dim nAsc, nAsc2, nAsc3
If szInput = "" Then
UrlEncode_GBToUtf8_V2= szInput
Exit Function
End If
For x = 1 To Len(szInput)
wch = Mid(szInput, x, 1)
nAsc = AscW(wch)
If nAsc < 0 Then nAsc = nAsc + 65536
If (nAsc And &HFF80) = 0 Then
szRet = szRet & wch
Else
If (nAsc And &HF000) = 0 Then
uch = "%" & Hex(((nAsc 2 ^ 6)) Or &HC0) & Hex(nAsc And &H3F Or &H80)
szRet = szRet & uch
Else
uch = "%" & Hex((nAsc 2 ^ 12) Or &HE0) & "%" & _
Hex((nAsc 2 ^ 6) And &H3F Or &H80) & "%" & _
Hex(nAsc And &H3F Or &H80)
szRet = szRet & uch
End If
End If
Next
UrlEncode_GBToUtf8_V2= szRet
End Function
'VB下用API方法的Unicode轉(zhuǎn)Utf-8方法:
Private Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpDefaultChar As String, ByVal lpUsedDefaultChar As Long) As Long
Private Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpMultiByteStr As Long, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Private Const CP_UTF8 = 65001
Function Utf8ToUnicode(ByRef Utf() As Byte) As String
Dim lRet As Long
Dim lLength As Long
Dim lBufferSize As Long
lLength = UBound(Utf) - LBound(Utf) + 1
If lLength <= 0 Then Exit Function
lBufferSize = lLength * 2
Utf8ToUnicode = String$(lBufferSize, Chr(0))
lRet = MultiByteToWideChar(CP_UTF8, 0, VarPtr(Utf(0)), lLength, StrPtr(Utf8ToUnicode), lBufferSize)
If lRet <> 0 Then
Utf8ToUnicode = Left(Utf8ToUnicode, lRet)
End If
End Function
Function UnicodeToUtf8(ByVal UCS As String) As Byte()
Dim lLength As Long
Dim lBufferSize As Long
Dim lResult As Long
Dim abUTF8() As Byte
lLength = Len(UCS)
If lLength = 0 Then Exit Function
lBufferSize = lLength * 3 + 1
ReDim abUTF8(lBufferSize - 1)
lResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(UCS), lLength, abUTF8(0), lBufferSize, vbNullString, 0)
If lResult <> 0 Then
lResult = lResult - 1
ReDim Preserve abUTF8(lResult)
UnicodeToUtf8 = abUTF8
End If
End Function