|
作者:林灿斌 |
积分:48 |
手机:诺基亚 5130XM |
注册:2009-07-19 |
|
<%@CodePage=65001 Language="VBScript"%>
<%
' Wiki XML dump -> TXT converter (main script).
'
' Loads the MediaWiki XML dump, walks every child of the root element and,
' for each node that yields a non-empty title and text, appends one line
'     title <Tab> text <CRLF>
' to an in-memory buffer. The buffer is handed to SaveToFile in batches so
' that the string being concatenated never grows without bound.
'
' Globals read by the HTML part of the page: TotalCount, startime.
Server.ScriptTimeOut=9999999

Dim binFileData, i
Dim XMLDOM, XMLRoot, ParentNode, NodeIndex, Title, ChildNode

startime=Timer()          ' the page footer computes the elapsed time from this
i=0
binFileData=""
TotalCount=0

Set XMLDOM=Server.CreateObject("Microsoft.XMLDOM")
' The DOM loads asynchronously by default; force a synchronous load so that
' documentElement is fully populated when load returns.
XMLDOM.async=False
XMLDOM.load Server.MapPath("wiki/zhwikiquote-20090820-pages-articles.xml") ' input file (directory and name)
Set XMLRoot=XMLDOM.documentElement

If Not XMLRoot Is Nothing Then
    TotalCount=XMLRoot.childNodes.length-1
    For NodeIndex=0 To TotalCount ' loop over every child of the root (the page elements)
        Title=""
        ChildNode=""

        ' Nodes that do not have the expected child layout raise here; such
        ' nodes are skipped on purpose, so errors are suppressed only for
        ' these lookups.
        On Error Resume Next
        Set ParentNode=XMLRoot.childNodes(NodeIndex)
        Title=HTMLEncode(ParentNode.childNodes(0).text)
        ChildNode=HTMLEncode(ParentNode.childNodes(2).childNodes(4).text)
        Err.Clear
        On Error GoTo 0

        ' Titles containing ":" are skipped (presumably namespaced pages - confirm).
        If InStr(Title,":")=0 And Title<>"" And ChildNode<>"" Then
            ' One record per line, fields separated by a Tab as documented on the page.
            binFileData=binFileData&Title&vbTab&ChildNode&vbCrLf
            i=i+1
            If i>100 Then
                SaveToFile binFileData,"zhwikiquote.txt" ' output file (directory and name)
                i=0
                binFileData=""
            End If
        End If
    Next

    ' Flush the last, partially filled batch.
    If binFileData<>"" Then
        SaveToFile binFileData,"zhwikiquote.txt"
        binFileData=""
    End If
End If

Set ParentNode=nothing
Set XMLRoot=nothing
Set XMLDOM=nothing
' Appends strBody to the text file File (a virtual path resolved with
' Server.MapPath), creating the file when it does not exist yet.
' The file is read and written as UTF-16 ("Unicode" charset).
'
' strBody - text to append.
' File    - virtual path of the target file.
Sub SaveToFile(ByVal strBody,ByVal File)
    Dim objStream, targetPath
    targetPath=Server.MapPath(File)
    Set objStream=Server.CreateObject("ADODB.Stream")
    With objStream
        .Type=2               ' adTypeText
        .Charset="Unicode"
        .Open

        ' Pull in the existing content so the new text is appended to it.
        ' Loading fails when the file does not exist yet (first batch); that
        ' case is expected, so only this call is allowed to fail silently.
        On Error Resume Next
        .LoadFromFile targetPath
        Err.Clear
        On Error GoTo 0

        .Position=.Size       ' move to the end of the existing content
        .WriteText strBody    ' WriteText is a method, not an assignable property
        .SaveToFile targetPath,2 ' adSaveCreateOverWrite
        .Close
    End With
    Set objStream=Nothing
End Sub
' Cleans a piece of wiki text so it fits on one line of the output file.
'
' - removes the ''' bold markup, double quotes and single quotes;
' - turns line breaks and tabs into single spaces, so the text cannot break
'   a one-record-per-line, tab-separated layout;
' - removes {{...}} templates, innermost first, repeating until none is left.
'
' fString - text to clean (also modified in place, as VBScript passes it ByRef).
' Returns the cleaned text.
function HTMLEncode(fString)
    Dim previous
    fString=Replace(fString,"'''","")      ' wiki bold markup
    fString=Replace(fString,CHR(34),"")    ' double quote
    fString=Replace(fString,CHR(39),"")    ' single quote
    fString=Replace(fString,vbCrLf," ")    ' CR+LF pair -> one space
    fString=Replace(fString,CHR(13)," ")   ' stray carriage return
    fString=Replace(fString,CHR(10)," ")   ' line feed
    fString=Replace(fString,CHR(9)," ")    ' tab

    ' Match only templates that contain no further braces, and repeat: this
    ' strips nested templates from the inside out without swallowing the
    ' ordinary text that sits between two separate templates.
    Do
        previous=fString
        fString=ReplaceText(fString,"\{\{[^{}]*\}\}","")
    Loop While fString<>previous

    HTMLEncode=fString
end function
' Replaces every match of a regular expression in a string.
'
' fString - text to search (coerced to a string).
' patrn   - VBScript RegExp pattern; matching is case-insensitive and global.
' replStr - replacement text (coerced to a string).
' Returns the text with all matches replaced.
Function ReplaceText(fString,patrn,replStr)
    Dim regEx                 ' keep the RegExp local instead of leaking a global
    Set regEx=New RegExp
    regEx.Pattern=patrn
    regEx.IgnoreCase=True
    regEx.Global=True
    ' Concatenating with "" coerces non-string arguments to strings.
    ReplaceText=regEx.Replace(""&fString&"",""&replStr&"")
    Set regEx=Nothing         ' release the object that was actually created
End Function
%>
维基XML To TXT文件转换器
<%=TotalCount%>条数据转换完成,耗时<%=fix((timer()-startime)*1000)%>毫秒
文件格式:
词条名称+Tab制表符+词条内容
|
|