tips:zwiki2dokuwiki
 Table of Contents
Zwiki2DokuWiki
Zwiki2DokuWiki is a Perl script that translates HTML pages exported from Zwiki into DokuWiki syntax.
Zwiki2DokuWiki Home Page
Extract HTML Page
The HTML pages were extracted with w3mir.
Launch Zwiki2DokuWiki
#!/usr/bin/perl
#/***********************************************************************
# Copyright (C) 2006 foxmask (foxmask at gmail dot com) http://foxmask.info
# version 0.2
# Zwiki2Dokuwiki is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published
# by the Free Software Foundation; either version 2 of the License,
# or (at your option) any later version.
# Zwiki2DokuWiki is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307 USA
#
#************************************************************************/
# Usage: zwiki2dokuwiki <source_dir> <destination_dir>
#
# This script will
# 1 - read a given directory (recursively)
# 2 - for each page "foobar", extract the "wiki content" into foobar.tmp
#     as a clean HTML page
# 3 - "translate" that HTML page to DokuWiki code in <destination>/foobar.txt
# Files whose name contains a dot are copied as they are.

use strict;
use warnings;

# Package to use File
use File::Find;
use File::Basename;
use File::Copy;
use File::Path;
# Package to use Wiki
use HTML::WikiConverter;
use HTML::WikiConverter::DokuWiki;
# Package to read HTML file as a Tree
use HTML::TreeBuilder;
# Package to estimate the elapsed time of the execution of current script
use Date::Calc qw (Today_and_Now Delta_YMDHMS Add_Delta_YMDHMS Delta_DHMS Date_to_Text);

my $wc = HTML::WikiConverter->new(
    encoding => 'utf8',
    dialect  => 'DokuWiki',
    base_uri => 'http://wiki.intranet/',
    wiki_uri => [ 'http://wiki.intranet/' ],
);

# Starting date
my ($y1, $m1, $d1, $h1, $mi1, $s1) = Today_and_Now();

if ( @ARGV != 2 ) {
    print STDERR "Usage: $0 <source_dir> <destination_dir>\n";
    exit(1);
}

# globals for the File::Find callback
our ($src_dir, $dst_dir) = @ARGV;

# for test purpose
# $src_dir = $ENV{HOME}.'/wiki.intranet.fimasys.fr';
# $src_dir = $ENV{HOME}.'/mirror';
# $dst_dir = $ENV{HOME}.'/dokuwiki/data/pages';
# print "src_dir : $src_dir \ndst dir : $dst_dir\n";

# Drop trailing slashes (but keep a lone "/") so that the prefix removal in
# the callback sees the same spelling File::Find reports.
s{(?<=.)/+\z}{}s for $src_dir, $dst_dir;

# if the source directory does not exist we exit
if ( !-d $src_dir ) {
    print STDERR "$0: source directory '$src_dir' does not exist\n";
    exit(1);
}

# build the final directory
mkpath([ $dst_dir ]);

# Startup on Zwiki2Dokuwiki.
# no_chdir keeps the working directory unchanged during the traversal, so
# $File::Find::name stays usable even when the directories given on the
# command line are relative paths.
find( { wanted => \&zwiki2dokuwiki, no_chdir => 1 }, $src_dir );

# Elapsed time
my ($y2, $m2, $d2, $h2, $mi2, $s2) = Today_and_Now();
my ($Dd, $Dh, $Dm, $Ds) = Delta_DHMS(
    $y1, $m1, $d1, $h1, $mi1, $s1,
    $y2, $m2, $d2, $h2, $mi2, $s2,
);
print "Elapse time: $Dd day(s) $Dh hre(s) $Dm mn $Ds s\n";

# File::Find callback, called once for every file and directory found
# below $src_dir.
#   - directories are re-created below $dst_dir
#   - files with a dot in their name are copied unchanged
#   - other files are treated as Zwiki pages and converted to <name>.txt
# Names below $dst_dir are lower-cased; a trailing ".html" is dropped.
sub zwiki2dokuwiki {
    my $src = $File::Find::name;

    # relative path from $src_dir (only the leading prefix is removed)
    ( my $relative_pathname = $src ) =~ s{\A\Q$src_dir\E/?}{};

    # destination: only the relative part is lower-cased, the directory
    # given by the user is kept exactly as it was created
    my $dst = "$dst_dir/" . lc $relative_pathname;
    $dst =~ s/\.html\z//;

    # a directory: just make sure it exists on the destination side
    if ( -d $src ) {
        mkpath([ $dst ]);
        return;
    }

    # we do not convert files with a . in their own name; just copy them
    # (the test is on the file name, not on the directories leading to it)
    if ( basename($src) =~ /\./ ) {
        copy( $src, $dst )
            or warn "Can't copy file $src to $dst: $!\n";
        return;
    }

    # a page which has already been converted is left alone
    my $page = "$dst.txt";
    return if -e $page;

    # we will extract only the "body" identified by <div class="content">
    # the rest (header and footer) is Zwiki specific
    my $tmp = "$src.tmp";
    if ( cleanup( $tmp, $src ) ) {
        print "Processing ", $dst, ".txt\n";
        my $wiki = $wc->html2wiki( file => $tmp );

        if ( open my $out, '>', $page ) {
            print {$out} $wiki;
            close $out
                or warn "Can't close file $page: $!\n";
        }
        else {
            warn "Can't write file $page: $!\n";
        }
    }

    # the temporary file is removed whatever happened above
    if ( -e $tmp ) {
        unlink $tmp
            or warn "Can't remove file $tmp: $!\n";
    }

    return;
}

# Read the HTML file $filesrc, keep its <div class="content"> element(s)
# and write them to $filetmp as a small HTML page with a UTF-8 header.
#   - <form> elements inside the kept div are removed
#   - in <ul>/<li> lists, the href of every link which does not start with
#     http, ftp or file gets its / (slash) replaced by : (colon)
# Parameters : ( $filetmp, $filesrc )
# Returns    : 1 when $filetmp has been written, nothing otherwise
#              (a warning is then issued)
sub cleanup {
    my ($filetmp, $filesrc) = @_;

    # read the source file
    my $in;
    if ( !open $in, '<', $filesrc ) {
        warn "Can't read file $filesrc: $!\n";
        return;
    }
    binmode $in;
    my $tree = HTML::TreeBuilder->new();
    $tree->parse_file($in);
    close $in;

    # make the temp file
    my $out;
    if ( !open $out, '>', $filetmp ) {
        warn "Can't write file $filetmp: $!\n";
        $tree->delete;
        return;
    }

    print {$out} "<html>\n";
    print {$out} "\t<head>\n";
    print {$out} "\t\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n";
    print {$out} "\t</head>\n";

    # read the body content
    foreach my $body ( $tree->find_by_tag_name('body') ) {
        print {$out} "\t<body>\n";

        # isolate the <div class="content">; each one is visited once,
        # however deeply it is nested in other divs
        foreach my $div ( $body->look_down( _tag => 'div', class => 'content' ) ) {

            # a content div lying inside another content div has already
            # been written together with the outer one
            my $parent = $div->parent or next;
            next if $parent->look_up( _tag => 'div', class => 'content' );

            # if we find a form in the div we remove it
            $_->delete for $div->find_by_tag_name('form');

            foreach my $ul ( $div->find_by_tag_name('ul') ) {
                foreach my $li ( $ul->find_by_tag_name('li') ) {
                    foreach my $anchor ( $li->find_by_tag_name('a') ) {
                        my $href = $anchor->attr('href');

                        # named anchors (no href) are not links
                        next if !defined $href;
                        next if $href =~ /^(?:http|ftp|file)/;

                        # if the link contains things like
                        # <a href="Admin/FrontPage">AdminWiki:FrontPage</a>
                        # we replace the / (slash) by : (colon)
                        # thus Dokuwiki will "understand" the content of
                        # the href. Only the attribute is changed, so the
                        # text of the link is kept as it is.
                        ( my $wiki_href = $href ) =~ s{/}{:}g;
                        $anchor->attr( 'href', $wiki_href );
                    }
                }
            }

            # we add the div to the temp file
            print {$out} $div->as_HTML();
        }

        # end of body
        print {$out} "\t</body>\n";
    }

    # end of the html page and close the tmp file
    print {$out} "</html>";
    my $written = close $out;
    warn "Can't close file $filetmp: $!\n" if !$written;

    # release the parsed tree
    $tree->delete;

    return $written ? 1 : ();
}
Installation in Dokuwiki
Copy the working directory into a DokuWiki installation (this can already be done in step 2 if you give the final directory on the command line).
tips/zwiki2dokuwiki.txt · Last modified:  by 84.100.218.121