Difference between revisions of "Eom refs.pl"
From Encyclopedia of Mathematics
Thomas Unger (talk | contribs) (created file eom_refs.pl) |
Ulf Rehmann (talk | contribs) m (change of diagnostic message) |
||
Line 6: | Line 6: | ||
# standard output. | # standard output. | ||
− | # It rewrites eom reference tables into wikipedia table style. It uses | + | # It rewrites old eom reference tables into wikipedia table style. It uses |
# the Ref and Cite templates. | # the Ref and Cite templates. | ||
Line 52: | Line 52: | ||
my $Collator = Unicode::Collate->new(); | my $Collator = Unicode::Collate->new(); | ||
− | # collect all reference lists in array | + | # collect all reference lists in array |
− | my @refs = ($f =~ /\=+References\=+\s+(\<table\>.*?\<\/table\>)/sg); | + | my @refs = ($f =~ /\=+References\=+\s+(\<table\>.*?\<\/table\>)/sg); |
− | if ($#refs < 0) { | + | if ($#refs < 0) { |
− | + | my $error = <<"END_ERR"; | |
+ | Error: no refs: length of refs array: $#refs. | ||
+ | |||
+ | There is no References section in the old EoM style, using the format | ||
+ | between the html tags <table> ... </table>. | ||
+ | |||
+ | Reasons for this could be that the references have been already | ||
+ | processed into new (Mediawiki) style, or that they are just scrambled, | ||
+ | or that there are no references at all in the article. | ||
+ | |||
+ | Please check or contact one of the editors. | ||
+ | END_ERR | ||
+ | |||
+ | print $error; | ||
+ | exit(1); | ||
} | } | ||
Latest revision as of 13:10, 12 November 2014
#!/usr/bin/perl -w # This script operates like a filter, i.e, it reads a file from # standard input, rewrites its content and writes the transcript to # standard output. # It rewrites old eom reference tables into wikipedia table style. It uses # the Ref and Cite templates. # It replaces the reference descriptors (like [1], [2], # etc. by the first two letters of the author names like [Ab], [Ha], # [KuZi] etc. and orders the tables alphabetically with respect to # these descriptors. Moreover, in the tables, anchors are set by # {{Ref|Ab}} using the Ref template while, within the text, references # {{Cite|Ab}} to these anchors are installed by the Cite template. # Author names are retrieved by the following heuristic: # All those words after the ref descriptor (in brackets) are collected # wich start with a capital followed by a small letter and ended by a # comma (like Abel, ), and the first two letters of all these words # are concatenated and used as new reference descriptor [Ab]. # In case of multiplicities of such strings the second, third, ... gets a # '2','3', ... appended like [Ab2], [Ab3] etc. use strict; use utf8; use Encode; use Unicode::Collate; # get file content: undef $/; my $f = decode('UTF-8', readline STDIN); $f = &transcribe($f); $f = encode('UTF-8',$f); #print "...............\n"; print "{{MSC|}}\n{{TEX|done}}\n\n"; print $f; exit(0); sub transcribe { my $f = $_[0]; # for unicode sorting/collating, see below # Package libunicode-collate-perl required my $Collator = Unicode::Collate->new(); # collect all reference lists in array my @refs = ($f =~ /\=+References\=+\s+(\<table\>.*?\<\/table\>)/sg); if ($#refs < 0) { my $error = <<"END_ERR"; Error: no refs: length of refs array: $#refs. There is no References section in the old EoM style, using the format between the html tags <table> ... </table>. Reasons for this could be that the references have been already processed into new (Mediawiki) style, or that they are just scrambled, or that there are no references at all in the article. Please check or contact one of the editors. END_ERR print $error; exit(1); } my %H = (); # Hash for ref keys like [1] etc. my %K = (); # Hash for multiplicity of ref key occurence # Array for all initial keys: my @items = ($f =~ /\>\[(\w+?)\]\<.*?,/sg); foreach(@items) { my $key = $_; my $y = ""; # find names after $key and before next comma: if ($f =~ /\>\[$key\]\<(.*?),\s*["\']/sg) { my $x = $1; # remove material in [..] (alternative name spelling) $x =~ s/\[.*?\]//sg; # collect first two letters of names # and concat into $y, will serve as new key: # my @A = ($x =~ / ([A-Z]\w)[^\.]/sg); # required for unicode match: my @A = ($x =~ / (\p{Lu}\p{Ll})[^\.]/sg); foreach(@A) { $y .= $_; } } $K{$y}++; if ($K{$y} > 1) { # append count if bigger than 1 $H{$key} = $y.$K{$y}; } else { $H{$key} = $y; } } $f =~ s/\[\[#References\|\[(\w+?)\]\]\]/{{Cite|$H{$1}}}/sg; $f =~ s/\|\s*\[(\w?)\]\s*\|/|{{Ref|$1}}|/sg; foreach(@refs) { # process all ref lists: # print "$_\n"; my $x = $_; # do table conversion: my $y = &replace($x); foreach(keys %H) { $y =~ s/\[$_\]/{{Ref|$H{$_}}}/sg; } # sort bib entries by ref keys my %R = (); my @A = split(/\|\-/,$y); foreach(@A) { if (/\{\{Ref\|(\w+)\}\}/) { $R{$1} = $_; } } my $z = '{|'."\n".'|-'; # foreach(sort keys %R) { $z .= $R{$_}.'|-'; } foreach($Collator->sort(keys %R)) { $z .= $R{$_}.'|-'; } $z .= "\n\|\}\n"; # do replacements: # prepend a backslash before each of <>"=()[]+-?|^$*\~ $x =~ s/([\<\>\"\=\(\)\[\]\+\-\?\|\^\$\*\\\~])/\\$1/sg; $f =~ s/$x/$z/sg; } return $f; } ## converts bib table from html to wiki structure sub replace{ ## cf. http://en.wikipedia.org/wiki/Help:Table#Pipe_syntax_tutorial my $f = $_[0]; $f =~ s/\n/ /sg; # replace data cells (last | still to be removed!): $f =~ s/\<td\s(.*?)\>(.*?)\<\/td\>/|$1|$2|/sgi; # replace row (consider removal of last | from above): $f =~ s/\<tr\>(.*?)[\s\|]*\<\/tr\>\s*/|\-\n$1\n/sgi; # no spaces between consecutive |: $f =~s/\|\s*\|/||/sgi; # replace <table> and </table>: $f =~ s/\<table\>/\{|\n/sgi; $f =~ s/\<\/table\>/|\-\n|\}/sgi; # remove possible | before template calls: $f =~ s/\|\s*\{\{/\{\{/sg; # remove space before some punctuation: $f =~ s/ +([\,\.])/$1/sg; return $f; }
How to Cite This Entry:
Eom refs.pl. Encyclopedia of Mathematics. URL: http://encyclopediaofmath.org/index.php?title=Eom_refs.pl&oldid=32264
Eom refs.pl. Encyclopedia of Mathematics. URL: http://encyclopediaofmath.org/index.php?title=Eom_refs.pl&oldid=32264