Eom refs.pl
From Encyclopedia of Mathematics
#!/usr/bin/perl -w
# This script operates like a filter, i.e, it reads a file from
# standard input, rewrites its content and writes the transcript to
# standard output.
# It rewrites eom reference tables into wikipedia table style. It uses
# the Ref and Cite templates.
# It replaces the reference descriptors (like [1], [2],
# etc. by the first two letters of the author names like [Ab], [Ha],
# [KuZi] etc. and orders the tables alphabetically with respect to
# these descriptors. Moreover, in the tables, anchors are set by
# {{Ref|Ab}} using the Ref template while, within the text, references
# {{Cite|Ab}} to these anchors are installed by the Cite template.
# Author names are retrieved by the following heuristic:
# All those words after the ref descriptor (in brackets) are collected
# wich start with a capital followed by a small letter and ended by a
# comma (like Abel, ), and the first two letters of all these words
# are concatenated and used as new reference descriptor [Ab].
# In case of multiplicities of such strings the second, third, ... gets a
# '2','3', ... appended like [Ab2], [Ab3] etc.
use strict;
use utf8;
use Encode;
use Unicode::Collate;
# get file content:
undef $/;
my $f = decode('UTF-8', readline STDIN);
$f = &transcribe($f);
$f = encode('UTF-8',$f);
#print "...............\n";
print "{{MSC|}}\n{{TEX|done}}\n\n";
print $f;
exit(0);
sub transcribe {
my $f = $_[0];
# for unicode sorting/collating, see below
# Package libunicode-collate-perl required
my $Collator = Unicode::Collate->new();
# collect all reference lists in array
my @refs = ($f =~ /\=+References\=+\s+(\<table\>.*?\<\/table\>)/sg);
if ($#refs < 0) {
print "Error: no refs: length of refs array: $#refs\n"; exit(1);
}
my %H = (); # Hash for ref keys like [1] etc.
my %K = (); # Hash for multiplicity of ref key occurence
# Array for all initial keys:
my @items = ($f =~ /\>\[(\w+?)\]\<.*?,/sg);
foreach(@items) {
my $key = $_;
my $y = "";
# find names after $key and before next comma:
if ($f =~ /\>\[$key\]\<(.*?),\s*["\']/sg) {
my $x = $1;
# remove material in [..] (alternative name spelling)
$x =~ s/\[.*?\]//sg;
# collect first two letters of names
# and concat into $y, will serve as new key:
# my @A = ($x =~ / ([A-Z]\w)[^\.]/sg);
# required for unicode match:
my @A = ($x =~ / (\p{Lu}\p{Ll})[^\.]/sg);
foreach(@A) { $y .= $_; }
}
$K{$y}++;
if ($K{$y} > 1) { # append count if bigger than 1
$H{$key} = $y.$K{$y};
} else {
$H{$key} = $y;
}
}
$f =~ s/\[\[#References\|\[(\w+?)\]\]\]/{{Cite|$H{$1}}}/sg;
$f =~ s/\|\s*\[(\w?)\]\s*\|/|{{Ref|$1}}|/sg;
foreach(@refs) { # process all ref lists:
# print "$_\n";
my $x = $_;
# do table conversion:
my $y = &replace($x);
foreach(keys %H) {
$y =~ s/\[$_\]/{{Ref|$H{$_}}}/sg;
}
# sort bib entries by ref keys
my %R = ();
my @A = split(/\|\-/,$y);
foreach(@A) {
if (/\{\{Ref\|(\w+)\}\}/) { $R{$1} = $_; }
}
my $z = '{|'."\n".'|-';
# foreach(sort keys %R) { $z .= $R{$_}.'|-'; }
foreach($Collator->sort(keys %R)) { $z .= $R{$_}.'|-'; }
$z .= "\n\|\}\n";
# do replacements:
# prepend a backslash before each of <>"=()[]+-?|^$*\~
$x =~ s/([\<\>\"\=\(\)\[\]\+\-\?\|\^\$\*\\\~])/\\$1/sg;
$f =~ s/$x/$z/sg;
}
return $f;
}
## converts bib table from html to wiki structure
sub replace{
## cf. http://en.wikipedia.org/wiki/Help:Table#Pipe_syntax_tutorial
my $f = $_[0];
$f =~ s/\n/ /sg;
# replace data cells (last | still to be removed!):
$f =~ s/\<td\s(.*?)\>(.*?)\<\/td\>/|$1|$2|/sgi;
# replace row (consider removal of last | from above):
$f =~ s/\<tr\>(.*?)[\s\|]*\<\/tr\>\s*/|\-\n$1\n/sgi;
# no spaces between consecutive |:
$f =~s/\|\s*\|/||/sgi;
# replace <table> and </table>:
$f =~ s/\<table\>/\{|\n/sgi;
$f =~ s/\<\/table\>/|\-\n|\}/sgi;
# remove possible | before template calls:
$f =~ s/\|\s*\{\{/\{\{/sg;
# remove space before some punctuation:
$f =~ s/ +([\,\.])/$1/sg;
return $f;
}
How to Cite This Entry:
Eom refs.pl. Encyclopedia of Mathematics. URL: http://encyclopediaofmath.org/index.php?title=Eom_refs.pl&oldid=34473
Eom refs.pl. Encyclopedia of Mathematics. URL: http://encyclopediaofmath.org/index.php?title=Eom_refs.pl&oldid=34473