Namespaces
Variants
Actions

Eom refs.pl

From Encyclopedia of Mathematics
Revision as of 13:43, 20 June 2014 by Thomas Unger (talk | contribs) (created file eom_refs.pl)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search
#!/usr/bin/perl -w

# This script operates like a filter, i.e., it reads a file from
# standard input, rewrites its content and writes the transcript to
# standard output.

# It rewrites eom reference tables into wikipedia table style. It uses
# the Ref and Cite templates.

# It replaces the reference descriptors (like [1], [2], etc.) by the
# first two letters of the author names (like [Ab], [Ha], [KuZi],
# etc.) and orders the tables alphabetically with respect to
# these descriptors.  Moreover, in the tables, anchors are set by
# {{Ref|Ab}} using the Ref template while, within the text, references
# {{Cite|Ab}} to these anchors are installed by the Cite template.

# Author names are retrieved by the following heuristic: 

# All those words after the ref descriptor (in brackets) are collected
# which start with a capital followed by a small letter and end with a
# comma (like Abel,), and the first two letters of all these words
# are concatenated and used as the new reference descriptor [Ab].

# If such a string occurs more than once, the second, third, ...
# occurrence gets a '2', '3', ... appended, like [Ab2], [Ab3] etc.


use strict;
use utf8;
use Encode;
use  Unicode::Collate;

# Read all of standard input in a single chunk (record separator
# disabled) and decode the UTF-8 octets into Perl characters.
undef $/;
my $raw_input = <STDIN>;
my $page = decode('UTF-8', $raw_input);

# Convert the reference tables and citations, then re-encode the
# result as UTF-8 octets for output.
my $converted = encode('UTF-8', transcribe($page));

# Emit the fixed template header followed by the rewritten page.
#print "...............\n";
print "{{MSC|}}\n{{TEX|done}}\n\n", $converted;
exit(0);

## Rewrites the reference tables and citations of one page.
##
## Argument: the whole page text as a (decoded) character string.
## Returns:  the page text in which
##   - every citation [[#References|[key]]] whose key occurs in a
##     reference table is replaced by {{Cite|NewKey}},
##   - every HTML reference table following a "References" heading is
##     replaced by a wiki table whose rows carry {{Ref|NewKey}} anchors
##     and are sorted by NewKey using Unicode collation.
## NewKey is built from the first two letters of each author name found
## after the old descriptor; repeated NewKeys get 2, 3, ... appended.
## If the page contains no reference table, an error message is written
## to standard error and the script exits with status 1.
sub transcribe {
    my ($f) = @_;

    # for unicode sorting/collating, see below
    # Package libunicode-collate-perl required
    my $Collator = Unicode::Collate->new();

    # collect all reference lists in array
    my @refs = ($f =~ /\=+References\=+\s+(\<table\>.*?\<\/table\>)/sg);
    if (!@refs) {
        # Report on STDERR: STDOUT carries the filtered page and must
        # not be polluted with diagnostics.
        print STDERR "Error:  no refs: length of refs array: $#refs\n";
        exit(1);
    }

    my %H = ();    # old ref key (like 1) => new ref key (like Ab)
    my %K = ();    # number of times each new key stem was produced

    # Array for all initial keys, in order of appearance:
    my @items = ($f =~ /\>\[(\w+?)\]\<.*?,/sg);
    foreach my $key (@items) {
        # A key that shows up more than once keeps its first mapping,
        # so that all its anchors and citations stay consistent.
        next if exists $H{$key};

        my $y = "";
        # Find names after $key and before the comma that precedes the
        # quoted title.  Deliberately no /g: a scalar-context m//g
        # would resume at pos($f) left by the previous iteration and
        # miss keys located before that position.
        if ($f =~ /\>\[\Q$key\E\]\<(.*?),\s*["\']/s) {
            my $names = $1;
            # remove material in [..] (alternative name spelling)
            $names =~ s/\[.*?\]//sg;
            # collect first two letters of names (unicode aware)
            # and concat them, will serve as new key:
            $y = join('', ($names =~ / (\p{Lu}\p{Ll})[^\.]/sg));
        }
        # Without any recognisable author name keep the old descriptor;
        # an empty key would produce {{Ref|}}, which the row filter
        # below does not accept, so the entry would be lost.
        $y = $key if $y eq "";

        $K{$y}++;
        # append count if bigger than 1
        $H{$key} = ($K{$y} > 1) ? $y . $K{$y} : $y;
    }

    # Citations in the running text; citations of unknown keys are
    # left untouched instead of becoming an empty {{Cite|}}.
    $f =~ s/(\[\[#References\|\[(\w+?)\]\]\])/exists $H{$2} ? '{{Cite|' . $H{$2} . '}}' : $1/sge;
    # NOTE(review): matches at most one word character between the
    # brackets and keeps it as is; kept unchanged from the original --
    # confirm the intent.
    $f =~ s/\|\s*\[(\w?)\]\s*\|/|{{Ref|$1}}|/sg;

    foreach my $table (@refs) {    # process all ref lists:
        # do table conversion:
        my $y = replace($table);
        foreach my $old (keys %H) {
            $y =~ s/\[\Q$old\E\]/{{Ref|$H{$old}}}/sg;
        }

        # sort bib entries by ref keys; only rows carrying a Ref
        # anchor are kept
        my %R = ();
        foreach my $row (split(/\|\-/, $y)) {
            if ($row =~ /\{\{Ref\|(\w+)\}\}/) { $R{$1} = $row; }
        }
        my $z = '{|' . "\n" . '|-';
        foreach my $new ($Collator->sort(keys %R)) { $z .= $R{$new} . '|-'; }
        $z .= "\n|}\n";

        # Replace the original table text literally.  \Q..\E quotes
        # every regex metacharacter (including { } and .), which a
        # hand-written escape list does not.
        $f =~ s/\Q$table\E/$z/sg;
    }
    return $f;
}

## Converts one bibliography table from HTML markup to wiki pipe
## syntax and returns the converted text.
## cf. http://en.wikipedia.org/wiki/Help:Table#Pipe_syntax_tutorial
sub replace {
    my ($text) = @_;

    for ($text) {
        # join everything onto a single line
        tr/\n/ /;
        # data cells: attributes and content become pipe-separated
        # (the trailing | is removed by the row rule below)
        s!<td\s(.*?)>(.*?)</td>!|$1|$2|!gis;
        # rows: start with |- and drop trailing blanks and pipes
        s!<tr>(.*?)[\s|]*</tr>\s*!|-\n$1\n!gis;
        # no spaces between consecutive pipes
        s!\|\s*\|!||!g;
        # opening and closing table tags
        s!<table>!{|\n!gi;
        s!</table>!|-\n|}!gi;
        # drop a pipe that directly precedes a template call
        s!\|\s*\{\{!{{!g;
        # no blanks in front of commas and full stops
        s! +([,.])!$1!g;
    }

    return $text;
}
How to Cite This Entry:
Eom refs.pl. Encyclopedia of Mathematics. URL: http://encyclopediaofmath.org/index.php?title=Eom_refs.pl&oldid=32264