#! /usr/local/bin/perl
#
# xmp-name2key.pl - assign a disambiguation and check digit suffix to a
#	name or pseudoname for the Brown University Women Writers Project.
#	This is an extract from the completel internal use version, which
#	includes some project-specific portions, particularly writing keys
#	to a file.
#
# Copyright 1997 by Syd Bauman and Brown University.
#   This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#   This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# merchantability or fitness for a particular purpose. See the GNU
# General Public License for more details.
#   You should have received a copy of the GNU General Public
# License along with this file; if not, write to the
#   Free Software Foundation, Inc.
#   675 Mass Ave
#   Cambridge, MA  02139
#   USA
#   gnu@prep.ai.mit.edu.
#
#
#
# Intro
# -----
# This routine collects names from the user, and writes out a valid
# key to STDOUT. We presume that all names can be written in the
# modern English alphabet of 26 letters.
#
#
# Basic Idea
# ----- ----
# We regularize the user's input to a "pseudoname" (A "pseudoname" is
# composed of the first letter of the first name followed by up to 8
# letters of the last name. All hyphens, apostrophes, and accents are
# dropped.), and then append two randomly generated letters in order
# to be able to disambiguate collisions (e.g., both Lois Lane and her
# sister Lucy have "LLane" as their pseudoname). Then a checksum
# letter is calculated and appended. A period is placed between the
# pseudoname and the disambiguation & checksum characters for
# readability.
#
#
# The Gory Details
# --- ---- -------
# 
# 
# 
# Credits
# -------
# Original concept: John Lavagnino
# Details: WWP encoding staff
# Revised concept: Syd Bauman
# Algorithm: Norman Bauman
# Coding: Syd Bauman
# 
#
# revision hx:
# -------- --
# 1997-05-29 by Syd Bauman: created, based on WWP's "name2key.pl"
# 	program version 3.5. Note that this program is intended to run
# 	under either unix Perl5 or MacPerl5; the original "name2key.pl"
# 	ran only in MacPerl.
#

if ($MacPerl'Version) {
	require "StandardFile.pl";
	$dirsep = ":";
	$namelen = 32;
	$endmsg = "To end you must quit MacPerl (cmd-Q).";
	} # mac-specific
else {
	$dirsep = "/";
	$namelen = 78;	# chosen because it's longest length that won't wrap a ls -l on a 132 column screen
	$endmsg = "(To end enter ctl-C.)";
	} # unix-specific

#
# initalize "constants"
#
$alphaplus = "abcdefghijklmnopqrstuvwxyz*";
( $mypath, $myname ) = $0 =~ m/^(.*$dirsep)?([^$dirsep]+)$/;
$debug = substr("$myname", 0, $namelen-5 );
$debug .= ".debug";

#
# open debugging file
#
open( DEBUG, ">$debug") || warn "WARNING: couldn't open debug file '$debug'.\n";


while( TRUE ) {
	#
	# We're going to loop forever: if user wants to get out,
	# she is going to have to quit MacPerl (Mac) or enter
	# ctl-C (unix). There must be a better way to do this.
	#
	print STDOUT "\n$endmsg";

	#
	# ask user to enter the regularized name
	#
	print STDOUT "\nEnter regularized name, w/o accents (for example 'Flanders, Julia H.'):\n>";
	$entered_name = <STDIN>;
	
	#
	# nuke the terminating newline
	#
	chop $entered_name;
	
	#
	# If there's a comma, presume format is "Surname, Firstname otherpartsofname";
	# if no comma, presume format is "Name", as in "God" or "Jesus" or "Zeus".
	# In either case, put together the parts of the inital portion of the key.
	#
	if ( $entered_name =~ m/,/ ) {
		( $surname, $first_initial ) = $entered_name =~ m/^([^,]+), *([A-Za-z])/;
		$surname =~ tr/a-zA-Z//cd;
		$first_key_part = pack("A1A8", "$first_initial", $surname );
		}
	else {
		$first_key_part = $wholename = $entered_name;
		$first_key_part =~ tr/a-zA-Z//cd;
		$first_key_part = pack("A9", $first_key_part );
		}
	#
	# Now we have the first key part padded with blanks.
	# Translate the blanks to asterisk, and convert to
	# lower case.
	#
	$first_key_part =~ tr/A-Z /a-z*/;
	
	TWORAND: {
		#
		# "pnamed" stands for processing name w/ disambiguation
		#
		$pnamed = $first_key_part . substr( $alphaplus, int( rand(26) ), 1 ) . substr( $alphaplus, int( rand(26) ), 1 );
		#
		# The following loop emulates treating pnamed as a number
		# base 27 (26 alphabetics + '*'), and taking that number
		# mod 29. The reason for doing this in a loop at all is
		# that perl doesn't have an easy way to treat a string
		# like a number like that (I don't know a language that
		# does, for that matter); the reason we chop the number
		# down mod 29 each time through the loop (rather than once
		# at the end) is because we'd cause an overflow if we
		# didn't (and perl wouldn't tell us...). We could have
		# used the bigint.pl package, and perhaps someday...
		#
		$u = 0;
		for( $i = 0; $i < 11; $i++ ) {
			$u += index( $alphaplus, substr( $pnamed, $i, 1 ) );
			$u *= 27;
			$u %= 29;
			}
		$u = 56 - $u;	# 56 is a magic constant
		$u %= 29;
		#
		# Now u is a check number between 0 and 28, inclusive.
		# Sadly, we only have 26 letters available with which
		# to aesthetically represent this number. So, if u is
		# outside that range (0-25), we just start over with a
		# new pair of disambiguation letters.
		#
		if ( $u > 25 ) { redo TWORAND; }	# <=== unstructured ===
		#
		# Yay! u is now a check number between 0 and 25 inclusive.
		# Get the corresponding letter of the alphabet and append
		# it to pnamed.
		#
		$cchar = substr( $alphaplus, $u, 1 );
		$pnamed .= $cchar;
		#
		# If there is a surname, then make the 1st two characters
		# (the first initial and 1st letter of surname) uppercase;
		# otherwise, there's only one name, make only the 1st
		# letter (1st letter of name) uppercase.
		#
		if ( $surname ) { $pnamed =~ m/^([a-z][a-z])(.*)$/; }
		else            { $pnamed =~ m/^([a-z])(.*)$/; }
		$pnamed = "\U$1" . "$2";
		#
		# insert period, remomve asterisks, save result as key
		#
		$pnamed =~ s/^([a-zA-Z]+)\**([a-z]{3})$/\1.\2/;
		$key = $pnamed;
		}
	print "\nKey is: $key\n\n";
	}# main while loop

close( DEBUG );
exit;
