
/** ***************************************************************************
 * 
 * soundex2_fr.c, Implementation de l'algorithme Soundex adapte au Francais 
 * Algorithme decrit par Joe Celko dans 'SQL Avance' (ISBN 978-2711786503)
 * 
 * (C) Benjamin Sonntag <benjamin@octopuce.fr> 08/2008
 * 
 * Ce programme est distribue sous licence BSD. Il est fourni sans aucune 
 * garantie d'aucune sorte. 
 * 
 */ 


#include <stdio.h>
#include <string.h>
#include <stdlib.h>


// Prints the French Soundex2 code of the word s1 on stdout; s1 is modified in place.
void soundex2_fr(char *s1);
// Replaces every occurrence of 'from' by 'to' inside 'in'; 'in' is modified in place.
void replace(char* from, char* to, char* in);

/**
 * soundex2_fr.c : read words on stdin, one per line, and print the soundex
 * code of each one on stdout.
 * note: we used it as an inetd service that may be used by any language
 * that knows socket calls.
 * The word may contain lower or upper case characters, but MUST NOT contain
 * accented characters.
 * The program stops on the first empty line, on end of file or on a read
 * error.
 */
int main(int argc,char* argv[]) {
  char s[256];
  size_t l;

  (void)argc;
  (void)argv;

  // fgets returns NULL on EOF / error: stop instead of looping on stale data
  while (fgets(s, sizeof s, stdin) != NULL) {
    l = strlen(s);
    // strip the line terminator ("\n" or "\r\n"); the l>0 guards prevent
    // reading s[-1] when the line is empty
    if (l > 0 && s[l-1] == '\n') {
      s[--l] = 0;
    }
    if (l > 0 && s[l-1] == '\r') {
      s[--l] = 0;
    }
    if (l == 0) {
      // an empty line ends the session
      break;
    }
    soundex2_fr(s);
    // flush after each answer so that a peer reading line by line is served
    fflush(stdout);
  }
  return 0;
}


/** Replace every occurrence of the string 'from' found in the string 'in' by
 * the string 'to'. 'in' is modified in place; this function returns nothing.
 *
 * Warning: 'from' must be at least as long as 'to', because the buffer
 * holding 'in' is never grown. A call that breaks this rule (or that passes
 * an empty 'from') leaves 'in' untouched.
 *
 * After a replacement the scan resumes at the next index, so the text that
 * was just written may take part in a later match.
 */
void replace(char* from, char* to, char* in) {
  size_t inl = strlen(in);
  size_t froml = strlen(from);
  size_t tol = strlen(to);
  size_t i;

  if (froml == 0 || tol > froml) {
    // nothing to search for, or the replacement would not fit
    return;
  }

  for (i = 0; i + froml <= inl; i++) {
    if (memcmp(&in[i], from, froml) == 0) {
      // found: overwrite the match, on purpose without a trailing \0
      memcpy(&in[i], to, tol);
      if (tol < froml) {
        // close the gap; source and destination overlap, hence memmove
        // (the +1 carries the terminating \0 along)
        memmove(&in[i + tol], &in[i + froml], inl - (i + froml) + 1);
        inl -= froml - tol;
      }
    }
  }
}



/**
 * Compact s (of length len) in place: every occurrence of 'victim' located
 * after the first character is dropped, unless the character kept just
 * before it belongs to 'keep_after'. Returns the new length of s.
 */
static size_t sdx_drop(char *s, size_t len, char victim, const char *keep_after) {
  size_t in;
  size_t out = 1;

  if (len < 2) {
    return len;
  }
  for (in = 1; in < len; in++) {
    if (s[in] == victim && strchr(keep_after, s[out - 1]) == NULL) {
      continue;
    }
    s[out++] = s[in];
  }
  s[out] = '\0';
  return out;
}

/**
 * If s starts with 'from', overwrite that prefix with 'to'.
 * 'from' and 'to' must have the same length.
 */
static void sdx_prefix(char *s, const char *from, const char *to) {
  size_t n = strlen(from);
  if (strncmp(s, from, n) == 0) {
    memcpy(s, to, n);
  }
}

/**
 * Compute the French Soundex2 code of the word s1 and print it on stdout,
 * followed by a newline. At most 4 characters are printed; shorter codes
 * are not padded.
 *
 * s1 is modified in place and used as working buffer.
 * Hyphens and spaces are ignored. "0000" is printed when the word is empty
 * or contains anything other than the ASCII letters a-z / A-Z.
 */
void soundex2_fr(char *s1) {
  size_t l, i, out;

  // drop '-' and ' ' (compaction handles consecutive separators too)
  out = 0;
  for (i = 0; s1[i] != '\0'; i++) {
    if (s1[i] != '-' && s1[i] != ' ') {
      s1[out++] = s1[i];
    }
  }
  s1[out] = '\0';
  l = out;
  if (l == 0) {
    printf("0000\n");
    return;
  }

  // upper-case the word; it must be alpha only
  for (i = 0; i < l; i++) {
    if (s1[i] >= 'a' && s1[i] <= 'z') {
      s1[i] = (char)(s1[i] - 'a' + 'A');
    } else if (s1[i] < 'A' || s1[i] > 'Z') {
      printf("0000\n");
      return;
    }
  }

  // replace letter groups (in this order) :
  replace("GUI","KI",s1);
  replace("GUE","KE",s1);
  replace("GA" ,"KA",s1);
  replace("GO" ,"KO",s1);
  replace("GU" ,"K",s1);
  replace("CA" ,"KA",s1);
  replace("CO" ,"KO",s1);
  replace("CU" ,"KU",s1);
  replace("Q"  ,"K",s1);
  replace("CC" ,"K",s1);
  replace("CK" ,"K",s1);
  // some replacements shorten the word: refresh the cached length
  l = strlen(s1);

  // substitute vowels
  for (i = 1; i < l; i++) { // note : i=1, don't substitute the first letter
    if (s1[i]=='E' || s1[i]=='I' || s1[i]=='O' || s1[i]=='U') {
      s1[i] = 'A';
    }
  }

  // replace at the beginning :
  sdx_prefix(s1, "MAC", "MCC");
  sdx_prefix(s1, "SCH", "SSS");
  sdx_prefix(s1, "ASA", "AZA");
  sdx_prefix(s1, "KN", "NN");
  sdx_prefix(s1, "PH", "FF");
  sdx_prefix(s1, "PF", "FF");

  // H -> '' except SH and CH :
  l = sdx_drop(s1, l, 'H', "CS");
  // Y -> '' except AY :
  l = sdx_drop(s1, l, 'Y', "A");

  // remove a single A D T S at the end (the first letter is always kept)
  if (l > 1 && (s1[l-1]=='A' || s1[l-1]=='D' || s1[l-1]=='T' || s1[l-1]=='S')) {
    s1[--l] = 0;
  }

  // remove every A, but the one on top
  l = sdx_drop(s1, l, 'A', "");

  // collapse runs of identical letters into a single one
  out = 1;
  for (i = 1; i < l; i++) {
    if (s1[i] != s1[out - 1]) {
      s1[out++] = s1[i];
    }
  }
  s1[out] = '\0';

  printf("%.4s\n", s1);
}
