#ifndef lint static char sccsid[] = "@(#)pscan.c 4.2 (Berkeley) 82/11/06"; #endif not lint #include "names.h" #include "conp.h" #define isadv(c) (c == ADV || c == ADJ_ADV) #define isadj(c) (c == ADJ || c == NOUN_ADJ || c == ADJ_ADV || c == UNK) #define notnoun(c) (c != NOUN && c != UNK && c != NOUN_VERB && c != NV_PL && c != PNOUN) #define issing(c) (c == UNK || c == NOUN_VERB || c == NOUN || c == NOM || c == NOUN_ADJ) #define isnoun(c) (c == NOUN || c == NOUN_VERB || c == NV_PL || c == UNK || c == PNOUN) #define nounnom(c) (c == NOUN || c == NOM) char lastc,nextc; int savsub; scan(si,ce,command) /*scan a phrase */ int si; char ce; { int savi; char c; i=si; if(command == 1)subty = PLURAL; else subty = 0; if(sent[i].cc==ADJ_ADV && sent[i+1].cc != NOUN && sent[i+1].cc != UNK && sent[i+1].cc != ADJ) sent[i++].cc = ADV; done=0; verb = 0; verb=getv(i,ce); /*get verb if obvious*/ if(command == 0)j=getnoun(i,ce); /*get subject*/ else j = si; if(i != j || ((i==j) && sent[i].cc == NOUN)){ i = j+1; } for(;((c=sent[i].cc) != ce) && (c != END) && (c != '\0');i++){ nextc=sent[i+1].cc; if(i>0){ lastc=sent[i-1].cc; if(lastc==BE)be=1; else{ if(lastc != ADV)be=0; } } else lastc=0; if(verb==1)question=0; switch(c){ case '"': if(nextc==ED || nextc == MVERB){ verb=1; sent[++i].cc=VERB; subty=SING; continue; } subty=0; verb=getv(++i,ce); i=getnoun(i,ce); continue; case MVERB: sent[i].cc = VERB; if(i < nsleng*.5){ verb = getv(++i,ce); i = getnoun(i,ce); continue; } continue; case ART: case ADJ: case POS: case ING: aflg=1; i=getnoun(i,ce); aflg=0; continue; case PREP: if(nextc == ce){ sent[i].cc=ADV; goto sdone; } prep=1; i=getnoun(++i,ce); prep=0; continue; case VERB_ADJ: if(verb==0){ sent[i].cc=VERB; verb=1; continue; } case NOUN_ADJ: if(be==1){ sent[i].cc=ADJ; continue; } case PRONP_ADJ: case PRONS_ADJ: i=getnoun(i,ce); continue; case NOUN_ADV: if(verb == 1 && iverb == i+1){ sent[i].cc = NOUN; } else { sent[i].cc = ADV; } continue; case ADJ_ADV: if(be == 1){ if (isadj(nextc)) { sent[i].cc = ADV; continue; } if(nextc == ',' && isadv(sent[i+2].cc)) { sent[i++].cc = ADV; sent[++i].cc = ADV; comma--; continue; } sent[i].cc = ADJ; continue; } if(lastc == NOUN && (nextc == ',' || nextc == END)){ sent[i].cc=ADJ; continue; } if(notnoun(nextc)){ sent[i].cc=ADV; continue; } sent[i].cc=ADJ; continue; case WHO: i=who(i,ce); continue; case PRONP: subty=PLURAL; continue; case NOUN: if(nextc==NOUN){ sent[i].cc=ADJ; continue; } case PRONS: subty=SING; continue; case PNOUN: sent[i].cc = NOUN; if(subty == 0)subty=PLURAL; continue; case CONJ: if(nextc==UNK || nextc == NOUN_VERB || nextc == NV_PL){ if(lastc == ADJ)sent[++i].cc = ADJ; else sent[++i].cc = VERB; } if(nextc == ED){ sent[++i].cc = VERB; } continue; case AUXP: case AUXS: case BES: case BEP: case AUXV: case AUX: case HAS: case HAVE: case HAD: verb=getv(i,ce); continue; case ADV: case AUXX: case VERB: case BE: case INTER: continue; case THAT: if(nextc==SUBCONJ){ sent[i].cc=PRONP; goto subc; } verb=getv(i+1,ce); j = i+1; while(isadv(sent[j].cc)) { j++; } nextc = sent[j].cc; if((verb==1 && iverb== j) || nextc==ED || nextc == VERB_ADJ){ sent[i].cc=PRONP; while(i+1 < j)sent[++i].cc = ADV; } else if(verb==0 && nextc==NV_PL && ((lastc==NOUN && issing(sent[i-1].ic)) || lastc == ',')) { sent[i].cc=PRONP; subty=SING; while(i+1 < j)sent[++i].cc = ADV; } else if(verb==0 && (nextc==UNK || nextc==NOUN_VERB) && (lastc==NOUN && sent[i-1].ic==NV_PL)){ subty=PLURAL; sent[i].cc=PRONP; while(i+1 < j)sent[++i].cc = ADV; } else { if(i == 0)sent[i].cc=ADJ; else sent[i].cc=SUBCONJ; j=i; subty=0; i=getnoun(i+1,ce); if(done)sent[j].cc=ADJ; } continue; case ',': if(nextc != CONJ){ /*parenthetical*/ continue; } i++; comma--; if(sent[i+1].cc == ED || sent[i+1].cc == MVERB){ sent[++i].cc = VERB; continue; } if(command == 1){ verb = getv(++i,ce); continue; } case SUBCONJ: subc: savi = i; if(nextc==END||(lastc==',' && nextc==',')){ sent[i].cc=ADV; continue; } subty=0; ce=','; verb=getv(++i,ce); if(sent[savi+1].cc == VERB || sent[savi+1].cc == AUXX) if(sent[savi].cc == SUBCONJ)sent[savi].cc = ADV; i=getnoun(i,ce); continue; case PREP_ADV: if(sent[i+2].cc==PREP_ADV &&(sent[i+1].cc != NOUN && sent[i+1].cc != NOUN_VERB && sent[i+1].cc != NV_PL && sent[i+1].cc != PNOUN)){ sent[i].cc=ADV; sent[i+2].cc=CONJ; continue; } sav=i++; savsub = subty; pverb=getv(i,ce); if((pverb==1 && iverb == i) || sent[i].cc== ED){ sent[sav].cc=PRONP; sent[i].cc=VERB; continue; } i=getnoun(i,ce); if(done==1){ sent[sav].cc=ADV; goto sdone; } if(pverb==1 && iverb == i+1){ sent[sav].cc=SUBCONJ; ce=','; continue; } switch(sent[i+1].cc){ case UNK: case NV_PL: case NOUN_VERB: case ED: sent[sav].cc=SUBCONJ; verb=0; ce=','; continue; default: sent[sav].cc=PREP; subty = savsub; continue; } case TO: savi = i; sent[i++].cc=VERB; sw: switch(nextc){ case UNK: case AUXS: case VERB_ADJ: case NOUN_VERB: case VERB: case MVERB: sent[i].cc=VERB; continue; case HAVE: sent[i].cc = VERB; if(sent[i+1].cc == ED){ sent[++i].cc = VERB; continue; } if(sent[i+1].cc == ADV)i++; if(sent[i+1].cc != BE)continue; i++; case BE: sent[i].cc=VERB; if(sent[i+1].cc == ADV)i++; if(sent[i+1].cc == ED || sent[i+1].cc == ING){ sent[++i].cc = VERB; } else if(sent[i+1].cc == UNK){ sent[++i].cc = ADJ; } continue; case ADV: nextc = sent[++i].cc; goto sw; default: sent[savi].cc = PREP; prep=1; i=getnoun(i,ce); prep=0; continue; } case NV_PL: if(subty==0){ i=getnoun(i,ce); subty=PLURAL; } else if(verb== 0){ /*need verb*/ sent[i].cc=VERB; verb=1; } else{ i=getnoun(i,ce); } continue; case UNK: case NOUN_VERB: if(verb==1){ if(be==1 && nextc != WHO){ sent[i].cc=ADJ; continue; } i=getnoun(i,ce); } else if(nextc==NV_PL && isnoun(sent[i+2].cc)) { sent[i].cc=NOUN; if(sent[i-1].cc == NOUN){ sent[i-1].cc = ADJ; } subty=SING; continue; } else if(subty==0){ i=getnoun(i,ce); continue; } else { /* desparation */ sent[i].cc=VERB; verb=1; } continue; case ED: if(verb == 0){ sent[i].cc=VERB; verb=1; } else if(sent[i-1].cc == BE){ sent[i].cc = VERB; } else{ i=getnoun(i,ce); } continue; default: printf("got a %c %o on %sat %d\n",sent[i].cc,sent[i].cc,sent[i].sp,i); } } sdone: if(sent[i].cc==END && sent[i].ic != ';')return(-1); else { if(sent[i].ic == ';')sent[i].cc=';'; else comma--; return(i); } } getv(si,ce) int si; char ce; { int conj; char c; int ik; must=0; bflg=0; hflg=0; verbty=0; for(j=si;((c=sent[j].cc) != ce) && (c != END); j++){ iverb=j; switch(c){ case ED: continue; case HAS: hflg=1; verbty=SING; sent[j].cc=AUXX; goto next; case HAVE: if(sent[j-1].cc==TO){ continue; } case HAD: hflg=1; verbty=PLURAL; sent[j].cc=AUXX; goto next; case BE: if(sent[j].ic != BE){ return(1); } continue; case VERB: if(sent[j-1].cc==TO){ continue; } return(1); case AUXX: return(1); case AUXP: must=1; verbty=PLURAL; sent[j].cc=AUXX; goto next; case AUXS: if(sent[j-1].cc==TO){ continue; } verbty=SING; sent[j].cc=AUXX; goto next; case AUX: case AUXV: must=1; sent[j].cc=AUXX; next: if(sent[j-1].cc == CONJ && verb == 0)conj = 0; else conj = 1; if(question==1){ j=getnoun(j+1,ce); question=0; } getv2(ce); if(sent[j].cc == AUXX){ sent[j].cc = VERB; } if((sent[j].cc!=VERB && sent[j].cc!=BE) && sent[iverb].cc==AUXX){ sent[iverb].cc=VERB; for(ik=iverb+1;ik<=j;ik++){ if(sent[ik].ic == NOM)sent[ik].cc=NOUN; else sent[ik].cc=sent[ik].ic; } } return(conj); case BES: verbty=SING; bflg=1; sent[j].cc=BE; goto next; case BEP: verbty=PLURAL; bflg=1; sent[j].cc=BE; goto next; case SUBCONJ: if(sent[j-1].cc== ',' && sent[j+1].cc == ','){ continue; } case THAT: case WHO: goto vdone; case ',': if(comma==1 && sent[j+1].cc==CONJ){ goto vdone; } } } vdone: return(0); } getv2(ce) char ce; { int vflg; char c; vflg=0; while(((c=sent[++j].cc) != ce) && (c != END)){ switch(c){ case NOUN_ADV: case ADJ_ADV: if(bflg == 0 || sent[j+1].cc == ED || sent[j+1].cc == ING){ sent[j].cc = ADV; } else{ j -= 1; return; } case ADV: continue; case SUBCONJ: if(sent[j+1].cc == ED || sent[j+1].cc == ING || sent[j+1].cc == ADV || sent[j+1].cc == ADJ_ADV)sent[j].cc = ADV; else return; continue; case VERB: return; case VERB_ADJ: case ED: sent[j].cc=VERB; return; case BE: must=0; bflg=1; continue; case ING: if(bflg == 1){ sent[j].cc = VERB; } else { j -= 1; } return; case NOUN_VERB: case UNK: if(bflg==1 || vflg==1 || hflg==1){ j -= 1; return; } sent[j].cc=VERB; return; case PRONP_ADJ: case PRONS_ADJ: c=sent[j+1].cc; if(c==ED||c==ING||c==BE||must==1){ sent[j].cc = ADV; } else{ j -= 1; return; } continue; case AUXV: case AUX: case AUXP: case AUXS: case HAD: case HAVE: case HAS: must=0; vflg=1; sent[j].cc=AUXX; continue; default: j -= 1; return; } } j -= 1; } getnoun(k,ce) int k,ce; { char c; int st,t,nextst; int rep; int t1,tt; st=k; rep=0; b: nextst=sent[st+1].cc; switch(sent[st].cc){ case END: done=1; break; case ',': if(ce==','){ done=1; break; } else { goto getdef; } case '"': if(aflg == 1){ st++; goto b; } break; case NOUN: switch(nextst){ case UNK: case NOUN_VERB: if(verb==1){ goto nn; } if(subty != PLURAL){ goto nn; } break; case NV_PL: if(verb==1){ goto nn; } if(subty == PLURAL){ goto nn; } break; case PNOUN: case NOUN_ADJ: case POS: case NOUN: nn: sent[st].cc=ADJ; goto b; } if(prep==0)subty=SING; break; case PNOUN: sent[st].cc = NOUN; if(prep == 0)subty=PLURAL; break; case ADV: st++; goto b; case WHO: st=who(st,ce); st++; goto b; case ADJ_ADV: case VERB_ADJ: case ED: sent[st++].cc=ADJ; goto b; case ING: if(isnoun(nextst)){ sent[st++].cc=ADJ; goto b; } sent[st].cc=NOUN; if(prep==0)subty=SING; break; case ART: case POS: if(st != k){ goto getdef; } case ADJ: st++; goto b; case PRONP: if(prep==0)subty=PLURAL; break; case PRONS: if(prep==0)subty=SING; break; case NOUN_VERB: if((nextst==NOUN|| nextst==PNOUN)||(verb==1 && (nextst== NV_PL|| nextst==NOUN_VERB) )){ sent[st].cc=ADJ; goto b; } case NV_PL: sent[st].cc=NOUN; if(prep==0){ if(sent[st].ic==NV_PL)subty=PLURAL; else subty=SING; } if(sent[st-1].cc == NOUN && nounnom(sent[st-1].ic)) { sent[st-1].cc = ADJ; } break; case PRONP_ADJ: switch(nextst){ case PNOUN: case NOUN_ADJ: case NV_PL: case ADJ_ADV: case PRONS_ADJ: case ADJ: case ING: sent[st++].cc=ADJ; goto b; case NOUN_VERB: case UNK: case ED: if(verb == 0){ goto def; } else{ sent[st++].cc=ADJ; goto b; } case NOUN: sent[st++].cc=ADJ; if(prep==0)subty=SING; break; case SUBCONJ: sent[st++].cc=PRONP; /* more than .. */ sent[st++].cc=CONJ; if(verb==0)verb=getv(st,ce); goto b; case '"': sent[st++].cc = ADJ; st++; goto b; default: def: sent[st].cc=PRONP; if(prep==0)subty=PLURAL; break; } break; case PRONS_ADJ: if(aflg == 1 && nextst != END && nextst != ART){ sent[st++].cc=ADJ; goto b; } switch(nextst){ case '"': sent[st++].cc = ADJ; st++; goto b; case PNOUN: case ING: case UNK: case NOUN_VERB: case NOUN_ADJ: case ADJ_ADV: case ADJ: sent[st++].cc=ADJ; goto b; case NOUN: sent[st++].cc=ADJ; if(prep==0)subty=SING; break; case PRONS_ADJ: sent[st++].cc=ADJ; sent[st].cc=PRONP; if(prep==0)subty=SING; break; case NV_PL: if(verb==1){ sent[st++].cc=ADJ; sent[st].cc=NOUN; if(prep==0)subty=PLURAL; break; } sent[st].cc=PRONP; if(prep==0)subty=SING; break; default: sent[st].cc=PRONP; if(prep==0)subty=SING; break; } break; case NOUN_ADJ: if(nextst==NOUN||nextst==ADJ||nextst == NOUN_ADJ||nextst==PRONS_ADJ||nextst==PRONP_ADJ||nextst==PNOUN){ sent[st++].cc=ADJ; goto b; } if(nextst!=UNK && nextst != NOUN_VERB && nextst != NV_PL){ sent[st].cc=NOUN; if(prep==0)subty=SING; break; } if(verb==1 || sent[st+2].cc==ED){ sent[st++].cc=ADJ; goto b; } sent[st].cc=NOUN; if(prep==0)subty=SING; break; case UNK: if(nextst != UNK){ if(nextst == ',' && rep==0){ c=sent[st+2].cc; if((c==UNK||c==ADJ||c==NOUN_ADJ||c==ING) && isnoun(sent[st+3].cc)){ comma--; sent[st].cc=ADJ; sent[st+2].cc=ADJ; sent[st+3].cc=NOUN; if(prep==0){ if(sent[st+3].ic==NV_PL)subty=PLURAL; else subty=SING; } return(st+3); } else { sent[st].cc=NOUN; if(prep==0)subty=SING; break; } } if(nextst==NOUN||nextst==ING ||nextst==NOUN_ADJ|| sent[st-1].cc==ADV || nextst==ADJ || nextst==PNOUN){ sent[st++].cc=ADJ; goto b; } if((nextst==NV_PL || nextst==NOUN_VERB) &&(verb==1 ||(verb==0 && (sent[st+2].cc==UNK || sent[st+2].cc==NOUN_VERB || sent[st+2].cc==ED||sent[st+2].cc==VERB_ADJ)))){ sent[st++].cc=ADJ; sent[st].cc=NOUN; if(prep == 0)subty=PLURAL; break; } if(nextst == CONJ && isadv(sent[st+2].cc)){ sent[st].cc=ADJ; goto b; } sent[st].cc=NOUN; if(prep==0)subty=SING; break; } for(t=st+1;sent[t].cc== UNK;t++); if(verb==0 && prep == 0){ /* UUU. */ if(prep==0)subty=SING; if(sent[t].cc==NV_PL|| sent[t].cc == PNOUN){ /* UUZ.*/ if(sent[t+1].cc==UNK || sent[t+1].cc==NOUN_VERB){ /* UUZU */ sent[t+1].cc=VERB; verb=1; sent[t].cc=NOUN; t1=t; } else if(sent[t].cc == NV_PL){ /* UUZ. */ sent[t].cc=VERB; verb=1; sent[t-1].cc=NOUN; t1=t-1; } else { sent[t].cc = NOUN; t1 = t; } } else{ /* UU. */ if(sent[t].cc!= ED){ sent[t-1].cc=VERB; verb=1; sent[t-2].cc=NOUN; t1=t-2; t--; } else { sent[--t].cc=NOUN; t1=t; } } } else{ if(sent[t].cc==NOUN_VERB|| sent[t].cc==NOUN|| sent[t].cc==NV_PL|| sent[t].cc==PNOUN){ sent[t].cc=NOUN; if(prep==0)subty=PLURAL; t1=t; } else{ sent[--t].cc=NOUN; if(prep==0)subty=SING; t1=t; } } for(tt=st;tt