\n"; ?>
\n"; ?> \n"; ?> \n"; ?> \n"; ?>
wie Factoid Memory Machine

Nederlandse versie
nieuwsberichten 1985-1998

© 2002 ILK / Taal en Informatica, Tilburg University

deed wat
waar
andere termen

select($querystring); $instring = ""; while (list($ltemp) = $db->getNextRow()) { //echo "ltemp = $ltemp"; //echo " instring = $instring"; if ($instring == "") { $instring = $ltemp; } else { $instring = $instring . "," . $ltemp; } } if ($instring != "") { if ($hasspace == 0) { $querystring = "select distinct upper(infvalue) from inflexions where upper(infvalue) = upper('$deedwat')"; } else { $querystring = "select distinct upper(infvalue) from inflexions where iid in ($instring)"; } $db->select($querystring); $deedcounter = 0; while (list($deed[$deedcounter]) = $db->getNextRow()) { //echo "# $deed[$deedcounter] #"; $deedcounter++; } } } /* $selectstring = "select distinct factoids.fid, factoids.factid, factoids.factoid, titles.tdate,titles.title "; $fromstring = " from factoidsIndex, factoids, titles "; $wherestring = " where factoidsIndex.facval = factoids.factid "; $wherestring .= " and factoidsIndex.fackey = '$wie' "; $wherestring .= " and titles.titleid = factoids.factid "; some explanation. I have tried 2 versions, one with the join between factoidsIndex, factoids and titles en one whithout the join with titles. I have looked at the speed and I have the impression that the one with a separate query on title is faster. */ // $wieids = array(); $wieassoc = array(); $wieidscounter = 0; $wietocs = array(); $tok = strtok($wie," \t\n"); while ($tok) { $tok = trim($tok); $wietocs[$wieidscounter] = $tok; //echo "$tok"; $querystring = "select distinct facval from factoidsIndex where fackey = upper('$tok') "; $db->select($querystring); $i = 0; while (list($id) = $db->getNextRow()) { $wieassoc[$id]++; /* if ($i == 0) { $wieids[$wieidscounter] = $id; } else { $wieids[$wieidscounter] .= "," . $id ; } $i++; */ } //echo "$wieids[$wieidscounter]"; $wieidscounter++; $tok = strtok(" \t\n"); } $selectstring = "select distinct factoids.fid, factoids.factid, factoids.factoid "; $fromstring = " from factoidsIndex, factoids "; $wherestring = " where factoidsIndex.facval = factoids.factid "; $wherestring .= " and factoidsIndex.facval in ( "; $wieassocsize = count($wieassoc); $init = 0; $i = 0; //while ($i < $wieassocsize) { reset($wieassoc); while (next($wieassoc)) { $xx = key($wieassoc); if ($wieassoc[$xx] == $wieidscounter) { if ($init == 0) { $init = 1; $wherestring .= " $xx "; } else { $wherestring .= " , $xx "; } } $i++; } $wherestring .= " )"; /* for ($i = 0; $i < $wieidscounter; $i++) { $wherestring .= " and factoidsIndex.facval in ( $wieids[$i] ) "; } */ // $wherestring .= " and factoidsIndex.fackey = upper('$wie') "; $querystring = $selectstring . $fromstring . $wherestring; //echo $querystring; //echo "wieassocsize: $wieassocsize"; if ($wieassocsize == 0) { echo "geen match op wie gevonden "; } else { $db->select($querystring); echo ""; echo "\n"); } else fprintf(stdout,"Geen onderwerp (wie) gespecificeerd.

\n"); fprintf(stdout,"

\n"; $rowcount = $db->getNumberOfRows(); if ($rowcount == 0) { echo "geen voorkomens gevonden "; } else { echo "
$wie onderwerp in $rowcount docs; zoeken naar factoids in $rowcount documenten
"; echo "\n"; echo "
\n"; echo "
"; echo "\n"; echo "\n"; echo "\n"; echo "\n"; echo "\n"; echo "\n"; //while(list($fid,$factid,$factoid,$tdate,$title) = $db->getNextRow()) { // echo "factid $factid factoid $factoid fid $fid\n"; while(list($fid,$factid,$factoid) = $db->getNextRow()) { // begin 'deedwat' // if 'deedwat' check 'deedwat' // if 'deedwat' not in factoid, continue with next factoid if ($deedwat != "") { $found = 0; $upperfactoid = strtoupper($factoid); // debug // echo "# $upperfactoid #"; for ($i = 0; $i < $deedcounter; $i++) { //echo "# $deed[$i] #"; if ($deed[$i] != "") { $temp = "]" . $deed[$i]; if (strstr($upperfactoid,$temp) != FALSE) { $found = 1; break; } } } if ($found == 0) { continue; } } // end 'deedwat' // waar if ($waar != "") { $tok = strtok($waar," \t\n"); $upperwaar = ""; $foundwaar = 0; $partsfound = 0; $tokcounter = 0; while ($tok) { $tokcounter++; //$upperwaar = $upperwaar . " "; $uppertok = strtoupper($tok); // $upperwaar = "{city}".$uppertok; $upperwaar = "}".$uppertok; $upperwaar = trim($upperwaar); if (strstr($factoid,$upperwaar) == FALSE) { $tok = strtok(" \t\n"); continue; } else { $foundwaar = 1; $partsfound++; break; } } if ($foundwaar == 0) { continue; } if ($tokcounter != $partsfound) { continue; } } // end waar // begin metwat if ($metwat != "") { $tok = strtok($metwat," \t\n"); $upperfactoid = strtoupper($factoid); $found = 0; while ($tok) { $uppertok = strtoupper($tok); if (stristr($upperfactoid,$uppertok) == FALSE) { // seems like AND operation all other terms have to be included $tok = strtok(" \t\n"); continue; } else { $found = 1; break; } } if ($found == 0) { continue; } } // end metwat $db2 = newDBconn(); $db2->select("select distinct titles.tdate, titles.title from titles where tid = $factid"); list ($tdate,$title) = $db2->getNextRow(); $db2->disconnect(); //echo "$wie\n"; $pipeindex = strpos($factoid,"|"); $wietitle = substr($factoid,0,$pipeindex); $wieallpresent = 1; for ($i = 0; $i < $wieidscounter; $i++) { if (stristr($wietitle,$wietocs[$i]) != "") { } else { $wieallpresent = 0; break; } } if ($wieallpresent == 0) { continue; } echo "\n"; echo "\n"; echo "\n"; echo "\n"; echo "\n"; echo "\n"; } echo "\n"; echo "\n"; echo "\n"; echo "\n"; echo "\n"; } // while(list($fid,$factid,$factoid) = $db->getNextRow()) $db->disconnect(); } } else if ($wie == "") { echo "Geen onderwerp (wie) gespecificeerd.

\n"; } } ?>

wiefactoidbronartikeldatum
\n"; $tok = strtok($wietitle," \n\t"); $sem = 0; //echo "fid = $fid factid = $factid"; while ($tok) { if ($sem == 1) { $sem = 0; echo "\n"; } if ((strstr($tok,"{person}") != FALSE) || (strstr($tok,"{personage}") != FALSE)) { $sem = 1; echo "\n"; echo substr($tok,strpos($tok,"}") + 1); echo " "; } else if ((strstr($tok,"{brand}") != FALSE) || (strstr($tok,"{company}") != FALSE) || (strstr($tok,"{companyloc}") != FALSE) || (strstr($tok,"{media}") != FALSE) || (strstr($tok,"{department}") != FALSE) || (strstr($tok,"{education}") != FALSE) || (strstr($tok,"{nonname}") != FALSE) || (strstr($tok,"{title}") != FALSE) || (strstr($tok,"{nonprofit}") != FALSE) || (strstr($tok,"{adjectival}") != FALSE)) { $sem = 1; echo "\n"; echo substr($tok,strpos($tok,"}") + 1); echo " "; } else if (strstr($tok,"{") != FALSE) { $sem = 1; echo "\n"; echo substr($tok,strpos($tok,"}") + 1); echo " "; } else { echo "$tok "; } $tok = strtok(" \n\t"); } //while($tok) echo "\n"; $tok = strtok($factoid," \n\t"); $sem = 0; $pipefound = 0; $fontsem = 0; while ($tok) { //debug //echo "# $tok #"; if ($tok == "|") { echo "..."; $pipefound = 1; $tok = strtok(" \n\t"); continue; } if ($pipefound == 0) { $tok = strtok(" \n\t"); continue; } if ($sem == 1) { $sem = 0; echo "\n"; } if ((strstr($tok,"{person}") != FALSE) || (strstr($tok,"{personage}") != FALSE)) { if ($fontsem == 1) { $fontsem = 0; echo "\n"; } $sem = 1; echo "\n"; echo substr($tok,strpos($tok,"}") + 1); echo " "; } else if ((strstr($tok,"[hr]") != FALSE)) { echo "
\n"; } else if ((strstr($tok,"{brand}") != FALSE) || (strstr($tok,"{company}") != FALSE) || (strstr($tok,"{companyloc}") != FALSE) || (strstr($tok,"{media}") != FALSE) || (strstr($tok,"{department}") != FALSE) || (strstr($tok,"{education}") != FALSE) || (strstr($tok,"{nonname}") != FALSE) || (strstr($tok,"{title}") != FALSE) || (strstr($tok,"{nonprofit}") != FALSE) || (strstr($tok,"{adjectival}") != FALSE)) { $sem = 1; if ($fontsem == 1) { $fontsem = 0; echo "
\n"; } echo "\n"; echo substr($tok,strpos($tok,"}") + 1); echo " "; } else if (strstr($tok,"{") != FALSE) { $sem = 1; if ($fontsem == 1) { $fontsem = 0; echo "\n"; } echo "\n"; echo substr($tok,strpos($tok,"}") + 1); echo " "; } else { if (strstr($tok, "]") != FALSE) { if ($fontsem == 1) { $fontsem = 0; echo "\n"; } $hookindex = strpos($tok,"]"); if ($hookindex >= 0) { $tok = substr($tok,$hookindex + 1); } echo "$tok "; } else { if ($fontsem == 0) { echo "\n"; $fontsem = 1; } echo "$tok "; } } $tok = strtok(" \n\t"); } //while ($tok) // echo "$factoid\n"; echo "
\n"; echo "\n"; if ($factid < 52019) { echo "ANDA\n"; } else if ($factid < 168097) { echo "Gelder-
lander\n
\n"; } else if ($factid < 193589) { echo "Brabants Dagblad\n"; } else if ($factid < 216238) { echo "Eindhovens Dagblad\n"; } else { echo "feitenboek Semjonov\n"; } echo "
\n"; echo "
\n"; echo "$title\n"; echo "\n"; echo "$tdate\n"; echo "
     
0) { for (i=0; i
\n"); nrtempindexed=0; for (i=0; i"); for (j=1; j onderwerp in %d docs; ", leftcondcounter[i]); } if (nrindexed>0) fprintf(stdout,"zoeken naar factoids in %d documenten ", nrindexed); else fprintf(stdout,"geen voorkomens gevonden "); fprintf(stdout,"
\n"); if (nrindexed>0) { nrtoopen=0; for (i=0; i\n", // i,indexed[i]); if ((nrtoopen==0)|| ((indexed[i]/1000)!=toopen[nrtoopen-1])) { toopen[nrtoopen]=indexed[i]/1000; //fprintf(stdout,"to open %d: %d
\n", // nrtoopen,indexed[i]/1000); nrtoopen++; } } fprintf(stdout,"\n"); fprintf(stdout,"
\n"); fprintf(stdout,"\n"); fprintf(stdout,"\n"); fprintf(stdout,"\n"); fprintf(stdout,"\n"); fprintf(stdout,"\n"); fprintf(stdout,"\n"); nextinline=0; for (f=0; ((f\n", line); sscanf(line,"%d ",&docnr); docs=fopen("/mnt/corpus/demo/doctitles","r"); fgets(memline,LINELEN,docs); sscanf(memline,"%d ",&readnr); readnr=-1; while ((!feof(docs))&& (readnr!=docnr)) { fgets(memline,LINELEN,docs); sscanf(memline,"%d ",&readnr); } for (i=0; i\n", memline); fprintf(stdout,"\n"); } } } fgets(line,LINELEN,result); } fclose(result); } fprintf(stdout,"\n"); fprintf(stdout,"\n"); fprintf(stdout,"\n"); fprintf(stdout,"\n"); fprintf(stdout,"\n"); } if (DEBUG) fprintf(stdout,"ready.

\n"); */ ?>

wiefactoidbronartikeldatum
\n"); i=0; while ((i"); j=0; while (printstring[j]!='}') j++; j++; while (j "); } else { fprintf(stdout,"%s ", printstring); } i++; } fprintf(stdout,"... \n"); i=0; while ((i"); else { if ((strstr(printstring,"brand"))|| (strstr(printstring,"company"))|| (strstr(printstring,"companyloc"))|| (strstr(printstring,"media"))|| (strstr(printstring,"department"))|| (strstr(printstring,"education"))|| (strstr(printstring,"nonname"))|| (strstr(printstring,"title"))|| (strstr(printstring,"nonprofit"))|| (strstr(printstring,"adjectival"))) fprintf(stdout,""); else fprintf(stdout,""); } j=0; while (printstring[j]!='}') j++; j++; while (j "); } else { if (printstring[0]=='[') { if (strcmp(printstring,"[hr]")==0) fprintf(stdout,"
"); else { fprintf(stdout,""); j=0; while (printstring[j]!=']') j++; j++; while (j "); } } else { fprintf(stdout,"%s ", printstring); } } i++; } fprintf(stdout,"
"); if (docnr<52019) fprintf(stdout,"ANDA"); else { if (docnr<168097) fprintf(stdout,"Gelder-
lander\n
"); else { if (docnr<193589) fprintf(stdout,"Brabants Dagblad\n"); else { if (docnr<216238) fprintf(stdout,"Eindhovens Dagblad\n"); else fprintf(stdout,"feitenboek Semjonov\n"); } } } fprintf(stdout,"
\n"); i=0; while ((i\n"); while ((i
     
laatste wijziging: 31 okt 2002