// Copyright 2007 Nanorex, Inc. See LICENSE file for details. #include "simulator.h" static char const rcsid[] = "$Id$"; static int debugMatch = 0; static struct patternMatch * makeMatch(struct part *part, struct compiledPattern *pattern, int sequenceNumber) { struct patternMatch *match; int i; match = (struct patternMatch *)allocate(sizeof(struct patternMatch)); match->patternName = pattern->name; match->p = part; match->numberOfAtoms = pattern->numberOfAtoms; match->atomIndices = (int *)allocate(sizeof(int) * pattern->numberOfAtoms); match->introducedAtTraversal = (int *)allocate(sizeof(int) * pattern->numberOfAtoms); match->sequenceNumber = sequenceNumber; for (i=pattern->numberOfAtoms-1; i>=0; i--) { match->atomIndices[i] = -1; match->introducedAtTraversal[i] = pattern->numberOfTraversals + 1; } return match; } static void destroyMatch(struct patternMatch *match) { if (match == NULL) { return; } if (match->atomIndices != NULL) { free(match->atomIndices); match->atomIndices = NULL; } if (match->introducedAtTraversal != NULL) { free(match->introducedAtTraversal); match->introducedAtTraversal = NULL; } free(match); } void printMatch(struct patternMatch *match) { int i; printf("match for pattern %s\n", match->patternName); for (i=0; inumberOfAtoms; i++) { printAtom(stdout, match->p, match->p->atoms[match->atomIndices[i]]); } } void traceMatch(struct patternMatch *match) { char buf[1024]; // relatively safe, as the length is based on the pattern char buf1[32]; int i; struct atom *a; buf[0] = '\0'; sprintf(buf, "# Pattern match: [%d] (%s)", match->sequenceNumber, match->patternName); for (i=0; inumberOfAtoms; i++) { a = match->p->atoms[match->atomIndices[i]]; sprintf(buf1, " %d", a->atomID); strcat(buf, buf1); } strcat(buf, "\n"); write_traceline(buf); } // debug routine static void printMatchStatus(struct patternMatch *match, char *label, int traversalIndex, struct atom *matchedAtom) { int i; int index; struct atom *atom; if (debugMatch == 0) { return; } printf("%s%s[%d]: ", label, match->patternName, traversalIndex); for (i=0; inumberOfAtoms; i++) { if (match->introducedAtTraversal[i] <= traversalIndex) { index = match->atomIndices[i]; if (index < 0 || index > match->p->num_atoms) { printf("{%d}", index); } else { atom = match->p->atoms[match->atomIndices[i]]; if (atom == matchedAtom) { printf("***"); } printAtomShort(stdout, atom); } printf("[%d] ", match->introducedAtTraversal[i]); } else { printf("{{%d, %d}} ", match->atomIndices[i], match->introducedAtTraversal[i]); } } printf("\n"); } // This routine erases the state of any matches past the indicated // point in the iteration through the pattern. While we are iterating // through possibilities for atom a (which==1) at the start of each // loop, we must forget the state of atom b (which==2). static int resetMatchForThisTraversal(struct patternMatch *match, int traversalIndex, struct compiledPatternTraversal *traversal, int which) { int i; if (which == 1) { i = traversal->a->idInPattern; } else { i = traversal->b->idInPattern; } if (match->introducedAtTraversal[i] >= traversalIndex) { if (debugMatch) { printf("reset traversal %d, which %d, atom %d; ", traversalIndex, which, i); } match->atomIndices[i] = -1; match->introducedAtTraversal[i] = traversalIndex + 1; } else if (debugMatch) { printf("did not reset traversal %d, which %d, atom %d; ", traversalIndex, which, i); } if (debugMatch) { printMatchStatus(match, "", traversalIndex, NULL); } return 1; } int atomIsType(struct atom *a, struct atomType *type) { struct atomType *atype = a->type; while (atype != NULL) { if (atype == type) { return 1; } atype = atype->parent; } return 0; } // Returns true if the indicated atom (id) has not yet appeared in the // match. This keeps a pattern from matching the same atom twice. static int atomNotUsed(struct patternMatch *match, int traversalIndex, int id) { int trial; int i; trial = match->atomIndices[id]; for (i=match->numberOfAtoms-1; i>=0; i--) { if (i != id && match->introducedAtTraversal[i] <= traversalIndex && match->atomIndices[i] == trial) { return 0; } } return 1; } // Returns the next atom to match at the given position in the // pattern, or NULL if the possibilities have been exhausted. If an // atom was fixed at a previous traversal (earlier in the matching // process), this routing will return that atom on the first call, and // NULL on the second call. That state is kept in callCount. static struct atom * matchOneAtom(struct patternMatch *match, struct compiledPattern *pattern, struct compiledPatternAtom *patternAtom, int traversalIndex, int *callCount) { int id = patternAtom->idInPattern; struct part *p = match->p; struct atom *a; if (match->introducedAtTraversal[id] < traversalIndex) { // this atom has been fixed at an earlier traversal if ((*callCount)++ == 0) { // first call at this level, return fixed atom a = match->p->atoms[match->atomIndices[id]]; if (debugMatch) { printMatchStatus(match, "firstfixed: ", traversalIndex, a); } return a; } // later call at this level, no more possible matches return NULL; } if (match->introducedAtTraversal[id] > traversalIndex) { // haven't matched this atom yet, start search at top match->atomIndices[id] = -1; match->introducedAtTraversal[id] = traversalIndex; } while (++(match->atomIndices[id]) < p->num_atoms) { if (atomNotUsed(match, traversalIndex, id)) { a = p->atoms[match->atomIndices[id]]; if (atomIsType(a, patternAtom->type)) { if (debugMatch) { printMatchStatus(match, "searched: ", traversalIndex, a); } return a; } } } return NULL; } // Returns true if the specified targetAtom matches at the given point // in the pattern. The atom may have been fixed at a previous // traversal, in which case we must be looking at the same atom. If // this is a new atom at this traversal, we fix it here if the type is // correct. static int matchSpecificAtom(struct patternMatch *match, struct compiledPatternAtom *patternAtom, struct atom *targetAtom, int traversalIndex) { int id; if (atomIsType(targetAtom, patternAtom->type)) { id = patternAtom->idInPattern; if (match->introducedAtTraversal[id] > traversalIndex) { match->atomIndices[id] = targetAtom->index; if (atomNotUsed(match, traversalIndex, id)) { match->introducedAtTraversal[id] = traversalIndex; if (debugMatch) { printMatchStatus(match, "specific: ", traversalIndex, targetAtom); } return 1; } return 0; } if (match->atomIndices[id] == targetAtom->index) { if (debugMatch) { printMatchStatus(match, "fixed specific: ", traversalIndex, targetAtom); } return 1; } } return 0; } static struct hashtable *matchSet = NULL; static int integerSortCompare(const void *a, const void *b) { return *(int *)a - *(int *)b; } // Returns true if the matched set of atoms has already been found // while matching this pattern. Keeps symmetric patterns from // matching more than once on the same set of atoms. The set of // matched atoms is canonicalized (by sorting the numerical ids), // turned into a string, and used as a hashtable key. static int checkForDuplicateMatch(struct patternMatch *match) { int i; int len; int *atomIndices = (int *)allocate(sizeof(int) * match->numberOfAtoms); char *buf1; char buf2[20]; int dup; for (i=match->numberOfAtoms-1; i>=0; i--) { atomIndices[i] = match->atomIndices[i]; } qsort(atomIndices, match->numberOfAtoms, sizeof(int), integerSortCompare); len = 0; for (i=match->numberOfAtoms-1; i>=0; i--) { sprintf(buf2, "%d.", atomIndices[i]); len += strlen(buf2); } buf1 = (char *)allocate(sizeof(char) * (len + 1)); buf1[0] = '\0'; for (i=match->numberOfAtoms-1; i>=0; i--) { sprintf(buf2, "%d.", atomIndices[i]); strcat(buf1, buf2); } dup = hashtable_get(matchSet, buf1) != NULL; if (!dup) { hashtable_put(matchSet, buf1, (void *)1); } return !dup; } static int matchBondOrder(char order, struct compiledPatternTraversal *traversal) { int i; char matchBondOrder; for (i=0; i<4; i++) { matchBondOrder = traversal->bondOrder[i]; if (matchBondOrder) { if (matchBondOrder == order) { return 1; } } else { return 0; } } return 0; } // Matches one traversal in a compiled pattern. See // http://www.nanoengineer-1.net/mediawiki/index.php?title=User:Emessick/Molecular_pattern_matching // for a description of the algorithm. Basically, each traversal // represents an expansion of the set of criteria for matching the // pattern. The first traversal establishes a seed, and later // traversals add to the seed, crystallizing the pattern around it. // The crystallization only proceeds in the presence of a matching // target structure. If the crystal grows to the end of the traversal // list, the whole pattern matched. static void matchOneTraversal(struct patternMatch *match, struct compiledPattern *pattern, int traversalIndex) { struct atom *atomA; struct atom *atomB; int atomACallCount = 0; int atomBCallCount = 0; int isBonded; int bondNumber; int atomsAreTheSame; struct bond *bond; struct compiledPatternTraversal *traversal; if (traversalIndex >= pattern->numberOfTraversals) { if (debugMatch) { printf("found match, checking for duplicate\n"); printMatch(match); } if (pattern->allowDuplicates || checkForDuplicateMatch(match)) { traceMatch(match); pattern->matchFunction(match); BAIL(); match->sequenceNumber++; } return; } traversal = pattern->traversals[traversalIndex]; resetMatchForThisTraversal(match, traversalIndex, traversal, 1); if (debugMatch) { printf("%s[%d]\n", match->patternName, traversalIndex); } atomsAreTheSame = traversal->a == traversal->b; while ((atomsAreTheSame || resetMatchForThisTraversal(match, traversalIndex, traversal, 2)) && (atomA = matchOneAtom(match, pattern, traversal->a, traversalIndex, &atomACallCount)) != NULL) { if (traversal->a == traversal->b) { // introducing new atom, bond order doesn't matter matchOneTraversal(match, pattern, traversalIndex+1); BAIL(); continue; } // build list of atoms bonded to a if (traversal->bondOrder[0] == '0') { // a must not be bonded to b while ((atomB = matchOneAtom(match, pattern, traversal->b, traversalIndex, &atomBCallCount)) != NULL) { isBonded = 0; for (bondNumber=atomA->num_bonds-1; bondNumber>=0; bondNumber--) { bond = atomA->bonds[bondNumber]; if ((atomA == bond->a1 && atomB == bond->a2) || (atomA == bond->a2 && atomB == bond->a1)) { isBonded = 1; break; } } if (!isBonded) { matchOneTraversal(match, pattern, traversalIndex+1); BAIL(); } } continue; } for (bondNumber=atomA->num_bonds-1; bondNumber>=0; bondNumber--) { bond = atomA->bonds[bondNumber]; if (matchBondOrder(bond->order, traversal)) { if (atomA == bond->a1) { atomB = bond->a2; } else { atomB = bond->a1; } if (debugMatch) { printf("[%d] bond %d checking ", traversalIndex, bondNumber); printAtomShort(stdout, atomB); printf("\n"); } if (matchSpecificAtom(match, traversal->b, atomB, traversalIndex)) { matchOneTraversal(match, pattern, traversalIndex+1); BAIL(); } resetMatchForThisTraversal(match, traversalIndex, traversal, 2); } } } } // Housekeeping that has to happen around the recursive // matchOneTraversal() routine. static int matchPartToPattern(struct part *part, struct compiledPattern *pattern, int sequenceNumber) { struct patternMatch *match; match = makeMatch(part, pattern, sequenceNumber); matchSet = hashtable_new(64); matchOneTraversal(match, pattern, 0); BAILR(sequenceNumber); destroyMatch(match); hashtable_destroy(matchSet, NULL); matchSet = NULL; BAILR(sequenceNumber); addQueuedComponents(part); return match->sequenceNumber; } static struct atomType *unmatchable = NULL; // Creates an atomType which cannot represent any atoms. Used only if // a pattern references an unknown type. In that case, the pattern // can never match. static void makeUnmatchableType(void) { if (unmatchable != NULL) { return; } unmatchable = (struct atomType *)allocate(sizeof(struct atomType)); unmatchable->protons = -1; unmatchable->group = -1; unmatchable->period = -1; unmatchable->name = "error"; unmatchable->symbol = "error"; unmatchable->mass = -1; unmatchable->vanDerWaalsRadius = -1; unmatchable->e_vanDerWaals = -1; unmatchable->n_bonds = -1; unmatchable->covalentRadius = -1; unmatchable->charge = 0; unmatchable->refCount = 1; } struct compiledPatternAtom * makePatternAtom(int id, char *type) { struct compiledPatternAtom *a; struct atomType *t = getAtomTypeByName(type); if (t == NULL) { fprintf(stderr, "unknown atom type: %s\n", type); t = unmatchable; } a = (struct compiledPatternAtom *) allocate(sizeof(struct compiledPatternAtom)); a->type = t; a->idInPattern = id; return a; } struct compiledPatternTraversal * makeTraversal2(struct compiledPatternAtom *a, struct compiledPatternAtom *b, char *bondOrders) { struct compiledPatternTraversal *t; t = (struct compiledPatternTraversal *) allocate(sizeof(struct compiledPatternTraversal)); t->a = a; t->b = b; t->bondOrder[0] = bondOrders[0]; t->bondOrder[1] = bondOrders[1]; t->bondOrder[2] = bondOrders[2]; t->bondOrder[3] = bondOrders[3]; return t; } struct compiledPatternTraversal * makeTraversal(struct compiledPatternAtom *a, struct compiledPatternAtom *b, char bondOrder) { char buf[4]; buf[0] = bondOrder; buf[1] = '\0'; return makeTraversal2(a, b, buf); } static int numPatterns; static struct compiledPattern **allPatterns; struct compiledPattern * makePattern(char *name, void (*matchFunction)(struct patternMatch *match), int numAtoms, int numTraversals, struct compiledPatternTraversal **traversals) { int i; struct compiledPattern *pat; pat = (struct compiledPattern *)allocate(sizeof(struct compiledPattern)); pat->name = name; pat->matchFunction = matchFunction; pat->numberOfAtoms = numAtoms; pat->numberOfTraversals = numTraversals; pat->allowDuplicates = 0; pat->traversals = (struct compiledPatternTraversal **) allocate(sizeof(struct compiledPatternTraversal *) * numTraversals); for (i=0; itraversals[i] = traversals[i]; } numPatterns++; allPatterns = (struct compiledPattern **)accumulator(allPatterns, sizeof(struct compiledPattern *) * numPatterns, 0); allPatterns[numPatterns - 1] = pat; return pat; } void matchPartToAllPatterns(struct part *part) { int i; int sequenceNumber = 1; for (i=0; i