Need Faster Word Builder Algorithm











up vote
2
down vote

favorite












I have an app that assists studying for Scrabble. Most searches are much faster than by desktop version in C#, except the Word Builder. This search shows all the words that can be formed from a given set of letters A-Z, or blanks.
What can I do to get it to run faster?
I've considered using a Trie, but haven't found a way to support the use of blanks.
I am using a SimpleCursorAdapter to populate the ListView, which is why I am returning a cursor.



    public Cursor getCursor_subanagrams(String term, String filters, String ordering) {
if (term.trim() == "")
return null;
// only difference between this and anagram is changing the length filter
char a = term.toCharArray(); // anagram

int first = new int[26]; // letter count of anagram
int c; // array position
int blankcount = 0;

// initialize word to anagram
for (c = 0; c < a.length; c++) {
if (a[c] == '?') {
blankcount++;
continue;
}
first[a[c] - 'A']++;
}

// gets pool of words to search through
String lenFilter = String.format("Length(Word) <= %1$s AND Length(Word) <= %2$s", LexData.getMaxLength(), term.length());
Cursor cursor = database.rawQuery("SELECT WordID as _id, Word, WordID, FrontHooks, BackHooks, " +
"InnerFront, InnerBack, Anagrams, ProbFactor, OPlayFactor, Score n" +
"FROM `" + LexData.getLexName() + "` n" +
"WHERE (" + lenFilter +
filters +
" ) " + ordering, null);

// creates new cursor to add valid words to
MatrixCursor matrixCursor = new MatrixCursor(new String{"_id", "Word", "WordID", "FrontHooks", "BackHooks", "InnerFront", "InnerBack",
"Anagrams", "ProbFactor", "OPlayFactor", "Score"});

// THIS NEEDS TO BE FASTER
while (cursor.moveToNext()) {
String word = cursor.getString(1);
char b = word.toCharArray();
if (isAnagram(first, b, blankcount)) {
matrixCursor.addRow(get_CursorRow(cursor));
}
}
cursor.close();
return matrixCursor;
}


private boolean isAnagram(int anagram, char word, int blankcount) {
int matchcount = blankcount;
int c; // each letter
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};

for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;

for (c = 0; c < 26; c++)
{
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}

if (matchcount == word.length)
return true;
return false;
}









share|improve this question






















  • Try doing all the "non UI" work on a separate thread, if you're not doing it already.
    – HB.
    Nov 22 at 18:11










  • This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.
    – Dana Bell
    Nov 22 at 22:54















up vote
2
down vote

favorite












I have an app that assists studying for Scrabble. Most searches are much faster than by desktop version in C#, except the Word Builder. This search shows all the words that can be formed from a given set of letters A-Z, or blanks.
What can I do to get it to run faster?
I've considered using a Trie, but haven't found a way to support the use of blanks.
I am using a SimpleCursorAdapter to populate the ListView, which is why I am returning a cursor.



    public Cursor getCursor_subanagrams(String term, String filters, String ordering) {
if (term.trim() == "")
return null;
// only difference between this and anagram is changing the length filter
char a = term.toCharArray(); // anagram

int first = new int[26]; // letter count of anagram
int c; // array position
int blankcount = 0;

// initialize word to anagram
for (c = 0; c < a.length; c++) {
if (a[c] == '?') {
blankcount++;
continue;
}
first[a[c] - 'A']++;
}

// gets pool of words to search through
String lenFilter = String.format("Length(Word) <= %1$s AND Length(Word) <= %2$s", LexData.getMaxLength(), term.length());
Cursor cursor = database.rawQuery("SELECT WordID as _id, Word, WordID, FrontHooks, BackHooks, " +
"InnerFront, InnerBack, Anagrams, ProbFactor, OPlayFactor, Score n" +
"FROM `" + LexData.getLexName() + "` n" +
"WHERE (" + lenFilter +
filters +
" ) " + ordering, null);

// creates new cursor to add valid words to
MatrixCursor matrixCursor = new MatrixCursor(new String{"_id", "Word", "WordID", "FrontHooks", "BackHooks", "InnerFront", "InnerBack",
"Anagrams", "ProbFactor", "OPlayFactor", "Score"});

// THIS NEEDS TO BE FASTER
while (cursor.moveToNext()) {
String word = cursor.getString(1);
char b = word.toCharArray();
if (isAnagram(first, b, blankcount)) {
matrixCursor.addRow(get_CursorRow(cursor));
}
}
cursor.close();
return matrixCursor;
}


private boolean isAnagram(int anagram, char word, int blankcount) {
int matchcount = blankcount;
int c; // each letter
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};

for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;

for (c = 0; c < 26; c++)
{
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}

if (matchcount == word.length)
return true;
return false;
}









share|improve this question






















  • Try doing all the "non UI" work on a separate thread, if you're not doing it already.
    – HB.
    Nov 22 at 18:11










  • This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.
    – Dana Bell
    Nov 22 at 22:54













up vote
2
down vote

favorite









up vote
2
down vote

favorite











I have an app that assists studying for Scrabble. Most searches are much faster than by desktop version in C#, except the Word Builder. This search shows all the words that can be formed from a given set of letters A-Z, or blanks.
What can I do to get it to run faster?
I've considered using a Trie, but haven't found a way to support the use of blanks.
I am using a SimpleCursorAdapter to populate the ListView, which is why I am returning a cursor.



    public Cursor getCursor_subanagrams(String term, String filters, String ordering) {
if (term.trim() == "")
return null;
// only difference between this and anagram is changing the length filter
char a = term.toCharArray(); // anagram

int first = new int[26]; // letter count of anagram
int c; // array position
int blankcount = 0;

// initialize word to anagram
for (c = 0; c < a.length; c++) {
if (a[c] == '?') {
blankcount++;
continue;
}
first[a[c] - 'A']++;
}

// gets pool of words to search through
String lenFilter = String.format("Length(Word) <= %1$s AND Length(Word) <= %2$s", LexData.getMaxLength(), term.length());
Cursor cursor = database.rawQuery("SELECT WordID as _id, Word, WordID, FrontHooks, BackHooks, " +
"InnerFront, InnerBack, Anagrams, ProbFactor, OPlayFactor, Score n" +
"FROM `" + LexData.getLexName() + "` n" +
"WHERE (" + lenFilter +
filters +
" ) " + ordering, null);

// creates new cursor to add valid words to
MatrixCursor matrixCursor = new MatrixCursor(new String{"_id", "Word", "WordID", "FrontHooks", "BackHooks", "InnerFront", "InnerBack",
"Anagrams", "ProbFactor", "OPlayFactor", "Score"});

// THIS NEEDS TO BE FASTER
while (cursor.moveToNext()) {
String word = cursor.getString(1);
char b = word.toCharArray();
if (isAnagram(first, b, blankcount)) {
matrixCursor.addRow(get_CursorRow(cursor));
}
}
cursor.close();
return matrixCursor;
}


private boolean isAnagram(int anagram, char word, int blankcount) {
int matchcount = blankcount;
int c; // each letter
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};

for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;

for (c = 0; c < 26; c++)
{
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}

if (matchcount == word.length)
return true;
return false;
}









share|improve this question













I have an app that assists studying for Scrabble. Most searches are much faster than by desktop version in C#, except the Word Builder. This search shows all the words that can be formed from a given set of letters A-Z, or blanks.
What can I do to get it to run faster?
I've considered using a Trie, but haven't found a way to support the use of blanks.
I am using a SimpleCursorAdapter to populate the ListView, which is why I am returning a cursor.



    public Cursor getCursor_subanagrams(String term, String filters, String ordering) {
if (term.trim() == "")
return null;
// only difference between this and anagram is changing the length filter
char a = term.toCharArray(); // anagram

int first = new int[26]; // letter count of anagram
int c; // array position
int blankcount = 0;

// initialize word to anagram
for (c = 0; c < a.length; c++) {
if (a[c] == '?') {
blankcount++;
continue;
}
first[a[c] - 'A']++;
}

// gets pool of words to search through
String lenFilter = String.format("Length(Word) <= %1$s AND Length(Word) <= %2$s", LexData.getMaxLength(), term.length());
Cursor cursor = database.rawQuery("SELECT WordID as _id, Word, WordID, FrontHooks, BackHooks, " +
"InnerFront, InnerBack, Anagrams, ProbFactor, OPlayFactor, Score n" +
"FROM `" + LexData.getLexName() + "` n" +
"WHERE (" + lenFilter +
filters +
" ) " + ordering, null);

// creates new cursor to add valid words to
MatrixCursor matrixCursor = new MatrixCursor(new String{"_id", "Word", "WordID", "FrontHooks", "BackHooks", "InnerFront", "InnerBack",
"Anagrams", "ProbFactor", "OPlayFactor", "Score"});

// THIS NEEDS TO BE FASTER
while (cursor.moveToNext()) {
String word = cursor.getString(1);
char b = word.toCharArray();
if (isAnagram(first, b, blankcount)) {
matrixCursor.addRow(get_CursorRow(cursor));
}
}
cursor.close();
return matrixCursor;
}


private boolean isAnagram(int anagram, char word, int blankcount) {
int matchcount = blankcount;
int c; // each letter
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};

for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;

for (c = 0; c < 26; c++)
{
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}

if (matchcount == word.length)
return true;
return false;
}






java android algorithm simplecursoradapter






share|improve this question













share|improve this question











share|improve this question




share|improve this question










asked Nov 22 at 16:05









Dana Bell

246




246












  • Try doing all the "non UI" work on a separate thread, if you're not doing it already.
    – HB.
    Nov 22 at 18:11










  • This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.
    – Dana Bell
    Nov 22 at 22:54


















  • Try doing all the "non UI" work on a separate thread, if you're not doing it already.
    – HB.
    Nov 22 at 18:11










  • This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.
    – Dana Bell
    Nov 22 at 22:54
















Try doing all the "non UI" work on a separate thread, if you're not doing it already.
– HB.
Nov 22 at 18:11




Try doing all the "non UI" work on a separate thread, if you're not doing it already.
– HB.
Nov 22 at 18:11












This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.
– Dana Bell
Nov 22 at 22:54




This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.
– Dana Bell
Nov 22 at 22:54












3 Answers
3






active

oldest

votes

















up vote
0
down vote













Focus on speeding up the most typical case, which is where the word is not a (sub)anagram, and you return false. If you can identify as quickly as possible when it is not possible to make word out of anagram then you can avoid the expensive test.



One way to do this is using a bitmask of the letters in the words. You don't need to store letter counts, because if the number of unique letters in word that are not in anagram is greater than the number of blanks, then there is no way you can make it and you can quickly return false. If not then you can proceed to the more expensive test taking letter counts into account.



You can precompute the bitmasks like this:



private int letterMask(char word)
{
int c, mask = 0;
for (c = 0; c < word.length; c++)
mask |= (1 << (word[c] - 'A'));
return mask;
}


Add an extra column to your database to store the letter bitmask for each word, add it to your cursor, and compute the letter bitmask for the letters in term and store in termMask. Then inside your cursor loop you can do a test like this:



// compute mask of bits in mask that are not in term:
int missingLettersMask = cursor.getInt(8) & ~termMask;
if(missingLettersMask != 0)
{
// check if we could possibly make up for these letters using blanks:
int remainingBlanks = blankcount;
while((remainingBlanks-- > 0) && (missingLettersMask != 0))
missingLettersMask &= (missingLettersMask - 1); // remove one bit

if(missingLettersMask != 0)
continue; // move onto the next word
}

// word can potentially be made from anagram, call isAnagram:





share|improve this answer























  • Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
    – Dana Bell
    Nov 24 at 16:22










  • Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
    – Dana Bell
    Nov 24 at 16:29










  • That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
    – samgak
    Nov 25 at 2:18










  • Do you get much of a speed up by removing the ordering clause?
    – samgak
    Nov 26 at 1:32










  • The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
    – Dana Bell
    Nov 26 at 17:18


















up vote
0
down vote













There are ways to speed up your anagram checking function. Samgak has pointed out one. Another obvious optimisation is to return false if the word is longer than the number of available letters plus blanks. In the end, these are all micro-optimisations and you will end up checking your whole dictionary.



You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:




  • Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".

  • Insert the sorted word into the trie.

  • Where you would place the end-marker in a normal trie, you place a list of possible words.


If you were looking for exact anagrams, you would sort the word to check, traverse the trie and print out the list of possible anagrams at the end. But here, you have to deal with partial anagrams and with blanks:




  • Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.

  • Partial anagrams can be found by ignoring the next letter without descending in the trie.

  • Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.


When you have blanks, you will end up with duplicates. For example, if you have the letters A, B and C and a blank tile, you can make the word CAB, but you can get there on four different ways: CAB, _AB, C_B, CA_.



You could get around this by storing the result list in a data structure that eliminates duplicates, such as a set or ordered set, but you would still go down the same paths several times in order to create the duplicates.



A better solution is to keep track of which trie nodes you have visited with which parameters, i.e. with with remaining unused letters and blanks. You can then cut such paths short. Here's an implementation in pseudocode:



function find_r(t, str, blanks, visited)
{
// don't revisit explored paths
key = make_key(t, str, blanks);

if (key in visited) return ;
visited ~= key;

if (str.length == 0 and blanks == 0) {
// all resources have been used: return list of anagrams
return t.word;
} else {
res = ;
c = 0;

if (str.length > 0) {
c = str[0];

// regular traversal: use current letter and descend
if (c in t.next) {
res ~= find_r(t.next[c], str[1:], blanks, visited);
}

# partial anagrams: skip current letter and don't descend
l = 1
while (l < str.length and str[l] == c) l++;

res ~= find_r(t, str[l:], blanks, visited);
}

if (blanks > 0) {
// blanks: decrease blanks and descend
for (i in t.next) {
if (i < c) {
res ~= find_r(t.next[i], str, blanks - 1, visited);
}
}
}

return res;
}
}


(Here, ~ denotes list concatenation or set insertion; [beg=0:end=length] denotes string slices; in tests whether a dictionary or set contains a key.)



Once you've built the tree, this solution is fast when there are no blanks, but it gets exponentially worse with each blank and with larger letter pools. Testing with one blank is still reasonably fast, but with two blanks, it is on par with your existing solution.



Now there are at most two blanks in a Scrabble game and the rack can hold only up to seven tiles, so it may not be as bad in practice. Another question is whether the search should consider words obtained with two blanks. The results list will be very long and it will contain all two-letter words. The player may be more interested in high-scoring words that can be played with a single blank.






share|improve this answer























  • Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
    – Dana Bell
    Nov 24 at 17:15




















up vote
0
down vote













I used samgak's recommendation to check for "mismatches".
Instead of going through the process of checking each letter before continuing, I exit during the process of checking each letter. I thought I could just check mismatches and ignore matchcounting, but couldn't get it to work accurately. More analysis must be needed.



This speeded things up about 20% according to initial tests with an emulator.
I may be able to speed things up even more by checking letters based on letter frequency; check most common letters(LNSTE) before others (ZQJ). Now to figure out how to sort an array by one field in the array, but that's another question.



    private boolean isAnagram(int anagram, char word, int blankcount) {
// anagram is letters in term
int matchcount = blankcount;
int mismatchcount = 0;
int c;
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};

for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;

for (c = 0; c < 26; c++)
{
mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);
if (mismatchcount > blankcount)
return false;
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}

if (matchcount == word.length)
return true;
return false;
}


EDIT:
Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.



    private boolean isAnagram(int anagram, char word, int blankcount) {
int mismatchcount = 0;
int c;
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
int letter = 0;

for (c = 0; c < word.length; c++) {
letter = ++second[word[c] - 'A'];
mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);
if (mismatchcount > blankcount)
return false;
}
return true;
}





share|improve this answer























    Your Answer






    StackExchange.ifUsing("editor", function () {
    StackExchange.using("externalEditor", function () {
    StackExchange.using("snippets", function () {
    StackExchange.snippets.init();
    });
    });
    }, "code-snippets");

    StackExchange.ready(function() {
    var channelOptions = {
    tags: "".split(" "),
    id: "1"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
    // Have to fire editor after snippets, if snippets enabled
    if (StackExchange.settings.snippets.snippetsEnabled) {
    StackExchange.using("snippets", function() {
    createEditor();
    });
    }
    else {
    createEditor();
    }
    });

    function createEditor() {
    StackExchange.prepareEditor({
    heartbeatType: 'answer',
    convertImagesToLinks: true,
    noModals: true,
    showLowRepImageUploadWarning: true,
    reputationToPostImages: 10,
    bindNavPrevention: true,
    postfix: "",
    imageUploader: {
    brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
    contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
    allowUrls: true
    },
    onDemand: true,
    discardSelector: ".discard-answer"
    ,immediatelyShowMarkdownHelp:true
    });


    }
    });














    draft saved

    draft discarded


















    StackExchange.ready(
    function () {
    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53434695%2fneed-faster-word-builder-algorithm%23new-answer', 'question_page');
    }
    );

    Post as a guest















    Required, but never shown

























    3 Answers
    3






    active

    oldest

    votes








    3 Answers
    3






    active

    oldest

    votes









    active

    oldest

    votes






    active

    oldest

    votes








    up vote
    0
    down vote













    Focus on speeding up the most typical case, which is where the word is not a (sub)anagram, and you return false. If you can identify as quickly as possible when it is not possible to make word out of anagram then you can avoid the expensive test.



    One way to do this is using a bitmask of the letters in the words. You don't need to store letter counts, because if the number of unique letters in word that are not in anagram is greater than the number of blanks, then there is no way you can make it and you can quickly return false. If not then you can proceed to the more expensive test taking letter counts into account.



    You can precompute the bitmasks like this:



    private int letterMask(char word)
    {
    int c, mask = 0;
    for (c = 0; c < word.length; c++)
    mask |= (1 << (word[c] - 'A'));
    return mask;
    }


    Add an extra column to your database to store the letter bitmask for each word, add it to your cursor, and compute the letter bitmask for the letters in term and store in termMask. Then inside your cursor loop you can do a test like this:



    // compute mask of bits in mask that are not in term:
    int missingLettersMask = cursor.getInt(8) & ~termMask;
    if(missingLettersMask != 0)
    {
    // check if we could possibly make up for these letters using blanks:
    int remainingBlanks = blankcount;
    while((remainingBlanks-- > 0) && (missingLettersMask != 0))
    missingLettersMask &= (missingLettersMask - 1); // remove one bit

    if(missingLettersMask != 0)
    continue; // move onto the next word
    }

    // word can potentially be made from anagram, call isAnagram:





    share|improve this answer























    • Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
      – Dana Bell
      Nov 24 at 16:22










    • Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
      – Dana Bell
      Nov 24 at 16:29










    • That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
      – samgak
      Nov 25 at 2:18










    • Do you get much of a speed up by removing the ordering clause?
      – samgak
      Nov 26 at 1:32










    • The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
      – Dana Bell
      Nov 26 at 17:18















    up vote
    0
    down vote













    Focus on speeding up the most typical case, which is where the word is not a (sub)anagram, and you return false. If you can identify as quickly as possible when it is not possible to make word out of anagram then you can avoid the expensive test.



    One way to do this is using a bitmask of the letters in the words. You don't need to store letter counts, because if the number of unique letters in word that are not in anagram is greater than the number of blanks, then there is no way you can make it and you can quickly return false. If not then you can proceed to the more expensive test taking letter counts into account.



    You can precompute the bitmasks like this:



    private int letterMask(char word)
    {
    int c, mask = 0;
    for (c = 0; c < word.length; c++)
    mask |= (1 << (word[c] - 'A'));
    return mask;
    }


    Add an extra column to your database to store the letter bitmask for each word, add it to your cursor, and compute the letter bitmask for the letters in term and store in termMask. Then inside your cursor loop you can do a test like this:



    // compute mask of bits in mask that are not in term:
    int missingLettersMask = cursor.getInt(8) & ~termMask;
    if(missingLettersMask != 0)
    {
    // check if we could possibly make up for these letters using blanks:
    int remainingBlanks = blankcount;
    while((remainingBlanks-- > 0) && (missingLettersMask != 0))
    missingLettersMask &= (missingLettersMask - 1); // remove one bit

    if(missingLettersMask != 0)
    continue; // move onto the next word
    }

    // word can potentially be made from anagram, call isAnagram:





    share|improve this answer























    • Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
      – Dana Bell
      Nov 24 at 16:22










    • Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
      – Dana Bell
      Nov 24 at 16:29










    • That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
      – samgak
      Nov 25 at 2:18










    • Do you get much of a speed up by removing the ordering clause?
      – samgak
      Nov 26 at 1:32










    • The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
      – Dana Bell
      Nov 26 at 17:18













    up vote
    0
    down vote










    up vote
    0
    down vote









    Focus on speeding up the most typical case, which is where the word is not a (sub)anagram, and you return false. If you can identify as quickly as possible when it is not possible to make word out of anagram then you can avoid the expensive test.



    One way to do this is using a bitmask of the letters in the words. You don't need to store letter counts, because if the number of unique letters in word that are not in anagram is greater than the number of blanks, then there is no way you can make it and you can quickly return false. If not then you can proceed to the more expensive test taking letter counts into account.



    You can precompute the bitmasks like this:



    private int letterMask(char word)
    {
    int c, mask = 0;
    for (c = 0; c < word.length; c++)
    mask |= (1 << (word[c] - 'A'));
    return mask;
    }


    Add an extra column to your database to store the letter bitmask for each word, add it to your cursor, and compute the letter bitmask for the letters in term and store in termMask. Then inside your cursor loop you can do a test like this:



    // compute mask of bits in mask that are not in term:
    int missingLettersMask = cursor.getInt(8) & ~termMask;
    if(missingLettersMask != 0)
    {
    // check if we could possibly make up for these letters using blanks:
    int remainingBlanks = blankcount;
    while((remainingBlanks-- > 0) && (missingLettersMask != 0))
    missingLettersMask &= (missingLettersMask - 1); // remove one bit

    if(missingLettersMask != 0)
    continue; // move onto the next word
    }

    // word can potentially be made from anagram, call isAnagram:





    share|improve this answer














    Focus on speeding up the most typical case, which is where the word is not a (sub)anagram, and you return false. If you can identify as quickly as possible when it is not possible to make word out of anagram then you can avoid the expensive test.



    One way to do this is using a bitmask of the letters in the words. You don't need to store letter counts, because if the number of unique letters in word that are not in anagram is greater than the number of blanks, then there is no way you can make it and you can quickly return false. If not then you can proceed to the more expensive test taking letter counts into account.



    You can precompute the bitmasks like this:



    private int letterMask(char word)
    {
    int c, mask = 0;
    for (c = 0; c < word.length; c++)
    mask |= (1 << (word[c] - 'A'));
    return mask;
    }


    Add an extra column to your database to store the letter bitmask for each word, add it to your cursor, and compute the letter bitmask for the letters in term and store in termMask. Then inside your cursor loop you can do a test like this:



    // compute mask of bits in mask that are not in term:
    int missingLettersMask = cursor.getInt(8) & ~termMask;
    if(missingLettersMask != 0)
    {
    // check if we could possibly make up for these letters using blanks:
    int remainingBlanks = blankcount;
    while((remainingBlanks-- > 0) && (missingLettersMask != 0))
    missingLettersMask &= (missingLettersMask - 1); // remove one bit

    if(missingLettersMask != 0)
    continue; // move onto the next word
    }

    // word can potentially be made from anagram, call isAnagram:






    share|improve this answer














    share|improve this answer



    share|improve this answer








    edited Nov 23 at 8:34

























    answered Nov 23 at 0:29









    samgak

    18.8k33058




    18.8k33058












    • Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
      – Dana Bell
      Nov 24 at 16:22










    • Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
      – Dana Bell
      Nov 24 at 16:29










    • That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
      – samgak
      Nov 25 at 2:18










    • Do you get much of a speed up by removing the ordering clause?
      – samgak
      Nov 26 at 1:32










    • The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
      – Dana Bell
      Nov 26 at 17:18


















    • Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
      – Dana Bell
      Nov 24 at 16:22










    • Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
      – Dana Bell
      Nov 24 at 16:29










    • That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
      – samgak
      Nov 25 at 2:18










    • Do you get much of a speed up by removing the ordering clause?
      – samgak
      Nov 26 at 1:32










    • The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
      – Dana Bell
      Nov 26 at 17:18
















    Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
    – Dana Bell
    Nov 24 at 16:22




    Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
    – Dana Bell
    Nov 24 at 16:22












    Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
    – Dana Bell
    Nov 24 at 16:29




    Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
    – Dana Bell
    Nov 24 at 16:29












    That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
    – samgak
    Nov 25 at 2:18




    That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
    – samgak
    Nov 25 at 2:18












    Do you get much of a speed up by removing the ordering clause?
    – samgak
    Nov 26 at 1:32




    Do you get much of a speed up by removing the ordering clause?
    – samgak
    Nov 26 at 1:32












    The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
    – Dana Bell
    Nov 26 at 17:18




    The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
    – Dana Bell
    Nov 26 at 17:18












    up vote
    0
    down vote













    There are ways to speed up your anagram checking function. Samgak has pointed out one. Another obvious optimisation is to return false if the word is longer than the number of available letters plus blanks. In the end, these are all micro-optimisations and you will end up checking your whole dictionary.



    You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:




    • Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".

    • Insert the sorted word into the trie.

    • Where you would place the end-marker in a normal trie, you place a list of possible words.


    If you were looking for exact anagrams, you would sort the word to check, traverse the trie and print out the list of possible anagrams at the end. But here, you have to deal with partial anagrams and with blanks:




    • Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.

    • Partial anagrams can be found by ignoring the next letter without descending in the trie.

    • Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.


    When you have blanks, you will end up with duplicates. For example, if you have the letters A, B and C and a blank tile, you can make the word CAB, but you can get there on four different ways: CAB, _AB, C_B, CA_.



    You could get around this by storing the result list in a data structure that eliminates duplicates, such as a set or ordered set, but you would still go down the same paths several times in order to create the duplicates.



    A better solution is to keep track of which trie nodes you have visited with which parameters, i.e. with with remaining unused letters and blanks. You can then cut such paths short. Here's an implementation in pseudocode:



    function find_r(t, str, blanks, visited)
    {
    // don't revisit explored paths
    key = make_key(t, str, blanks);

    if (key in visited) return ;
    visited ~= key;

    if (str.length == 0 and blanks == 0) {
    // all resources have been used: return list of anagrams
    return t.word;
    } else {
    res = ;
    c = 0;

    if (str.length > 0) {
    c = str[0];

    // regular traversal: use current letter and descend
    if (c in t.next) {
    res ~= find_r(t.next[c], str[1:], blanks, visited);
    }

    # partial anagrams: skip current letter and don't descend
    l = 1
    while (l < str.length and str[l] == c) l++;

    res ~= find_r(t, str[l:], blanks, visited);
    }

    if (blanks > 0) {
    // blanks: decrease blanks and descend
    for (i in t.next) {
    if (i < c) {
    res ~= find_r(t.next[i], str, blanks - 1, visited);
    }
    }
    }

    return res;
    }
    }


    (Here, ~ denotes list concatenation or set insertion; [beg=0:end=length] denotes string slices; in tests whether a dictionary or set contains a key.)



    Once you've built the tree, this solution is fast when there are no blanks, but it gets exponentially worse with each blank and with larger letter pools. Testing with one blank is still reasonably fast, but with two blanks, it is on par with your existing solution.



    Now there are at most two blanks in a Scrabble game and the rack can hold only up to seven tiles, so it may not be as bad in practice. Another question is whether the search should consider words obtained with two blanks. The results list will be very long and it will contain all two-letter words. The player may be more interested in high-scoring words that can be played with a single blank.






    share|improve this answer























    • Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
      – Dana Bell
      Nov 24 at 17:15

















    up vote
    0
    down vote













    There are ways to speed up your anagram checking function. Samgak has pointed out one. Another obvious optimisation is to return false if the word is longer than the number of available letters plus blanks. In the end, these are all micro-optimisations and you will end up checking your whole dictionary.



    You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:




    • Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".

    • Insert the sorted word into the trie.

    • Where you would place the end-marker in a normal trie, you place a list of possible words.


    If you were looking for exact anagrams, you would sort the word to check, traverse the trie and print out the list of possible anagrams at the end. But here, you have to deal with partial anagrams and with blanks:




    • Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.

    • Partial anagrams can be found by ignoring the next letter without descending in the trie.

    • Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.


    When you have blanks, you will end up with duplicates. For example, if you have the letters A, B and C and a blank tile, you can make the word CAB, but you can get there on four different ways: CAB, _AB, C_B, CA_.



    You could get around this by storing the result list in a data structure that eliminates duplicates, such as a set or ordered set, but you would still go down the same paths several times in order to create the duplicates.



    A better solution is to keep track of which trie nodes you have visited with which parameters, i.e. with with remaining unused letters and blanks. You can then cut such paths short. Here's an implementation in pseudocode:



    function find_r(t, str, blanks, visited)
    {
    // don't revisit explored paths
    key = make_key(t, str, blanks);

    if (key in visited) return ;
    visited ~= key;

    if (str.length == 0 and blanks == 0) {
    // all resources have been used: return list of anagrams
    return t.word;
    } else {
    res = ;
    c = 0;

    if (str.length > 0) {
    c = str[0];

    // regular traversal: use current letter and descend
    if (c in t.next) {
    res ~= find_r(t.next[c], str[1:], blanks, visited);
    }

    # partial anagrams: skip current letter and don't descend
    l = 1
    while (l < str.length and str[l] == c) l++;

    res ~= find_r(t, str[l:], blanks, visited);
    }

    if (blanks > 0) {
    // blanks: decrease blanks and descend
    for (i in t.next) {
    if (i < c) {
    res ~= find_r(t.next[i], str, blanks - 1, visited);
    }
    }
    }

    return res;
    }
    }


    (Here, ~ denotes list concatenation or set insertion; [beg=0:end=length] denotes string slices; in tests whether a dictionary or set contains a key.)



    Once you've built the tree, this solution is fast when there are no blanks, but it gets exponentially worse with each blank and with larger letter pools. Testing with one blank is still reasonably fast, but with two blanks, it is on par with your existing solution.



    Now there are at most two blanks in a Scrabble game and the rack can hold only up to seven tiles, so it may not be as bad in practice. Another question is whether the search should consider words obtained with two blanks. The results list will be very long and it will contain all two-letter words. The player may be more interested in high-scoring words that can be played with a single blank.






    share|improve this answer























    • Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
      – Dana Bell
      Nov 24 at 17:15















    up vote
    0
    down vote










    up vote
    0
    down vote









    There are ways to speed up your anagram checking function. Samgak has pointed out one. Another obvious optimisation is to return false if the word is longer than the number of available letters plus blanks. In the end, these are all micro-optimisations and you will end up checking your whole dictionary.



    You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:




    • Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".

    • Insert the sorted word into the trie.

    • Where you would place the end-marker in a normal trie, you place a list of possible words.


    If you were looking for exact anagrams, you would sort the word to check, traverse the trie and print out the list of possible anagrams at the end. But here, you have to deal with partial anagrams and with blanks:




    • Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.

    • Partial anagrams can be found by ignoring the next letter without descending in the trie.

    • Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.


    When you have blanks, you will end up with duplicates. For example, if you have the letters A, B and C and a blank tile, you can make the word CAB, but you can get there on four different ways: CAB, _AB, C_B, CA_.



    You could get around this by storing the result list in a data structure that eliminates duplicates, such as a set or ordered set, but you would still go down the same paths several times in order to create the duplicates.



    A better solution is to keep track of which trie nodes you have visited with which parameters, i.e. with with remaining unused letters and blanks. You can then cut such paths short. Here's an implementation in pseudocode:



    function find_r(t, str, blanks, visited)
    {
    // don't revisit explored paths
    key = make_key(t, str, blanks);

    if (key in visited) return ;
    visited ~= key;

    if (str.length == 0 and blanks == 0) {
    // all resources have been used: return list of anagrams
    return t.word;
    } else {
    res = ;
    c = 0;

    if (str.length > 0) {
    c = str[0];

    // regular traversal: use current letter and descend
    if (c in t.next) {
    res ~= find_r(t.next[c], str[1:], blanks, visited);
    }

    # partial anagrams: skip current letter and don't descend
    l = 1
    while (l < str.length and str[l] == c) l++;

    res ~= find_r(t, str[l:], blanks, visited);
    }

    if (blanks > 0) {
    // blanks: decrease blanks and descend
    for (i in t.next) {
    if (i < c) {
    res ~= find_r(t.next[i], str, blanks - 1, visited);
    }
    }
    }

    return res;
    }
    }


    (Here, ~ denotes list concatenation or set insertion; [beg=0:end=length] denotes string slices; in tests whether a dictionary or set contains a key.)



    Once you've built the tree, this solution is fast when there are no blanks, but it gets exponentially worse with each blank and with larger letter pools. Testing with one blank is still reasonably fast, but with two blanks, it is on par with your existing solution.



    Now there are at most two blanks in a Scrabble game and the rack can hold only up to seven tiles, so it may not be as bad in practice. Another question is whether the search should consider words obtained with two blanks. The results list will be very long and it will contain all two-letter words. The player may be more interested in high-scoring words that can be played with a single blank.






    share|improve this answer














    There are ways to speed up your anagram checking function. Samgak has pointed out one. Another obvious optimisation is to return false if the word is longer than the number of available letters plus blanks. In the end, these are all micro-optimisations and you will end up checking your whole dictionary.



    You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:




    • Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".

    • Insert the sorted word into the trie.

    • Where you would place the end-marker in a normal trie, you place a list of possible words.


    If you were looking for exact anagrams, you would sort the word to check, traverse the trie and print out the list of possible anagrams at the end. But here, you have to deal with partial anagrams and with blanks:




    • Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.

    • Partial anagrams can be found by ignoring the next letter without descending in the trie.

    • Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.


    When you have blanks, you will end up with duplicates. For example, if you have the letters A, B and C and a blank tile, you can make the word CAB, but you can get there on four different ways: CAB, _AB, C_B, CA_.



    You could get around this by storing the result list in a data structure that eliminates duplicates, such as a set or ordered set, but you would still go down the same paths several times in order to create the duplicates.



    A better solution is to keep track of which trie nodes you have visited with which parameters, i.e. with with remaining unused letters and blanks. You can then cut such paths short. Here's an implementation in pseudocode:



    function find_r(t, str, blanks, visited)
    {
    // don't revisit explored paths
    key = make_key(t, str, blanks);

    if (key in visited) return ;
    visited ~= key;

    if (str.length == 0 and blanks == 0) {
    // all resources have been used: return list of anagrams
    return t.word;
    } else {
    res = ;
    c = 0;

    if (str.length > 0) {
    c = str[0];

    // regular traversal: use current letter and descend
    if (c in t.next) {
    res ~= find_r(t.next[c], str[1:], blanks, visited);
    }

    # partial anagrams: skip current letter and don't descend
    l = 1
    while (l < str.length and str[l] == c) l++;

    res ~= find_r(t, str[l:], blanks, visited);
    }

    if (blanks > 0) {
    // blanks: decrease blanks and descend
    for (i in t.next) {
    if (i < c) {
    res ~= find_r(t.next[i], str, blanks - 1, visited);
    }
    }
    }

    return res;
    }
    }


    (Here, ~ denotes list concatenation or set insertion; [beg=0:end=length] denotes string slices; in tests whether a dictionary or set contains a key.)



    Once you've built the tree, this solution is fast when there are no blanks, but it gets exponentially worse with each blank and with larger letter pools. Testing with one blank is still reasonably fast, but with two blanks, it is on par with your existing solution.



    Now there are at most two blanks in a Scrabble game and the rack can hold only up to seven tiles, so it may not be as bad in practice. Another question is whether the search should consider words obtained with two blanks. The results list will be very long and it will contain all two-letter words. The player may be more interested in high-scoring words that can be played with a single blank.







    share|improve this answer














    share|improve this answer



    share|improve this answer








    edited Nov 24 at 16:25

























    answered Nov 24 at 8:53









    M Oehm

    21.3k31731




    21.3k31731












    • Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
      – Dana Bell
      Nov 24 at 17:15




















    • Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
      – Dana Bell
      Nov 24 at 17:15


















    Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
    – Dana Bell
    Nov 24 at 17:15






    Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
    – Dana Bell
    Nov 24 at 17:15












    up vote
    0
    down vote













    I used samgak's recommendation to check for "mismatches".
    Instead of going through the process of checking each letter before continuing, I exit during the process of checking each letter. I thought I could just check mismatches and ignore matchcounting, but couldn't get it to work accurately. More analysis must be needed.



    This speeded things up about 20% according to initial tests with an emulator.
    I may be able to speed things up even more by checking letters based on letter frequency; check most common letters(LNSTE) before others (ZQJ). Now to figure out how to sort an array by one field in the array, but that's another question.



        private boolean isAnagram(int anagram, char word, int blankcount) {
    // anagram is letters in term
    int matchcount = blankcount;
    int mismatchcount = 0;
    int c;
    int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};

    for (c = 0; c < word.length; c++)
    second[word[c] - 'A']++;

    for (c = 0; c < 26; c++)
    {
    mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);
    if (mismatchcount > blankcount)
    return false;
    matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
    }

    if (matchcount == word.length)
    return true;
    return false;
    }


    EDIT:
    Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.



        private boolean isAnagram(int anagram, char word, int blankcount) {
    int mismatchcount = 0;
    int c;
    int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
    int letter = 0;

    for (c = 0; c < word.length; c++) {
    letter = ++second[word[c] - 'A'];
    mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);
    if (mismatchcount > blankcount)
    return false;
    }
    return true;
    }





    share|improve this answer



























      up vote
      0
      down vote













      I used samgak's recommendation to check for "mismatches".
      Instead of going through the process of checking each letter before continuing, I exit during the process of checking each letter. I thought I could just check mismatches and ignore matchcounting, but couldn't get it to work accurately. More analysis must be needed.



      This speeded things up about 20% according to initial tests with an emulator.
      I may be able to speed things up even more by checking letters based on letter frequency; check most common letters(LNSTE) before others (ZQJ). Now to figure out how to sort an array by one field in the array, but that's another question.



          private boolean isAnagram(int anagram, char word, int blankcount) {
      // anagram is letters in term
      int matchcount = blankcount;
      int mismatchcount = 0;
      int c;
      int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};

      for (c = 0; c < word.length; c++)
      second[word[c] - 'A']++;

      for (c = 0; c < 26; c++)
      {
      mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);
      if (mismatchcount > blankcount)
      return false;
      matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
      }

      if (matchcount == word.length)
      return true;
      return false;
      }


      EDIT:
      Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.



          private boolean isAnagram(int anagram, char word, int blankcount) {
      int mismatchcount = 0;
      int c;
      int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
      int letter = 0;

      for (c = 0; c < word.length; c++) {
      letter = ++second[word[c] - 'A'];
      mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);
      if (mismatchcount > blankcount)
      return false;
      }
      return true;
      }





      share|improve this answer

























        up vote
        0
        down vote










        up vote
        0
        down vote









        I used samgak's recommendation to check for "mismatches".
        Instead of going through the process of checking each letter before continuing, I exit during the process of checking each letter. I thought I could just check mismatches and ignore matchcounting, but couldn't get it to work accurately. More analysis must be needed.



        This speeded things up about 20% according to initial tests with an emulator.
        I may be able to speed things up even more by checking letters based on letter frequency; check most common letters(LNSTE) before others (ZQJ). Now to figure out how to sort an array by one field in the array, but that's another question.



            private boolean isAnagram(int anagram, char word, int blankcount) {
        // anagram is letters in term
        int matchcount = blankcount;
        int mismatchcount = 0;
        int c;
        int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};

        for (c = 0; c < word.length; c++)
        second[word[c] - 'A']++;

        for (c = 0; c < 26; c++)
        {
        mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);
        if (mismatchcount > blankcount)
        return false;
        matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
        }

        if (matchcount == word.length)
        return true;
        return false;
        }


        EDIT:
        Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.



            private boolean isAnagram(int anagram, char word, int blankcount) {
        int mismatchcount = 0;
        int c;
        int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
        int letter = 0;

        for (c = 0; c < word.length; c++) {
        letter = ++second[word[c] - 'A'];
        mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);
        if (mismatchcount > blankcount)
        return false;
        }
        return true;
        }





        share|improve this answer














        I used samgak's recommendation to check for "mismatches".
        Instead of going through the process of checking each letter before continuing, I exit during the process of checking each letter. I thought I could just check mismatches and ignore matchcounting, but couldn't get it to work accurately. More analysis must be needed.



        This speeded things up about 20% according to initial tests with an emulator.
        I may be able to speed things up even more by checking letters based on letter frequency; check most common letters(LNSTE) before others (ZQJ). Now to figure out how to sort an array by one field in the array, but that's another question.



            private boolean isAnagram(int anagram, char word, int blankcount) {
        // anagram is letters in term
        int matchcount = blankcount;
        int mismatchcount = 0;
        int c;
        int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};

        for (c = 0; c < word.length; c++)
        second[word[c] - 'A']++;

        for (c = 0; c < 26; c++)
        {
        mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);
        if (mismatchcount > blankcount)
        return false;
        matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
        }

        if (matchcount == word.length)
        return true;
        return false;
        }


        EDIT:
        Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.



            private boolean isAnagram(int anagram, char word, int blankcount) {
        int mismatchcount = 0;
        int c;
        int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
        int letter = 0;

        for (c = 0; c < word.length; c++) {
        letter = ++second[word[c] - 'A'];
        mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);
        if (mismatchcount > blankcount)
        return false;
        }
        return true;
        }






        share|improve this answer














        share|improve this answer



        share|improve this answer








        edited Dec 8 at 15:50

























        answered Dec 7 at 17:17









        Dana Bell

        246




        246






























            draft saved

            draft discarded




















































            Thanks for contributing an answer to Stack Overflow!


            • Please be sure to answer the question. Provide details and share your research!

            But avoid



            • Asking for help, clarification, or responding to other answers.

            • Making statements based on opinion; back them up with references or personal experience.


            To learn more, see our tips on writing great answers.





            Some of your past answers have not been well-received, and you're in danger of being blocked from answering.


            Please pay close attention to the following guidance:


            • Please be sure to answer the question. Provide details and share your research!

            But avoid



            • Asking for help, clarification, or responding to other answers.

            • Making statements based on opinion; back them up with references or personal experience.


            To learn more, see our tips on writing great answers.




            draft saved


            draft discarded














            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53434695%2fneed-faster-word-builder-algorithm%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown





















































            Required, but never shown














            Required, but never shown












            Required, but never shown







            Required, but never shown

































            Required, but never shown














            Required, but never shown












            Required, but never shown







            Required, but never shown







            Popular posts from this blog

            How to ignore python UserWarning in pytest?

            What visual should I use to simply compare current year value vs last year in Power BI desktop

            Script to remove string up to first number