KMP algorithm
KMP algorithm is used to find a “Pattern” in a “Text”. This algorithm compares character by character from left to right. But whenever a mismatch occurs, it uses a preprocessed table called “Prefix Table” to skip characters comparison while matching. Sometimes prefix table is also known as LPS Table. Here LPS stands for “Longest proper Prefix which is also Suffix”.
How to use LPS Table
We use the LPS table to decide how many characters are to be skipped for comparison when a mismatch has occurred.
When a mismatch occurs, check the LPS value of the previous character of the mismatched character in the pattern. If it is ‘0’ then start comparing the first character of the pattern with the next character to the mismatched character in the text. If it is not ‘0’ then start comparing the character which is at an index value equal to the LPS value of the previous character to the mismatched character in pattern with the mismatched character in the Text.
How the KMP Algorithm Works
Let’s take a look on working example of KMP Algorithm to find a Pattern in a Text.
Implementation of the KMP algorithm:
C++
// C++ program for implementation of KMP pattern searching // algorithm #include <bits/stdc++.h> void computeLPSArray( char * pat, int M, int * lps); // Prints occurrences of txt[] in pat[] void KMPSearch( char * pat, char * txt) { int M = strlen (pat); int N = strlen (txt); // create lps[] that will hold the longest prefix suffix // values for pattern int lps[M]; // Preprocess the pattern (calculate lps[] array) computeLPSArray(pat, M, lps); int i = 0; // index for txt[] int j = 0; // index for pat[] while ((N - i) >= (M - j)) { if (pat[j] == txt[i]) { j++; i++; } if (j == M) { printf ( "Found pattern at index %d " , i - j); j = lps[j - 1]; } // mismatch after j matches else if (i < N && pat[j] != txt[i]) { // Do not match lps[0..lps[j-1]] characters, // they will match anyway if (j != 0) j = lps[j - 1]; else i = i + 1; } } } // Fills lps[] for given pattern pat[0..M-1] void computeLPSArray( char * pat, int M, int * lps) { // length of the previous longest prefix suffix int len = 0; lps[0] = 0; // lps[0] is always 0 // the loop calculates lps[i] for i = 1 to M-1 int i = 1; while (i < M) { if (pat[i] == pat[len]) { len++; lps[i] = len; i++; } else // (pat[i] != pat[len]) { // This is tricky. Consider the example. // AAACAAAA and i = 7. The idea is similar // to search step. if (len != 0) { len = lps[len - 1]; // Also, note that we do not increment // i here } else // if (len == 0) { lps[i] = 0; i++; } } } } // Driver program to test above function int main() { char txt[] = "ABABDABACDABABCABAB" ; char pat[] = "ABABCABAB" ; KMPSearch(pat, txt); return 0; } |
Java
// Java program for implementation of KMP pattern searching // algorithm public class KMP_String_Matching { void KMPSearch(String pat, String txt) { int M = pat.length(); int N = txt.length(); // create lps[] that will hold the longest prefix suffix // values for pattern int lps[] = new int [M]; int j = 0 ; // index for pat[] // Preprocess the pattern (calculate lps[] array) computeLPSArray(pat, M, lps); int i = 0 ; // index for txt[] while (i < N) { if (pat.charAt(j) == txt.charAt(i)) { j++; i++; } if (j == M) { System.out.println( "Found pattern " + "at index " + (i - j)); j = lps[j - 1 ]; } // mismatch after j matches else if (i < N && pat.charAt(j) != txt.charAt(i)) { // Do not match lps[0..lps[j-1]] characters, // they will match anyway if (j != 0 ) j = lps[j - 1 ]; else i = i + 1 ; } } } void computeLPSArray(String pat, int M, int lps[]) { // length of the previous longest prefix suffix int len = 0 ; int i = 1 ; lps[ 0 ] = 0 ; // lps[0] is always 0 // the loop calculates lps[i] for i = 1 to M-1 while (i < M) { if (pat.charAt(i) == pat.charAt(len)) { len++; lps[i] = len; i++; } else // (pat[i] != pat[len]) { // This is tricky. Consider the example. // AAACAAAA and i = 7. The idea is similar // to search step. if (len != 0 ) { len = lps[len - 1 ]; // Also, note that we do not increment // i here } else // if (len == 0) { lps[i] = len; i++; } } } } // Driver program to test above function public static void main(String[] args) { String txt = "ABABDABACDABABCABAB" ; String pat = "ABABCABAB" ; new KMP_String_Matching().KMPSearch(pat, txt); } } |
Python3
# Python program for implementation of KMP pattern searching # algorithm def computeLPSArray(pat, M, lps): len = 0 # length of the previous longest prefix suffix lps[ 0 ] # lps[0] is always 0 i = 1 # the loop calculates lps[i] for i = 1 to M-1 while i < M: if pat[i] = = pat[ len ]: len + = 1 lps[i] = len i + = 1 else : # This is tricky. Consider the example. # AAACAAAA and i = 7. The idea is similar # to search step. if len ! = 0 : len = lps[ len - 1 ] # Also, note that we do not increment i here else : lps[i] = 0 i + = 1 def KMPSearch(pat, txt): M = len (pat) N = len (txt) # create lps[] that will hold the longest prefix suffix # values for pattern lps = [ 0 ] * M j = 0 # index for pat[] # Preprocess the pattern (calculate lps[] array) computeLPSArray(pat, M, lps) i = 0 # index for txt[] while (N - i) > = (M - j): if pat[j] = = txt[i]: j + = 1 i + = 1 if j = = M: print ( "Found pattern at index:" , i - j) j = lps[j - 1 ] # mismatch after j matches elif i < N and pat[j] ! = txt[i]: # Do not match lps[0..lps[j-1]] characters, # they will match anyway if j ! = 0 : j = lps[j - 1 ] else : i + = 1 txt = "ABABDABACDABABCABAB" pat = "ABABCABAB" KMPSearch(pat, txt) # This code is contributed by ishankhandelwals. |
C#
using System; using System.Collections.Generic; public class GFG { // Prints occurrences of txt[] in pat[] public static void KMPSearch( char [] pat, char [] txt) { int M = pat.Length; int N = txt.Length; // create lps[] that will hold the longest prefix // suffix values for pattern int [] lps = new int [M]; // Preprocess the pattern (calculate lps[] array) computeLPSArray(pat, M, lps); int i = 0; // index for txt[] int j = 0; // index for pat[] while ((N - i) >= (M - j)) { if (pat[j] == txt[i]) { j++; i++; } if (j == M) { int temp = i - j; Console.WriteLine( "Found pattern at index " + temp); j = lps[j - 1]; } // mismatch after j matches else if (i < N && pat[j] != txt[i]) { // Do not match lps[0..lps[j-1]] characters, // they will match anyway if (j != 0) j = lps[j - 1]; else i = i + 1; } } } // Fills lps[] for given pattern pat[0..M-1] public static void computeLPSArray( char [] pat, int M, int [] lps) { // length of the previous longest prefix suffix int len = 0; lps[0] = 0; // lps[0] is always 0 // the loop calculates lps[i] for i = 1 to M-1 int i = 1; while (i < M) { if (pat[i] == pat[len]) { len++; lps[i] = len; i++; } else // (pat[i] != pat[len]) { // This is tricky. Consider the example. // AAACAAAA and i = 7. The idea is similar // to search step. if (len != 0) { len = lps[len - 1]; // Also, note that we do not increment // i here } else // if (len == 0) { lps[i] = 0; i++; } } } } static public void Main() { char [] txt = "ABABDABACDABABCABAB" .ToCharArray(); char [] pat = "ABABCABAB" .ToCharArray(); KMPSearch(pat, txt); } } // This code is contributed by akashish__ |
Javascript
// JS program for implementation of KMP pattern searching // algorithm // Prlets occurrences of txt[] in pat[] function computeLPSArray(pat, M, lps) { // length of the previous longest prefix suffix let len = 0; lps[0] = 0; // lps[0] is always 0 // the loop calculates lps[i] for i = 1 to M-1 let i = 1; while (i < M) { if (pat[i] == pat[len]) { len++; lps[i] = len; i++; } else // (pat[i] != pat[len]) { // This is tricky. Consider the example. // AAACAAAA and i = 7. The idea is similar // to search step. if (len != 0) { len = lps[len - 1]; // Also, note that we do not increment // i here } else // if (len == 0) { lps[i] = 0; i++; } } } } function KMPSearch(pat, txt) { let M = pat.length; let N = txt.length // create lps[] that will hold the longest prefix suffix // values for pattern let lps = []; // Preprocess the pattern (calculate lps[] array) computeLPSArray(pat, M, lps); let i = 0; // index for txt[] let j = 0; // index for pat[] while ((N - i) >= (M - j)) { if (pat[j] == txt[i]) { j++; i++; } if (j == M) { console.log( "Found pattern at index:" , i - j); j = lps[j - 1]; } // mismatch after j matches else if (i < N && pat[j] != txt[i]) { // Do not match lps[0..lps[j-1]] characters, // they will match anyway if (j != 0) j = lps[j - 1]; else i = i + 1; } } } // Fills lps[] for given pattern pat[0..M-1] // Driver program to test above function let txt = "ABABDABACDABABCABAB" ; let pat = "ABABCABAB" ; KMPSearch(pat, txt); // This code is contributed by ishankhandelwals. |
Found pattern at index 10
Time complexity: O(n + m)
Auxiliary Space: O(M)
Introduction to Pattern Searching – Data Structure and Algorithm Tutorial
Pattern searching is an algorithm that involves searching for patterns such as strings, words, images, etc.
We use certain algorithms to do the search process. The complexity of pattern searching varies from algorithm to algorithm. They are very useful when performing a search in a database. The Pattern Searching algorithm is useful for finding patterns in substrings of larger strings. This process can be accomplished using a variety of algorithms that we are going to discuss in this blog.