Substring Problems
A general template for most substring problems taken from link.
int findSubstring(string s){
vector<int> map(128,0);
int counter; // check whether the substring is valid
int begin=0, end=0; //two pointers, one point to tail and one head
int d; //the length of substring
for() { /* initialize the hash map here */ }
while(end<s.size()){
if(map[s[end++]]-- ?){ /* modify counter here */ }
while(/* counter condition */){
/* update d here if finding minimum*/
//increase begin to make it invalid/valid again
if(map[s[begin++]]++ ?){ /*modify counter here*/ }
}
/* update d here if finding maximum*/
}
return d;
}
Example:
Given a string S and a string T, find the minimum window in S which will contain all the characters in T in complexity O(n).
Algorithm
- Use two pointers: start and end to represent a window.
- Move end to find a valid window.
- When a valid window is found, move start to find a smaller window.
string minWindow(string s, string t) {
vector<int> map(128,0);
for(auto c: t) map[c]++;
int counter=t.size(), begin=0, end=0, d=INT_MAX, head=0;
while(end<s.size()){
if(map[s[end++]]-->0) counter--; //in t
while(counter==0){ //valid
if(end-begin<d) d=end-(head=begin);
if(map[s[begin++]]++==0) counter++; //make it invalid
}
}
return d==INT_MAX? "":s.substr(head, d);
}
A more readable version:
string minWindow(string s, string t) {
unordered_map<char, int> m;
// Statistic for count of char in t
for (auto c : t) m[c]++;
// counter represents the number of chars of t to be found in s.
size_t start = 0, end = 0, counter = t.size(), minStart = 0, minLen = INT_MAX;
size_t size = s.size();
// Move end to find a valid window.
while (end < size) {
// If char in s exists in t, decrease counter
if (m[s[end]] > 0)
counter--;
// Decrease m[s[end]]. If char does not exist in t, m[s[end]] will be negative.
m[s[end]]--;
end++;
// When we found a valid window, move start to find smaller window.
while (counter == 0) {
if (end - start < minLen) {
minStart = start;
minLen = end - start;
}
m[s[start]]++;
// When char exists in t, increase counter.
if (m[s[start]] > 0)
counter++;
start++;
}
}
if (minLen != INT_MAX)
return s.substr(minStart, minLen);
return "";
}
Java equivalent:
public String minWindow(String s, String t) {
int [] map = new int[128];
for (char c : t.toCharArray()) {
map[c]++;
}
int start = 0, end = 0, minStart = 0, minLen = Integer.MAX_VALUE, counter = t.length();
while (end < s.length()) {
final char c1 = s.charAt(end);
if (map[c1] > 0) counter--;
map[c1]--;
end++;
while (counter == 0) {
if (minLen > end - start) {
minLen = end - start;
minStart = start;
}
final char c2 = s.charAt(start);
map[c2]++;
if (map[c2] > 0) counter++;
start++;
}
}
return minLen == Integer.MAX_VALUE ? "" : s.substring(minStart, minStart + minLen);
}
Python equivalent:
class Solution(object):
def min_window(self, s, t):
"""
:type s: str
:type t: str
:rtype: str
"""
# Idea: Two pointers: moving end forward to find a valid window,
# moving start forward to find a smaller window
# counter and hash_map to determine if the window is valid or not
# Count the frequencies for chars in t
hash_map = dict()
for c in t:
if c in hash_map:
hash_map[c] += 1
else:
hash_map[c] = 1
start, end = 0, 0
# If the minimal length doesn't change, it means there's no valid window
min_window_length = len(s) + 1
# Start point of the minimal window
min_window_start = 0
# Works as a counter of how many chars still need to be included in a window
num_of_chars_to_be_included = len(t)
while end < len(s):
# If the current char is desired
if s[end] in hash_map:
# Then we decreased the counter, if this char is a "must-have" now, in a sense of critical value
if hash_map[s[end]] > 0:
num_of_chars_to_be_included -= 1
# And we decrease the hash_map value
hash_map[s[end]] -= 1
# If the current window has all the desired chars
while num_of_chars_to_be_included == 0:
# See if this window is smaller
if end - start + 1 < min_window_length:
min_window_length = end - start + 1
min_window_start = start
# if s[start] is desired, we need to update the hash_map value and the counter
if s[start] in hash_map:
hash_map[s[start]] += 1
# Still, update the counter only if the current char is "critical"
if hash_map[s[start]] > 0:
num_of_chars_to_be_included += 1
# Move start forward to find a smaller window
start += 1
# Move end forward to find another valid window
end += 1
if min_window_length == len(s) + 1:
return ""
else:
return s[min_window_start:min_window_start + min_window_length]
Alternatively, more concise version using collections.Counter
.
def minWindow(s, t):
need = collections.Counter(t) #hash table to store char frequency
missing = len(t) #total number of chars we care
start, end = 0, 0
i = 0
for j, char in enumerate(s, 1): #index j from 1
if need[char] > 0:
missing -= 1
need[char] -= 1
if missing == 0: #match all chars
while i < j and need[s[i]] < 0: #remove chars to find the real start
need[s[i]] += 1
i += 1
need[s[i]] += 1 #make sure the first appearing char satisfies need[char]>0
missing += 1 #we missed this first char, so add missing by 1
if end == 0 or j-i < end-start: #update window
start, end = i, j
i += 1 #update i to start+1 for next window
return s[start:end]
More examples of susbstring problems using the template:
// Longest Substring - at most K distinct characters
public int lengthOfLongestSubstringKDistinct(String s, int k) {
int[] map = new int[256];
int start = 0, end = 0, maxLen = Integer.MIN_VALUE, counter = 0;
while (end < s.length()) {
final char c1 = s.charAt(end);
if (map[c1] == 0) counter++;
map[c1]++;
end++;
while (counter > k) {
final char c2 = s.charAt(start);
if (map[c2] == 1) counter--;
map[c2]--;
start++;
}
maxLen = Math.max(maxLen, end - start);
}
return maxLen;
}
// Longest Substring - at most 2 distinct characters
public int lengthOfLongestSubstringTwoDistinct(String s) {
int[] map = new int[128];
int start = 0, end = 0, maxLen = 0, counter = 0;
while (end < s.length()) {
final char c1 = s.charAt(end);
if (map[c1] == 0) counter++;
map[c1]++;
end++;
while (counter > 2) {
final char c2 = s.charAt(start);
if (map[c2] == 1) counter--;
map[c2]--;
start++;
}
maxLen = Math.max(maxLen, end - start);
}
return maxLen;
}
// LongestSubstring - without repeating characters
public int lengthOfLongestSubstring2(String s) {
int[] map = new int[128];
int start = 0, end = 0, maxLen = 0, counter = 0;
while (end < s.length()) {
final char c1 = s.charAt(end);
if (map[c1] > 0)
counter++;
map[c1]++;
end++;
while (counter > 0) {
final char c2 = s.charAt(start);
if (map[c2] > 1) counter--;
map[c2]--;
start++;
}
maxLen = Math.max(maxLen, end - start);
}
return maxLen;
}
Written on July 31, 2019