extract_analytics_event_names.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. #!/usr/bin/env python3
  2. import os
  3. import sys
  4. import subprocess
  5. import datetime
  6. import argparse
  7. import re
  8. # This script is used to extract analytics event names from the codebase,
  9. # and convert them to constants in OWSAnalyticsEvents.
  10. git_repo_path = os.path.abspath(
  11. subprocess.check_output(["git", "rev-parse", "--show-toplevel"], text=True).strip()
  12. )
  13. def splitall(path):
  14. allparts = []
  15. while 1:
  16. parts = os.path.split(path)
  17. if parts[0] == path: # sentinel for absolute paths
  18. allparts.insert(0, parts[0])
  19. break
  20. elif parts[1] == path: # sentinel for relative paths
  21. allparts.insert(0, parts[1])
  22. break
  23. else:
  24. path = parts[0]
  25. allparts.insert(0, parts[1])
  26. return allparts
  27. def objc_name_for_event_name(event_name):
  28. while True:
  29. index = event_name.find('_')
  30. if index < 0:
  31. break
  32. if index >= len(event_name) - 1:
  33. break
  34. nextChar = event_name[index + 1]
  35. event_name = event_name[:index] + nextChar.upper() + event_name[index + 2:]
  36. return event_name
# Module-level accumulator: process() appends every event name it finds here
# (duplicates included); update_event_names() and the script entry point read it.
event_names = []
  38. def process(filepath, c_macros, swift_macros):
  39. short_filepath = filepath[len(git_repo_path):]
  40. if short_filepath.startswith(os.sep):
  41. short_filepath = short_filepath[len(os.sep):]
  42. filename = os.path.basename(filepath)
  43. if filename.startswith('.'):
  44. return
  45. if filename == 'OWSAnalytics.h':
  46. return
  47. file_ext = os.path.splitext(filename)[1]
  48. is_swift = file_ext in ('.swift')
  49. if is_swift:
  50. macros = swift_macros
  51. else:
  52. macros = c_macros
  53. # print short_filepath, is_swift
  54. with open(filepath, 'rt') as f:
  55. text = f.read()
  56. replacement_map = {}
  57. position = 0
  58. has_printed_filename = False
  59. while True:
  60. best_match = None
  61. best_macro = None
  62. for macro in macros:
  63. pattern = r'''%s\(([^,\)]+)[,\)]''' % macro
  64. # print '\t pattern', pattern
  65. matcher = re.compile(pattern)
  66. # matcher = re.compile(r'#define (OWSProd)')
  67. match = matcher.search(text, pos=position)
  68. if match:
  69. event_name = match.group(1).strip()
  70. # Ignore swift func definitions
  71. if is_swift and ':' in event_name:
  72. continue
  73. # print '\t', 'event_name', event_name
  74. if not best_match:
  75. pass
  76. elif best_match.start(1) > match.start(1):
  77. pass
  78. else:
  79. continue
  80. best_match = match
  81. best_macro = macro
  82. # TODO:
  83. if not best_match:
  84. break
  85. position = best_match.end(1)
  86. if not has_printed_filename:
  87. has_printed_filename = True
  88. print(short_filepath)
  89. raw_event_name = best_match.group(1).strip()
  90. if is_swift:
  91. pattern = r'^"(.+)"$'
  92. else:
  93. pattern = r'^@"(.+)"$'
  94. # print 'pattern:', pattern
  95. matcher = re.compile(pattern)
  96. # matcher = re.compile(r'#define (OWSProd)')
  97. match = matcher.search(raw_event_name)
  98. if match:
  99. event_name = match.group(1).strip()
  100. else:
  101. print('\t', 'Ignoring event: _%s_' % raw_event_name)
  102. continue
  103. event_names.append(event_name)
  104. print('\t', 'event_name', event_name)
  105. if is_swift:
  106. before = '"%s"' % event_name
  107. after = 'OWSAnalyticsEvents.%s()' % objc_name_for_event_name(event_name)
  108. else:
  109. before = '@"%s"' % event_name
  110. after = '[OWSAnalyticsEvents %s]' % objc_name_for_event_name(event_name)
  111. replacement_map[before] = after
  112. # macros.append(macro)
  113. # break
  114. # print 'replacement_map', replacement_map
  115. for before in replacement_map:
  116. after = replacement_map[before]
  117. text = text.replace(before, after)
  118. # if original_text == text:
  119. # return
  120. print('Updating:', short_filepath)
  121. with open(filepath, 'wt') as f:
  122. f.write(text)
  123. def should_ignore_path(path):
  124. ignore_paths = [
  125. os.path.join(git_repo_path, '.git')
  126. ]
  127. for ignore_path in ignore_paths:
  128. if path.startswith(ignore_path):
  129. return True
  130. for component in splitall(path):
  131. if component.startswith('.'):
  132. return True
  133. if component.endswith('.framework'):
  134. return True
  135. if component in ('Pods', 'ThirdParty',):
  136. return True
  137. return False
  138. def process_if_appropriate(filepath, c_macros, swift_macros):
  139. filename = os.path.basename(filepath)
  140. if filename.startswith('.'):
  141. return
  142. file_ext = os.path.splitext(filename)[1]
  143. if file_ext not in ('.h', '.hpp', '.cpp', '.m', '.mm', '.pch', '.swift'):
  144. return
  145. if should_ignore_path(filepath):
  146. return
  147. process(filepath, c_macros, swift_macros)
  148. def extract_macros(filepath):
  149. filename = os.path.basename(filepath)
  150. file_ext = os.path.splitext(filename)[1]
  151. is_swift = file_ext in ('.swift')
  152. macros = []
  153. with open(filepath, 'rt') as f:
  154. text = f.read()
  155. lines = text.split('\n')
  156. for line in lines:
  157. # Match lines of this form:
  158. # #define OWSProdCritical(__eventName) ...
  159. if is_swift:
  160. matcher = re.compile(r'func (OWSProd[^\(]+)\(.+[,\)]')
  161. else:
  162. matcher = re.compile(r'#define (OWSProd[^\(]+)\(.+[,\)]')
  163. # matcher = re.compile(r'#define (OWSProd)')
  164. match = matcher.search(line)
  165. if match:
  166. macro = match.group(1).strip()
  167. # print 'macro', macro
  168. macros.append(macro)
  169. return macros
  170. def update_event_names(header_file_path, source_file_path):
  171. # global event_names
  172. # event_names = sorted(set(event_names))
  173. code_generation_marker = '#pragma mark - Code Generation Marker'
  174. # Source
  175. filepath = source_file_path
  176. with open(filepath, 'rt') as f:
  177. text = f.read()
  178. code_generation_start = text.find(code_generation_marker)
  179. code_generation_end = text.rfind(code_generation_marker)
  180. if code_generation_start < 0:
  181. print('Could not find marker in file:', file)
  182. sys.exit(1)
  183. if code_generation_end < 0 or code_generation_end == code_generation_start:
  184. print('Could not find marker in file:', file)
  185. sys.exit(1)
  186. event_name_map = {}
  187. print()
  188. print('Parsing old generated code')
  189. print()
  190. old_generated = text[code_generation_start + len(code_generation_marker):code_generation_end]
  191. # print 'old_generated', old_generated
  192. for split in old_generated.split('+'):
  193. split = split.strip()
  194. # print 'split:', split
  195. if not split:
  196. continue
  197. # Example:
  198. #(NSString *)call_service_call_already_set
  199. #{
  200. # return @"call_service_call_already_set";
  201. #}
  202. pattern = r'\(NSString \*\)([^\s\r\n\t]+)[\s\r\n\t]'
  203. matcher = re.compile(pattern)
  204. match = matcher.search(split)
  205. if not match:
  206. print('Could not parse:', split)
  207. print('In file:', filepath)
  208. sys.exit(1)
  209. method_name = match.group(1).strip()
  210. print('method_name:', method_name)
  211. pattern = r'return @"(.+)";'
  212. matcher = re.compile(pattern)
  213. match = matcher.search(split)
  214. if not match:
  215. print('Could not parse:', split)
  216. print('In file:', filepath)
  217. sys.exit(1)
  218. event_name = match.group(1).strip()
  219. print('event_name:', event_name)
  220. event_name_map[event_name] = method_name
  221. print()
  222. all_event_names = sorted(set(list(event_name_map.keys()) + event_names))
  223. print('all_event_names', all_event_names)
  224. generated = code_generation_marker
  225. for event_name in all_event_names:
  226. # Example:
  227. # + (NSString *)call_service_call_already_set;
  228. if event_name in event_name_map:
  229. objc_name = event_name_map[event_name]
  230. else:
  231. objc_name = objc_name_for_event_name(event_name)
  232. text_for_event = '''+ (NSString *)%s
  233. {
  234. return @"%s";
  235. }''' % (objc_name, event_name)
  236. generated = generated + '\n\n' + text_for_event
  237. generated = generated + '\n\n' + code_generation_marker
  238. print('generated', generated)
  239. new_text = text[:code_generation_start] + generated + text[code_generation_end + len(code_generation_marker):]
  240. print('text', new_text)
  241. with open(filepath, 'wt') as f:
  242. f.write(new_text)
  243. # Header
  244. filepath = header_file_path
  245. with open(filepath, 'rt') as f:
  246. text = f.read()
  247. code_generation_start = text.find(code_generation_marker)
  248. code_generation_end = text.rfind(code_generation_marker)
  249. if code_generation_start < 0:
  250. print('Could not find marker in file:', file)
  251. sys.exit(1)
  252. if code_generation_end < 0 or code_generation_end == code_generation_start:
  253. print('Could not find marker in file:', file)
  254. sys.exit(1)
  255. generated = code_generation_marker
  256. for event_name in all_event_names:
  257. # Example:
  258. # + (NSString *)call_service_call_already_set;
  259. objc_name = objc_name_for_event_name(event_name)
  260. text_for_event = '+ (NSString *)%s;' % (objc_name,)
  261. generated = generated + '\n\n' + text_for_event
  262. generated = generated + '\n\n' + code_generation_marker
  263. print('generated', generated)
  264. new_text = text[:code_generation_start] + generated + text[code_generation_end + len(code_generation_marker):]
  265. print('text', new_text)
  266. with open(filepath, 'wt') as f:
  267. f.write(new_text)
  268. if __name__ == "__main__":
  269. # print 'git_repo_path', git_repo_path
  270. macros_header_file_path = os.path.join(git_repo_path, 'SignalServiceKit', 'src', 'Util', 'OWSAnalytics.h')
  271. if not os.path.exists(macros_header_file_path):
  272. print('Macros header does not exist:', macros_header_file_path)
  273. sys.exit(1)
  274. c_macros = extract_macros(macros_header_file_path)
  275. print('c_macros:', c_macros)
  276. macros_header_file_path = os.path.join(git_repo_path, 'Signal', 'src', 'util', 'OWSAnalytics.swift')
  277. if not os.path.exists(macros_header_file_path):
  278. print('Macros header does not exist:', macros_header_file_path)
  279. sys.exit(1)
  280. swift_macros = extract_macros(macros_header_file_path)
  281. print('swift_macros:', swift_macros)
  282. event_names_header_file_path = os.path.join(git_repo_path, 'SignalServiceKit', 'src', 'Util', 'OWSAnalyticsEvents.h')
  283. if not os.path.exists(event_names_header_file_path):
  284. print('event_names_header_file_path does not exist:', event_names_header_file_path)
  285. sys.exit(1)
  286. event_names_source_file_path = os.path.join(git_repo_path, 'SignalServiceKit', 'src', 'Util', 'OWSAnalyticsEvents.m')
  287. if not os.path.exists(event_names_source_file_path):
  288. print('event_names_source_file_path does not exist:', event_names_source_file_path)
  289. sys.exit(1)
  290. for rootdir, dirnames, filenames in os.walk(git_repo_path):
  291. for filename in filenames:
  292. file_path = os.path.abspath(os.path.join(rootdir, filename))
  293. process_if_appropriate(file_path, c_macros, swift_macros)
  294. print()
  295. print('event_names', sorted(set(event_names)))
  296. update_event_names(event_names_header_file_path, event_names_source_file_path)