update_emoji.pl 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. #!/usr/bin/perl -w
  2. #
  3. # update_emoji.pl
  4. #
  5. # This script generates the emoji.plugin.zsh emoji definitions from the Unicode
  6. # character data for the emoji characters.
  7. #
  8. # The data file can be found at https://unicode.org/Public/emoji/latest/emoji-data.txt
  9. # as referenced in Unicode TR51 (https://www.unicode.org/reports/tr51/index.html).
  10. #
  11. # This is known to work with the data file from version 1.0. It may not work with later
  12. # versions if the format changes. In particular, this reads line comments to get the
  13. # emoji character name and unicode version.
  14. #
  15. # Country names have punctuation and other non-letter characters removed from their name,
  16. # to avoid possible complications with having to escape the strings when using them as
  17. # array subscripts. The definition file seems to use some combining characters like accents
  18. # that get stripped during this process.
  19. use strict;
  20. use warnings;
  21. use 5.010;
  22. use autodie;
  23. use Path::Class;
  24. use File::Copy;
  # Parse the emoji definitions out of the Unicode data file and write them
  # out as zsh associative-array assignments.
  #
  # Arguments:
  #   $infile      - path of the Unicode emoji data file to read
  #   $outfilename - path of the zsh definition file to (re)generate
  #
  # Returns a true value on success.
  #
  # The output is first written to "$outfilename.tmp" and only moved over
  # $outfilename after every line has been parsed and the temp file has been
  # closed successfully, so a failed run never replaces an existing
  # definition file with a partial one. (On failure the partially written
  # temp file is left behind.)
  #
  # Both files are handled as UTF-8 text. This matters for flag names: the
  # \p{Letter} substitution below must see whole characters. On raw bytes it
  # would split every accented letter into a stray lead byte plus "_" and
  # emit invalid UTF-8 keys.
  #
  # Dies if either file cannot be opened, if a non-comment line does not match
  # the expected data format, if the temp file cannot be closed cleanly, or if
  # the final move fails.
  sub process_emoji_data_file {
      my ( $infile, $outfilename ) = @_;

      my $file         = file($infile);
      my $outfilebase  = file($outfilename)->basename();
      my $tempfilename = "$outfilename.tmp";

      my $outfh = file($tempfilename)->openw();
      binmode( $outfh, ':encoding(UTF-8)' )
          or die "Failed setting UTF-8 encoding on $tempfilename: $!";

      # Header of the generated file. Built from a list of lines so that the
      # emitted text does not depend on how this source file is indented.
      $outfh->print(
          join( "\n",
              "",
              "# $outfilebase - Emoji character definitions for oh-my-zsh emoji plugin",
              "#",
              "# This file is auto-generated by update_emoji.pl. Do not edit it manually.",
              "#",
              "# This contains the definition for:",
              '# $emoji - which maps character names to Unicode characters',
              '# $emoji_flags - maps country names to Unicode flag characters using region indicators',
              '# $emoji_mod - maps modifier names to Unicode combining modifier characters',
              "# Main emoji",
              "typeset -gAH emoji",
              "# National flags",
              "typeset -gAH emoji_flags",
              "# Combining modifiers",
              "typeset -gAH emoji_mod",
              "" )
      );

      my $fh = $file->openr();
      binmode( $fh, ':encoding(UTF-8)' )
          or die "Failed setting UTF-8 encoding on $infile: $!";

      my $line_num = 0;

      # getline() is a method call, so "while" adds no implicit defined()
      # test; be explicit so a final line of "0" does not end the loop early.
      while ( defined( my $line = $fh->getline() ) ) {
          $line_num++;
          chomp $line;

          # Skip all-comment lines (from the header) and blank lines.
          # (But don't strip comments on normal lines; we need to parse those
          # for the emoji names.)
          next if $line =~ /^\s*#/ or $line =~ /^\s*$/;

          # Data line layout:
          #   code points ; style ; level ; modifier status ; sources # V<version> (<char>) <name>
          # Only the code points, the modifier status and the name are used;
          # the other captured fields are discarded.
          my ( $code, undef, undef, $modifier_status, undef, undef, $keycap_name ) =
              $line =~ /^(\S.*?\S)\s*;\s*(\w+)\s*;\s*(\w+)\s*;\s*(\w+)\s*;\s*(\w.*?)\s*#\s*V(\S+)\s\(.*?\)\s*(\w.*\S)\s*$/;
          die "Failed parsing line $line_num: '$line'" unless defined $code;

          # One zsh \U escape per code point, concatenated.
          my $zsh_code = join '', map { "\\U$_" } split /\s+/, $code;

          # Convert keycap names to valid associative array names that do not
          # require any quoting. Works fine for most stuff, but is clumsy for
          # flags, which therefore get their own map keyed by country name.
          ( my $omz_name = lc $keycap_name ) =~ s/[^A-Za-z0-9]/_/g;

          if ( $keycap_name =~ /^flag for (\S.*?)\s*$/ ) {
              ( my $zsh_flag_country = $1 ) =~ s/[^\p{Letter}]/_/g;
              $outfh->print("emoji_flags[$zsh_flag_country]=\$'${zsh_code}'\n");
          }
          else {
              $outfh->print("emoji[$omz_name]=\$'${zsh_code}'\n");
          }

          # Modifiers are included in both the main set and their separate
          # map, because they have a standalone representation as a color
          # swatch.
          if ( $modifier_status eq "modifier" ) {
              $outfh->print("emoji_mod[$omz_name]=\$'${zsh_code}'\n");
          }
      }
      $fh->close();

      $outfh->print("\n");

      # Buffered write errors only surface at close, and autodie does not
      # cover IO::Handle method calls, so check the result before the temp
      # file is allowed to replace the real one.
      $outfh->close()
          or die "Failed closing temp file $tempfilename: $!";

      move( $tempfilename, $outfilename )
          or die "Failed moving temp file to $outfilename: $!";

      return 1;
  }
  # Script entry point: regenerate the zsh definition file from the Unicode
  # data file, both looked up relative to the current working directory, and
  # report the name of the file that was written.
  my ( $unicode_data_path, $generated_defs_path ) =
      ( "emoji-data.txt", "emoji-char-definitions.zsh" );

  process_emoji_data_file( $unicode_data_path, $generated_defs_path );

  printf "Updated definition file %s\n", $generated_defs_path;