From 76c08672bc6c2984ebd7045a71862099890c9118 Mon Sep 17 00:00:00 2001 From: etobi Date: Tue, 3 Sep 2013 09:48:47 +0200 Subject: Imported Upstream version 1.1.1+rev1457 --- util/scan/scan.c | 509 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 465 insertions(+), 44 deletions(-) (limited to 'util/scan/scan.c') diff --git a/util/scan/scan.c b/util/scan/scan.c index 3058bdb..972d529 100644 --- a/util/scan/scan.c +++ b/util/scan/scan.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include @@ -67,6 +69,10 @@ static int vdr_version = 3; static struct lnb_types_st lnb_type; static int unique_anon_services; +char *default_charset = "ISO-6937"; +char *output_charset; +#define CS_OPTIONS "//TRANSLIT" + static enum fe_spectral_inversion spectral_inversion = INVERSION_AUTO; enum table_type { @@ -543,60 +549,445 @@ static void parse_frequency_list_descriptor (const unsigned char *buf, } } +/* + * handle character set correctly (e.g. via iconv) + * c.f. EN 300 468 annex A + */ + +struct charset_conv { + unsigned len; + unsigned char data[3]; +}; + +/* This table is the Latin 00 table. Basically ISO-6937 + Euro sign */ +struct charset_conv en300468_latin_00_to_utf8[256] = { + [0x00] = { 1, {0x00, } }, + [0x01] = { 1, {0x01, } }, + [0x02] = { 1, {0x02, } }, + [0x03] = { 1, {0x03, } }, + [0x04] = { 1, {0x04, } }, + [0x05] = { 1, {0x05, } }, + [0x06] = { 1, {0x06, } }, + [0x07] = { 1, {0x07, } }, + [0x08] = { 1, {0x08, } }, + [0x09] = { 1, {0x09, } }, + [0x0a] = { 1, {0x0a, } }, + [0x0b] = { 1, {0x0b, } }, + [0x0c] = { 1, {0x0c, } }, + [0x0d] = { 1, {0x0d, } }, + [0x0e] = { 1, {0x0e, } }, + [0x0f] = { 1, {0x0f, } }, + [0x10] = { 1, {0x10, } }, + [0x11] = { 1, {0x11, } }, + [0x12] = { 1, {0x12, } }, + [0x13] = { 1, {0x13, } }, + [0x14] = { 1, {0x14, } }, + [0x15] = { 1, {0x15, } }, + [0x16] = { 1, {0x16, } }, + [0x17] = { 1, {0x17, } }, + [0x18] = { 1, {0x18, } }, + [0x19] = { 1, {0x19, } }, + [0x1a] = { 1, {0x1a, } }, + [0x1b] = { 1, {0x1b, } }, + [0x1c] = { 1, {0x1c, } }, + [0x1d] = { 1, {0x1d, } }, + [0x1e] = { 1, {0x1e, } }, + [0x1f] = { 1, {0x1f, } }, + [0x20] = { 1, {0x20, } }, + [0x21] = { 1, {0x21, } }, + [0x22] = { 1, {0x22, } }, + [0x23] = { 1, {0x23, } }, + [0x24] = { 1, {0x24, } }, + [0x25] = { 1, {0x25, } }, + [0x26] = { 1, {0x26, } }, + [0x27] = { 1, {0x27, } }, + [0x28] = { 1, {0x28, } }, + [0x29] = { 1, {0x29, } }, + [0x2a] = { 1, {0x2a, } }, + [0x2b] = { 1, {0x2b, } }, + [0x2c] = { 1, {0x2c, } }, + [0x2d] = { 1, {0x2d, } }, + [0x2e] = { 1, {0x2e, } }, + [0x2f] = { 1, {0x2f, } }, + [0x30] = { 1, {0x30, } }, + [0x31] = { 1, {0x31, } }, + [0x32] = { 1, {0x32, } }, + [0x33] = { 1, {0x33, } }, + [0x34] = { 1, {0x34, } }, + [0x35] = { 1, {0x35, } }, + [0x36] = { 1, {0x36, } }, + [0x37] = { 1, {0x37, } }, + [0x38] = { 1, {0x38, } }, + [0x39] = { 1, {0x39, } }, + [0x3a] = { 1, {0x3a, } }, + [0x3b] = { 1, {0x3b, } }, + [0x3c] = { 1, {0x3c, } }, + [0x3d] = { 1, {0x3d, } }, + [0x3e] = { 1, {0x3e, } }, + [0x3f] = { 1, {0x3f, } }, + [0x40] = { 1, {0x40, } }, + [0x41] = { 1, {0x41, } }, + [0x42] = { 1, {0x42, } }, + [0x43] = { 1, {0x43, } }, + [0x44] = { 1, {0x44, } }, + [0x45] = { 1, {0x45, } }, + [0x46] = { 1, {0x46, } }, + [0x47] = { 1, {0x47, } }, + [0x48] = { 1, {0x48, } }, + [0x49] = { 1, {0x49, } }, + [0x4a] = { 1, {0x4a, } }, + [0x4b] = { 1, {0x4b, } }, + [0x4c] = { 1, {0x4c, } }, + [0x4d] = { 1, {0x4d, } }, + [0x4e] = { 1, {0x4e, } }, + [0x4f] = { 1, {0x4f, } }, + [0x50] = { 1, {0x50, } }, + [0x51] = { 1, {0x51, } }, + [0x52] = { 1, {0x52, } }, + [0x53] = { 1, {0x53, } }, + [0x54] = { 1, {0x54, } }, + [0x55] = { 1, {0x55, } }, + [0x56] = { 1, {0x56, } }, + [0x57] = { 1, {0x57, } }, + [0x58] = { 1, {0x58, } }, + [0x59] = { 1, {0x59, } }, + [0x5a] = { 1, {0x5a, } }, + [0x5b] = { 1, {0x5b, } }, + [0x5c] = { 1, {0x5c, } }, + [0x5d] = { 1, {0x5d, } }, + [0x5e] = { 1, {0x5e, } }, + [0x5f] = { 1, {0x5f, } }, + [0x60] = { 1, {0x60, } }, + [0x61] = { 1, {0x61, } }, + [0x62] = { 1, {0x62, } }, + [0x63] = { 1, {0x63, } }, + [0x64] = { 1, {0x64, } }, + [0x65] = { 1, {0x65, } }, + [0x66] = { 1, {0x66, } }, + [0x67] = { 1, {0x67, } }, + [0x68] = { 1, {0x68, } }, + [0x69] = { 1, {0x69, } }, + [0x6a] = { 1, {0x6a, } }, + [0x6b] = { 1, {0x6b, } }, + [0x6c] = { 1, {0x6c, } }, + [0x6d] = { 1, {0x6d, } }, + [0x6e] = { 1, {0x6e, } }, + [0x6f] = { 1, {0x6f, } }, + [0x70] = { 1, {0x70, } }, + [0x71] = { 1, {0x71, } }, + [0x72] = { 1, {0x72, } }, + [0x73] = { 1, {0x73, } }, + [0x74] = { 1, {0x74, } }, + [0x75] = { 1, {0x75, } }, + [0x76] = { 1, {0x76, } }, + [0x77] = { 1, {0x77, } }, + [0x78] = { 1, {0x78, } }, + [0x79] = { 1, {0x79, } }, + [0x7a] = { 1, {0x7a, } }, + [0x7b] = { 1, {0x7b, } }, + [0x7c] = { 1, {0x7c, } }, + [0x7d] = { 1, {0x7d, } }, + [0x7e] = { 1, {0x7e, } }, + [0x7f] = { 1, {0x7f, } }, + [0x80] = { 2, {0xc2, 0x80, } }, + [0x81] = { 2, {0xc2, 0x81, } }, + [0x82] = { 2, {0xc2, 0x82, } }, + [0x83] = { 2, {0xc2, 0x83, } }, + [0x84] = { 2, {0xc2, 0x84, } }, + [0x85] = { 2, {0xc2, 0x85, } }, + [0x86] = { 2, {0xc2, 0x86, } }, + [0x87] = { 2, {0xc2, 0x87, } }, + [0x88] = { 2, {0xc2, 0x88, } }, + [0x89] = { 2, {0xc2, 0x89, } }, + [0x8a] = { 2, {0xc2, 0x8a, } }, + [0x8b] = { 2, {0xc2, 0x8b, } }, + [0x8c] = { 2, {0xc2, 0x8c, } }, + [0x8d] = { 2, {0xc2, 0x8d, } }, + [0x8e] = { 2, {0xc2, 0x8e, } }, + [0x8f] = { 2, {0xc2, 0x8f, } }, + [0x90] = { 2, {0xc2, 0x90, } }, + [0x91] = { 2, {0xc2, 0x91, } }, + [0x92] = { 2, {0xc2, 0x92, } }, + [0x93] = { 2, {0xc2, 0x93, } }, + [0x94] = { 2, {0xc2, 0x94, } }, + [0x95] = { 2, {0xc2, 0x95, } }, + [0x96] = { 2, {0xc2, 0x96, } }, + [0x97] = { 2, {0xc2, 0x97, } }, + [0x98] = { 2, {0xc2, 0x98, } }, + [0x99] = { 2, {0xc2, 0x99, } }, + [0x9a] = { 2, {0xc2, 0x9a, } }, + [0x9b] = { 2, {0xc2, 0x9b, } }, + [0x9c] = { 2, {0xc2, 0x9c, } }, + [0x9d] = { 2, {0xc2, 0x9d, } }, + [0x9e] = { 2, {0xc2, 0x9e, } }, + [0x9f] = { 2, {0xc2, 0x9f, } }, + [0xa0] = { 2, {0xc2, 0xa0, } }, + [0xa1] = { 2, {0xc2, 0xa1, } }, + [0xa2] = { 2, {0xc2, 0xa2, } }, + [0xa3] = { 2, {0xc2, 0xa3, } }, + [0xa4] = { 3, { 0xe2, 0x82, 0xac,} }, /* Euro sign. Addition over the ISO-6937 standard */ + [0xa5] = { 2, {0xc2, 0xa5, } }, + [0xa6] = { 0, {} }, + [0xa7] = { 2, {0xc2, 0xa7, } }, + [0xa8] = { 2, {0xc2, 0xa4, } }, + [0xa9] = { 3, {0xe2, 0x80, 0x98, } }, + [0xaa] = { 3, {0xe2, 0x80, 0x9c, } }, + [0xab] = { 2, {0xc2, 0xab, } }, + [0xac] = { 3, {0xe2, 0x86, 0x90, } }, + [0xad] = { 3, {0xe2, 0x86, 0x91, } }, + [0xae] = { 3, {0xe2, 0x86, 0x92, } }, + [0xaf] = { 3, {0xe2, 0x86, 0x93, } }, + [0xb0] = { 2, {0xc2, 0xb0, } }, + [0xb1] = { 2, {0xc2, 0xb1, } }, + [0xb2] = { 2, {0xc2, 0xb2, } }, + [0xb3] = { 2, {0xc2, 0xb3, } }, + [0xb4] = { 2, {0xc3, 0x97, } }, + [0xb5] = { 2, {0xc2, 0xb5, } }, + [0xb6] = { 2, {0xc2, 0xb6, } }, + [0xb7] = { 2, {0xc2, 0xb7, } }, + [0xb8] = { 2, {0xc3, 0xb7, } }, + [0xb9] = { 3, {0xe2, 0x80, 0x99, } }, + [0xba] = { 3, {0xe2, 0x80, 0x9d, } }, + [0xbb] = { 2, {0xc2, 0xbb, } }, + [0xbc] = { 2, {0xc2, 0xbc, } }, + [0xbd] = { 2, {0xc2, 0xbd, } }, + [0xbe] = { 2, {0xc2, 0xbe, } }, + [0xbf] = { 2, {0xc2, 0xbf, } }, + [0xc0] = { 0, {} }, + [0xc1] = { 0, {} }, + [0xc2] = { 0, {} }, + [0xc3] = { 0, {} }, + [0xc4] = { 0, {} }, + [0xc5] = { 0, {} }, + [0xc6] = { 0, {} }, + [0xc7] = { 0, {} }, + [0xc8] = { 0, {} }, + [0xc9] = { 0, {} }, + [0xca] = { 0, {} }, + [0xcb] = { 0, {} }, + [0xcc] = { 0, {} }, + [0xcd] = { 0, {} }, + [0xce] = { 0, {} }, + [0xcf] = { 0, {} }, + [0xd0] = { 3, {0xe2, 0x80, 0x94, } }, + [0xd1] = { 2, {0xc2, 0xb9, } }, + [0xd2] = { 2, {0xc2, 0xae, } }, + [0xd3] = { 2, {0xc2, 0xa9, } }, + [0xd4] = { 3, {0xe2, 0x84, 0xa2, } }, + [0xd5] = { 3, {0xe2, 0x99, 0xaa, } }, + [0xd6] = { 2, {0xc2, 0xac, } }, + [0xd7] = { 2, {0xc2, 0xa6, } }, + [0xd8] = { 0, {} }, + [0xd9] = { 0, {} }, + [0xda] = { 0, {} }, + [0xdb] = { 0, {} }, + [0xdc] = { 3, {0xe2, 0x85, 0x9b, } }, + [0xdd] = { 3, {0xe2, 0x85, 0x9c, } }, + [0xde] = { 3, {0xe2, 0x85, 0x9d, } }, + [0xdf] = { 3, {0xe2, 0x85, 0x9e, } }, + [0xe0] = { 3, {0xe2, 0x84, 0xa6, } }, + [0xe1] = { 2, {0xc3, 0x86, } }, + [0xe2] = { 2, {0xc3, 0x90, } }, + [0xe3] = { 2, {0xc2, 0xaa, } }, + [0xe4] = { 2, {0xc4, 0xa6, } }, + [0xe5] = { 0, {} }, + [0xe6] = { 2, {0xc4, 0xb2, } }, + [0xe7] = { 2, {0xc4, 0xbf, } }, + [0xe8] = { 2, {0xc5, 0x81, } }, + [0xe9] = { 2, {0xc3, 0x98, } }, + [0xea] = { 2, {0xc5, 0x92, } }, + [0xeb] = { 2, {0xc2, 0xba, } }, + [0xec] = { 2, {0xc3, 0x9e, } }, + [0xed] = { 2, {0xc5, 0xa6, } }, + [0xee] = { 2, {0xc5, 0x8a, } }, + [0xef] = { 2, {0xc5, 0x89, } }, + [0xf0] = { 2, {0xc4, 0xb8, } }, + [0xf1] = { 2, {0xc3, 0xa6, } }, + [0xf2] = { 2, {0xc4, 0x91, } }, + [0xf3] = { 2, {0xc3, 0xb0, } }, + [0xf4] = { 2, {0xc4, 0xa7, } }, + [0xf5] = { 2, {0xc4, 0xb1, } }, + [0xf6] = { 2, {0xc4, 0xb3, } }, + [0xf7] = { 2, {0xc5, 0x80, } }, + [0xf8] = { 2, {0xc5, 0x82, } }, + [0xf9] = { 2, {0xc3, 0xb8, } }, + [0xfa] = { 2, {0xc5, 0x93, } }, + [0xfb] = { 2, {0xc3, 0x9f, } }, + [0xfc] = { 2, {0xc3, 0xbe, } }, + [0xfd] = { 2, {0xc5, 0xa7, } }, + [0xfe] = { 2, {0xc5, 0x8b, } }, + [0xff] = { 2, {0xc2, 0xad, } }, +}; + +static void descriptorcpy(char **dest, const unsigned char *src, size_t len) +{ + size_t destlen, i; + char *p, *type = default_charset; + unsigned char *tmp = NULL; + const unsigned char *s; + int emphasis = 0, need_conversion = 1; + + if (*dest) { + free (*dest); + *dest = NULL; + } + if (!len) + return; + + if (*src < 0x20) { + switch (*src) { + case 0x00: type = "ISO-6937"; break; + case 0x01: type = "ISO-8859-5"; break; + case 0x02: type = "ISO-8859-6"; break; + case 0x03: type = "ISO-8859-7"; break; + case 0x04: type = "ISO-8859-8"; break; + case 0x05: type = "ISO-8859-9"; break; + case 0x06: type = "ISO-8859-10"; break; + case 0x07: type = "ISO-8859-11"; break; + case 0x09: type = "ISO-8859-13"; break; + case 0x0a: type = "ISO-8859-14"; break; + case 0x0b: type = "ISO-8859-15"; break; + case 0x11: type = "ISO-10646"; break; + case 0x12: type = "ISO-2022-KR"; break; + case 0x13: type = "GB2312"; break; + case 0x14: type = "BIG5"; break; + case 0x15: type = "ISO-10646/UTF-8"; break; + case 0x10: /* ISO8859 */ + if ((*(src + 1) != 0) || *(src + 2) > 0x0f) + break; + src+=2; + len-=2; + switch(*src) { + case 0x01: type = "ISO-8859-1"; break; + case 0x02: type = "ISO-8859-2"; break; + case 0x03: type = "ISO-8859-3"; break; + case 0x04: type = "ISO-8859-4"; break; + case 0x05: type = "ISO-8859-5"; break; + case 0x06: type = "ISO-8859-6"; break; + case 0x07: type = "ISO-8859-7"; break; + case 0x08: type = "ISO-8859-8"; break; + case 0x09: type = "ISO-8859-9"; break; + case 0x0a: type = "ISO-8859-10"; break; + case 0x0b: type = "ISO-8859-11"; break; + case 0x0d: type = "ISO-8859-13"; break; + case 0x0e: type = "ISO-8859-14"; break; + case 0x0f: type = "ISO-8859-15"; break; + } + } + src++; + len--; + } + + /* + * Destination length should be bigger. As the worse case seems to + * use 3 chars for one code, use it for destlen + */ + destlen = len * 3; + *dest = malloc(destlen + 1); + + /* Remove special chars */ + if (!strncasecmp(type, "ISO-8859", 8) || !strcasecmp(type, "ISO-6937")) { + /* + * Handles the ISO/IEC 10646 1-byte control codes + * (EN 300 468 v1.11.1 Table A.1) + * Emphasis will be represented as: *emphased* + */ + tmp = malloc(len + 2); + p = (char *)tmp; + s = src; + for (i = 0; i < len; i++, s++) { + if (*s >= 0x20 && (*s < 0x80 || *s > 0x9f)) + *p++ = *s; + else if (*s == 0x86) { + *p++ = '*'; + emphasis = 1; + } else if (*s == 0x87 && emphasis) { + *p++ = '*'; + emphasis = 0; + } + } + if (emphasis) + *p++ = '*'; + *p = '\0'; + } else { + /* + * FIXME: need to handle the ISO/IEC 10646 2-byte control codes + * (EN 300 468 v1.11.1 Table A.2) + */ + } + + if (tmp) + s = tmp; + else + s = src; + + p = *dest; + if (!strcasecmp(type, "ISO-6937")) { + unsigned char *p1, *p2; + + /* Convert charset to UTF-8 using Code table 00 - Latin */ + for (p1 = (unsigned char *)s; p1 < s + len; p1++) + for (p2 = en300468_latin_00_to_utf8[*p1].data; + p2 < en300468_latin_00_to_utf8[*p1].data + en300468_latin_00_to_utf8[*p1].len; + p2++) + *p++ = *p2; + *p = '\0'; + + /* If desired charset is not UTF-8, prepare for conversion */ + if (strcasecmp(output_charset, "UTF-8")) { + if (tmp) + free(tmp); + tmp = (unsigned char *)*dest; + len = p - *dest; + + *dest = malloc(destlen + 1); + type = "UTF-8"; + s = tmp; + } else + need_conversion = 0; + + } + + /* Convert from original charset to the desired one */ + if (need_conversion) { + char out_cs[strlen(output_charset) + 1 + sizeof(CS_OPTIONS)]; + + p = *dest; + strcpy(out_cs, output_charset); + strcat(out_cs, CS_OPTIONS); + + iconv_t cd = iconv_open(out_cs, type); + if (cd == (iconv_t)(-1)) { + memcpy(p, s, len); + p[len] = '\0'; + warning("Conversion from %s to %s not supported\n", + type, output_charset); + } else { + iconv(cd, (char **)&s, &len, &p, &destlen); + iconv_close(cd); + *p = '\0'; + } + } + + if (tmp) + free(tmp); +} + static void parse_service_descriptor (const unsigned char *buf, struct service *s) { unsigned char len; - unsigned char *src, *dest; s->type = buf[2]; buf += 3; len = *buf; buf++; - - if (s->provider_name) - free (s->provider_name); - - s->provider_name = malloc (len + 1); - memcpy (s->provider_name, buf, len); - s->provider_name[len] = '\0'; - - /* remove control characters (FIXME: handle short/long name) */ - /* FIXME: handle character set correctly (e.g. via iconv) - * c.f. EN 300 468 annex A */ - for (src = dest = (unsigned char *) s->provider_name; *src; src++) - if (*src >= 0x20 && (*src < 0x80 || *src > 0x9f)) - *dest++ = *src; - *dest = '\0'; - if (!s->provider_name[0]) { - /* zap zero length names */ - free (s->provider_name); - s->provider_name = 0; - } - - if (s->service_name) - free (s->service_name); + descriptorcpy(&s->provider_name, buf, len); buf += len; len = *buf; buf++; - - s->service_name = malloc (len + 1); - memcpy (s->service_name, buf, len); - s->service_name[len] = '\0'; - - /* remove control characters (FIXME: handle short/long name) */ - /* FIXME: handle character set correctly (e.g. via iconv) - * c.f. EN 300 468 annex A */ - for (src = dest = (unsigned char *) s->service_name; *src; src++) - if (*src >= 0x20 && (*src < 0x80 || *src > 0x9f)) - *dest++ = *src; - *dest = '\0'; - if (!s->service_name[0]) { - /* zap zero length names */ - free (s->service_name); - s->service_name = 0; - } + descriptorcpy(&s->service_name, buf, len); info("0x%04x 0x%04x: pmt_pid 0x%04x %s -- %s (%s%s)\n", s->transport_stream_id, @@ -2118,7 +2509,10 @@ static const char *usage = "\n" " -P do not use ATSC PSIP tables for scanning\n" " (but only PAT and PMT) (applies for ATSC only)\n" " -A N check for ATSC 1=Terrestrial [default], 2=Cable or 3=both\n" - " -U Uniquely name unknown services\n"; + " -U Uniquely name unknown services\n" + " -C cs Override default charset for service name/provider (default = ISO-6937)\n" + " -D cs Output charset (default = %s)\n" + "Supported charsets by -C/-D parameters can be obtained via 'iconv -l' command\n"; void bad_usage(char *pname, int problem) @@ -2130,7 +2524,7 @@ bad_usage(char *pname, int problem) switch (problem) { default: case 0: - fprintf (stderr, usage, pname); + fprintf (stderr, usage, pname, output_charset); break; case 1: i = 0; @@ -2158,15 +2552,36 @@ int main (int argc, char **argv) int frontend_fd; int fe_open_mode; const char *initial = NULL; + char *charset; if (argc <= 1) { bad_usage(argv[0], 2); return -1; } + /* + * Get the environment charset, and use it as the default + * output charset. In thesis, using nl_langinfo should be + * enough, but, in my tests, it is not as reliable as checking + * the environment vars directly. + */ + if ((charset = getenv("LC_ALL")) || + (charset = getenv("LC_CTYPE")) || + (charset = getenv ("LANG"))) { + while (*charset != '.' && *charset) + charset++; + if (*charset == '.') + charset++; + if (*charset) + output_charset = charset; + else + output_charset = nl_langinfo(CODESET); + } else + output_charset = nl_langinfo(CODESET); + /* start with default lnb type */ lnb_type = *lnb_enum(0); - while ((opt = getopt(argc, argv, "5cnpa:f:d:s:o:x:e:t:i:l:vquPA:U")) != -1) { + while ((opt = getopt(argc, argv, "5cnpa:f:d:s:o:x:e:t:i:l:vquPA:UC:D:")) != -1) { switch (opt) { case 'a': adapter = strtoul(optarg, NULL, 0); @@ -2246,6 +2661,12 @@ int main (int argc, char **argv) case 'U': unique_anon_services = 1; break; + case 'C': + default_charset = optarg; + break; + case 'D': + output_charset = optarg; + break; default: bad_usage(argv[0], 0); return -1; -- cgit v1.2.3