Skip to content

Commit 6652944

Browse files
Add more encodings
1 parent 44783bf commit 6652944

File tree

14 files changed

+326
-12
lines changed

14 files changed

+326
-12
lines changed

httpbin/core.py

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
parse_multi_value_header,
5252
next_stale_after_value,
5353
digest_challenge_response,
54+
normalize_charset,
5455
)
5556
from .utils import weighted_choice
5657
from .structures import CaseInsensitiveDict
@@ -1407,20 +1408,96 @@ def cache_control(value):
14071408
return response
14081409

14091410

1410-
@app.route("/encoding/utf8")
1411-
def encoding():
1412-
"""Returns a UTF-8 encoded body.
1411+
@app.route("/encoding/<charset>")
def encoding(charset):
    """Returns the requested charset and encoding.
    ---
    tags:
      - Response formats
    parameters:
      - in: path
        name: charset
        type: string
        default: 'utf8'
      - in: query
        name: content-type
        type: string
        description: The content type of the response. If unset will use response content type ("accept" header).
        default: ''
    produces:
      - text/html
      - text/plain
      - '*/*'
    responses:
      200:
        description: Content with the requested encoding and content type.
    """
    # This route has no <body> segment; passing None makes encoding_generic
    # render the demo template for the charset instead of echoing a payload.
    return encoding_generic(charset, None)
1436+
1437+
1438+
@app.route("/encoding/<charset>/<body>")
def encoding_generic(charset, body):
    """Returns the requested charset and encoding.

    When a body is given it is URL-safe base64 decoded and returned as-is,
    labelled with the requested charset. Otherwise a demo document for the
    charset is rendered and encoded with that charset.
    ---
    tags:
      - Response formats
    parameters:
      - in: path
        name: charset
        type: string
        default: 'utf8'
      - in: query
        name: content-type
        type: string
        description: The content type of the response. If unset will use response content type ("accept" header).
        default: ''
      - in: path
        name: body
        type: string
        default: SFRUUEJJTiDjga_mnIDpq5jjgafjgZk=
    produces:
      - text/html
      - text/plain
      - '*/*'
    responses:
      200:
        description: Content with the requested encoding and content type and body.
      400:
        description: The body is not valid URL-safe base64.
    """
    response = make_response()

    charset = charset or request.headers.get("accept-charset", "utf-8")

    # Only the first media type of the Accept header is considered, with any
    # parameters (e.g. ";q=0.9") stripped.
    accept_header = request.headers.get("accept")
    if accept_header is not None:
        accept_header = accept_header.split(";")[0].split(",")[0]
    media_type = request.args.get("content-type", accept_header) or "text/html"
    response.content_type = media_type + "; charset=" + charset

    if body:
        # The caller supplies the raw bytes, so the charset is declared
        # exactly as requested. Padding is restored because URL-safe base64
        # in a path segment is frequently sent without the trailing "=".
        try:
            response.data = base64.urlsafe_b64decode(body + "=" * (-len(body) % 4))
        except ValueError:
            # binascii.Error is a ValueError subclass; non-ASCII input raises
            # a plain ValueError. Either way the request is malformed.
            response.status_code = 400
            response.content_type = "text/plain; charset=utf-8"
            response.data = b"Invalid URL-safe base64 body."
        return response

    normalized_charset = normalize_charset(charset)
    if normalized_charset is None:
        # Unknown charset: the demo is encoded as UTF-8, so the header must
        # say so too rather than advertise a charset the body does not use.
        normalized_charset = "utf-8"
        response.content_type = media_type + "; charset=" + normalized_charset
    else:
        normalized_charset = normalized_charset.lower()

    if normalized_charset in ["utf-8", "utf-16", "utf-32"]:
        # All Unicode encodings share the UTF-8 demo text, re-encoded below.
        template_data = {
            "title": "Unicode Demo",
            "citation_url": "http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-demo.txt",
            "body_template": "encoding/utf-8.txt",
            "citation_prefix": ("Taken from" if normalized_charset == "utf-8"
                                else f"Re-encoded to {normalized_charset} from the utf-8 taken from"),
        }
    else:
        template_data = {
            "title": f"{normalized_charset} Demo",
            "citation_url": "",
            "body_template": f"encoding/{normalized_charset}.txt",
            "citation_prefix": "",
        }

    # HTML responses get the wrapper page; anything else gets the bare text.
    if response.content_type.startswith("text/html"):
        template_name = "encoding/demo.html.j2"
    else:
        template_name = template_data["body_template"]
    response.data = render_template(template_name, **template_data).encode(normalized_charset)

    return response
14241501

14251502

14261503
@app.route("/bytes/<int:n>")

httpbin/helpers.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,3 +483,28 @@ def digest_challenge_response(app, qop, algorithm, stale = False):
483483
auth = WWWAuthenticate("digest", values=values)
484484
response.headers['WWW-Authenticate'] = auth.to_header()
485485
return response
486+
487+
488+
def normalize_charset(charset):
    """Map a charset name or one of its aliases onto its canonical name.

    Matching is case-insensitive, ignores surrounding whitespace, and must
    cover the whole name: "iso-8859-15" is not treated as "iso-8859-1" and
    "utf-16le" is not treated as "utf-16".

    :param charset: Charset label such as "utf8", "latin1" or "cp1252".
        May be None or empty.
    :return: The canonical charset name (e.g. "UTF-8", "ISO-8859-1"), or
        None when the label is empty or not recognised.
    """
    if not charset:
        return None

    name = charset.strip().lower()

    # (alias pattern, canonical name). Patterns are matched against the full
    # lower-cased name, so literal dots are escaped.
    charset_aliases = (
        (r"utf[-_]?8", "UTF-8"),
        (r"utf[-_]?16", "UTF-16"),
        (r"utf[-_]?32", "UTF-32"),
        (r"iso-ir-6|ansi_x3\.4-1968|ansi_x3\.4-1986|iso_646\.irv:1991|ascii|iso646-us|us-ascii|us|csascii", "US-ASCII"),
        (r"iso[-_]?8859[-_]?2|iso-ir-101|csisolatin2|latin[-_]?2|l2|ibm912|cp912", "ISO-8859-2"),
        (r"iso[-_]?8859[-_]?3|iso-ir-109|csisolatin3|latin[-_]?3|l3|ibm913|cp913", "ISO-8859-3"),
        (r"iso[-_]?8859[-_]?4|iso-ir-110|csisolatin4|latin[-_]?4|l4|ibm914|cp914", "ISO-8859-4"),
        # The trailing "1" stays optional so a bare "iso-8859" keeps
        # resolving to Latin-1, as it did before.
        (r"iso[-_]?8859[-_]?1?|iso-ir-100|csisolatin1|latin[-_]?1|l1|ibm819|cp819", "ISO-8859-1"),
        (r"big5|csbig5|cn-big5", "Big5"),
        (r"gb2312|csgb2312|chinese", "GB2312"),
        (r"euc-jp|.*japanese", "EUC-JP"),
        (r"shift_jis|csshiftjis|ms_kanji|x-sjis", "Shift_JIS"),
        (r"windows-1252|windows1252|cp1252|ms-ee", "Windows-1252"),
    )

    for pattern, normalized in charset_aliases:
        # fullmatch, not match: a prefix hit must not claim a different charset.
        if re.fullmatch(pattern, name):
            return normalized

    return None
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
這是一個中文文本的範例。這段文本是以Big5編碼的。Big5是用來處理中文字符的編碼之一。以下是一段中文的文章。
2+
3+
中文中有很多不同的字符,包括漢字、標點符號等。漢字是從古代流傳下來的字符,每個字符都有其獨特的意義。例如,“中文”這個詞是由兩個漢字組成的:“中”和“文”。
4+
5+
今天的天氣非常好。天空湛藍,微風習習。公園裡有許多人在散步。孩子們在遊樂場玩耍,大人們坐在長椅上聊天。很多人帶著狗在公園裡散步。在自然環境中度過時光,讓人感到非常放鬆。
6+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{#- HTML wrapper for the /encoding demo: a title, an optional source citation, and the charset's sample text. -#}
<h1>{{ title }}</h1>

{# Only charsets with a cited source pass a citation_url; skip the paragraph otherwise so no empty link is rendered. #}
{% if citation_url %}
<p>{{ citation_prefix }} <a
  href="{{ citation_url }}">{{ citation_url }}</a></p>
{% endif %}

<pre>
{% include body_template ignore missing %}
</pre>
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
これは日本語のテキストの例です。このテキストは、EUC-JPエンコーディングで符号化されています。EUC-JPは、日本語をコンピュータで扱うための文字エンコーディングの一つです。以下に、日本語の文章を続けます。
2+
3+
日本語には、漢字、ひらがな、カタカナの三種類の文字があります。漢字は中国から伝わった文字で、意味を持つ文字です。ひらがなとカタカナは、日本独自の音節文字で、発音を表します。例えば、「日本語」という単語は、漢字で「日本」と書き、ひらがなで「ご」と書きます。
4+
5+
今日の天気は晴れです。青い空が広がっており、風も心地よいです。公園では、多くの人々が散歩を楽しんでいます。子供たちは遊具で遊び、大人たちはベンチに座って話をしています。犬を連れた人も多く見かけます。自然の中で過ごす時間は、とてもリフレッシュできます。
6+
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
这是一个中文文本的范例。这段文本是以GB2312编码的。GB2312是用来处理中文字符的编码之一。以下是一段中文的文章。
2+
3+
中文中有很多不同的字符,包括汉字、标点符号等。汉字是从古代流传下来的字符,每个字符都有其独特的意义。例如,“中文”这个词是由两个汉字组成的:“中”和“文”。
4+
5+
今天的天气非常好。天空湛蓝,微风习习。公园里有许多人在散步。孩子们在游乐场玩耍,大人们坐在长椅上聊天。很多人带着狗在公园里散步。在自然环境中度过时光,让人感到非常放松。
6+
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
https://en.wikipedia.org/wiki/ISO/IEC_8859-1
2+
3+
! " # $ % & ' ( ) * + , - . /
4+
0 1 2 3 4 5 6 7 8 9 : ; < = > ?
5+
@ A B C D E F G H I J K L M N O
6+
P Q R S T U V W X Y Z [ \ ] ^ _
7+
` a b c d e f g h i j k l m n o
8+
p q r s t u v w x y z { | } ~
9+
NBSP¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ SHY ® ¯
10+
° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
11+
À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
12+
Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
13+
à á â ã ä å æ ç è é ê ë ì í î ï
14+
ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
15+
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
https://en.wikipedia.org/wiki/ISO/IEC_8859-2
2+
3+
! " # $ % & ' ( ) * + , - . /
4+
0 1 2 3 4 5 6 7 8 9 : ; < = > ?
5+
@ A B C D E F G H I J K L M N O
6+
P Q R S T U V W X Y Z [ \ ] ^ _
7+
` a b c d e f g h i j k l m n o
8+
p q r s t u v w x y z { | } ~
9+
NBSPĄ ˘ Ł ¤ Ľ Ś § ¨ Š Ş Ť Ź SHY Ž Ż
10+
° ą ˛ ł ´ ľ ś ˇ ¸ š ş ť ź ˝ ž ż
11+
Ŕ Á Â Ă Ä Ĺ Ć Ç Č É Ę Ë Ě Í Î Ď
12+
Đ Ń Ň Ó Ô Ő Ö × Ř Ů Ú Ű Ü Ý Ţ ß
13+
ŕ á â ă ä ĺ ć ç č é ę ë ě í î ď
14+
đ ń ň ó ô ő ö ÷ ř ů ú ű ü ý ţ ˙
15+
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
https://en.wikipedia.org/wiki/ISO/IEC_8859-3
2+
3+
! " # $ % & ' ( ) * + , - . /
4+
0 1 2 3 4 5 6 7 8 9 : ; < = > ?
5+
@ A B C D E F G H I J K L M N O
6+
P Q R S T U V W X Y Z [ \ ] ^ _
7+
` a b c d e f g h i j k l m n o
8+
p q r s t u v w x y z { | } ~
9+
NBSPĦ ˘ £ ¤ Ĥ § ¨ İ Ş Ğ Ĵ SHY Ż
10+
° ħ ² ³ ´ µ ĥ · ¸ ı ş ğ ĵ ½ ż
11+
À Á Â Ä Ċ Ĉ Ç È É Ê Ë Ì Í Î Ï
12+
Ñ Ò Ó Ô Ġ Ö × Ĝ Ù Ú Û Ü Ŭ Ŝ ß
13+
à á â ä ċ ĉ ç è é ê ë ì í î ï
14+
ñ ò ó ô ġ ö ÷ ĝ ù ú û ü ŭ ŝ ˙
15+
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
https://en.wikipedia.org/wiki/ISO/IEC_8859-4
2+
3+
! " # $ % & ' ( ) * + , - . /
4+
0 1 2 3 4 5 6 7 8 9 : ; < = > ?
5+
@ A B C D E F G H I J K L M N O
6+
P Q R S T U V W X Y Z [ \ ] ^ _
7+
` a b c d e f g h i j k l m n o
8+
p q r s t u v w x y z { | } ~
9+
NBSPĄ ĸ Ŗ ¤ Ĩ Ļ § ¨ Š Ē Ģ Ŧ SHY Ž ¯
10+
° ą ˛ ŗ ´ ĩ ļ ˇ ¸ š ē ģ ŧ Ŋ ž ŋ
11+
Ā Á Â Ã Ä Å Æ Į Č É Ę Ë Ė Í Î Ī
12+
Đ Ņ Ō Ķ Ô Õ Ö × Ø Ų Ú Û Ü Ũ Ū ß
13+
ā á â ã ä å æ į č é ę ë ė í î ī
14+
đ ņ ō ķ ô õ ö ÷ ø ų ú û ü ũ ū ˙
15+

0 commit comments

Comments
 (0)